From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- security/nss/lib/freebl/Makefile | 810 + security/nss/lib/freebl/aes-armv8.c | 1169 ++ security/nss/lib/freebl/aes-armv8.h | 103 + security/nss/lib/freebl/aes-x86.c | 184 + security/nss/lib/freebl/aeskeywrap.c | 642 + security/nss/lib/freebl/alghmac.c | 209 + security/nss/lib/freebl/alghmac.h | 70 + security/nss/lib/freebl/altivec-types.h | 25 + security/nss/lib/freebl/arcfive.c | 87 + security/nss/lib/freebl/arcfour-amd64-gas.s | 88 + security/nss/lib/freebl/arcfour-amd64-masm.asm | 107 + security/nss/lib/freebl/arcfour-amd64-sun.s | 84 + security/nss/lib/freebl/arcfour.c | 594 + security/nss/lib/freebl/blake2b.c | 428 + security/nss/lib/freebl/blake2b.h | 23 + security/nss/lib/freebl/blapi.h | 1796 ++ security/nss/lib/freebl/blapii.h | 111 + security/nss/lib/freebl/blapit.h | 455 + security/nss/lib/freebl/blinit.c | 573 + security/nss/lib/freebl/blname.c | 100 + security/nss/lib/freebl/camellia.c | 1896 ++ security/nss/lib/freebl/camellia.h | 42 + security/nss/lib/freebl/chacha20-ppc64le.S | 668 + security/nss/lib/freebl/chacha20poly1305-ppc.c | 588 + security/nss/lib/freebl/chacha20poly1305.c | 549 + security/nss/lib/freebl/chacha20poly1305.h | 21 + security/nss/lib/freebl/cmac.c | 323 + security/nss/lib/freebl/cmac.h | 47 + security/nss/lib/freebl/config.mk | 93 + security/nss/lib/freebl/crypto_primitives.c | 36 + security/nss/lib/freebl/crypto_primitives.h | 66 + security/nss/lib/freebl/ctr.c | 276 + security/nss/lib/freebl/ctr.h | 52 + security/nss/lib/freebl/cts.c | 303 + security/nss/lib/freebl/cts.h | 33 + security/nss/lib/freebl/deprecated/alg2268.c | 509 + security/nss/lib/freebl/deprecated/seed.c | 671 + security/nss/lib/freebl/deprecated/seed.h | 125 + security/nss/lib/freebl/des.c | 676 + security/nss/lib/freebl/des.h | 43 + 
security/nss/lib/freebl/desblapi.c | 256 + security/nss/lib/freebl/det_rng.c | 163 + security/nss/lib/freebl/det_rng.h | 12 + security/nss/lib/freebl/dh.c | 480 + security/nss/lib/freebl/drbg.c | 1024 + security/nss/lib/freebl/dsa.c | 691 + security/nss/lib/freebl/ec.c | 1170 ++ security/nss/lib/freebl/ec.h | 21 + security/nss/lib/freebl/ecdecode.c | 252 + security/nss/lib/freebl/ecl/README | 163 + security/nss/lib/freebl/ecl/curve25519_32.c | 1213 ++ security/nss/lib/freebl/ecl/curve25519_64.c | 24 + security/nss/lib/freebl/ecl/ec_naf.c | 68 + security/nss/lib/freebl/ecl/ecl-curve.h | 245 + security/nss/lib/freebl/ecl/ecl-exp.h | 167 + security/nss/lib/freebl/ecl/ecl-priv.h | 252 + security/nss/lib/freebl/ecl/ecl.c | 329 + security/nss/lib/freebl/ecl/ecl.h | 49 + security/nss/lib/freebl/ecl/ecl_gf.c | 958 + security/nss/lib/freebl/ecl/ecl_mult.c | 305 + security/nss/lib/freebl/ecl/eclt.h | 30 + security/nss/lib/freebl/ecl/ecp.h | 106 + security/nss/lib/freebl/ecl/ecp_25519.c | 126 + security/nss/lib/freebl/ecl/ecp_256.c | 401 + security/nss/lib/freebl/ecl/ecp_256_32.c | 1535 ++ security/nss/lib/freebl/ecl/ecp_384.c | 258 + security/nss/lib/freebl/ecl/ecp_521.c | 137 + security/nss/lib/freebl/ecl/ecp_aff.c | 308 + security/nss/lib/freebl/ecl/ecp_jac.c | 513 + security/nss/lib/freebl/ecl/ecp_jm.c | 297 + security/nss/lib/freebl/ecl/ecp_mont.c | 154 + security/nss/lib/freebl/ecl/ecp_secp384r1.c | 20817 +++++++++++++++++++ security/nss/lib/freebl/ecl/ecp_secp521r1.c | 12082 +++++++++++ security/nss/lib/freebl/exports.gyp | 51 + security/nss/lib/freebl/fipsfreebl.c | 2251 ++ security/nss/lib/freebl/freebl.def | 26 + security/nss/lib/freebl/freebl.gyp | 954 + security/nss/lib/freebl/freebl.rc | 68 + security/nss/lib/freebl/freebl_base.gypi | 236 + security/nss/lib/freebl/freebl_hash.def | 39 + security/nss/lib/freebl/freebl_hash_vector.def | 34 + security/nss/lib/freebl/freeblver.c | 18 + security/nss/lib/freebl/gcm-aarch64.c | 96 + 
security/nss/lib/freebl/gcm-arm32-neon.c | 202 + security/nss/lib/freebl/gcm-ppc.c | 109 + security/nss/lib/freebl/gcm-x86.c | 127 + security/nss/lib/freebl/gcm.c | 1171 ++ security/nss/lib/freebl/gcm.h | 125 + security/nss/lib/freebl/genload.c | 167 + security/nss/lib/freebl/hmacct.c | 325 + security/nss/lib/freebl/hmacct.h | 38 + security/nss/lib/freebl/intel-aes-x64-masm.asm | 964 + security/nss/lib/freebl/intel-aes-x86-masm.asm | 942 + security/nss/lib/freebl/intel-aes.h | 143 + security/nss/lib/freebl/intel-aes.s | 2485 +++ security/nss/lib/freebl/intel-gcm-wrap.c | 475 + security/nss/lib/freebl/intel-gcm-x64-masm.asm | 1294 ++ security/nss/lib/freebl/intel-gcm-x86-masm.asm | 1207 ++ security/nss/lib/freebl/intel-gcm.h | 97 + security/nss/lib/freebl/intel-gcm.s | 1340 ++ security/nss/lib/freebl/jpake.c | 495 + security/nss/lib/freebl/ldvector.c | 433 + security/nss/lib/freebl/loader.c | 2448 +++ security/nss/lib/freebl/loader.h | 936 + security/nss/lib/freebl/lowhash_vector.c | 224 + security/nss/lib/freebl/manifest.mn | 201 + security/nss/lib/freebl/md2.c | 269 + security/nss/lib/freebl/md5.c | 598 + security/nss/lib/freebl/mknewpc2.c | 208 + security/nss/lib/freebl/mksp.c | 119 + security/nss/lib/freebl/mpi/README | 646 + security/nss/lib/freebl/mpi/doc/LICENSE | 11 + security/nss/lib/freebl/mpi/doc/LICENSE-MPL | 3 + security/nss/lib/freebl/mpi/doc/basecvt.pod | 65 + security/nss/lib/freebl/mpi/doc/build | 30 + security/nss/lib/freebl/mpi/doc/div.txt | 64 + security/nss/lib/freebl/mpi/doc/expt.txt | 94 + security/nss/lib/freebl/mpi/doc/gcd.pod | 28 + security/nss/lib/freebl/mpi/doc/invmod.pod | 34 + security/nss/lib/freebl/mpi/doc/isprime.pod | 63 + security/nss/lib/freebl/mpi/doc/lap.pod | 36 + security/nss/lib/freebl/mpi/doc/mpi-test.pod | 51 + security/nss/lib/freebl/mpi/doc/mul.txt | 77 + security/nss/lib/freebl/mpi/doc/pi.txt | 53 + security/nss/lib/freebl/mpi/doc/prime.txt | 6542 ++++++ security/nss/lib/freebl/mpi/doc/prng.pod | 38 + 
security/nss/lib/freebl/mpi/doc/redux.txt | 86 + security/nss/lib/freebl/mpi/doc/sqrt.txt | 50 + security/nss/lib/freebl/mpi/doc/square.txt | 72 + security/nss/lib/freebl/mpi/doc/timing.txt | 213 + security/nss/lib/freebl/mpi/hpma512.s | 615 + security/nss/lib/freebl/mpi/hppa20.s | 904 + security/nss/lib/freebl/mpi/logtab.h | 28 + security/nss/lib/freebl/mpi/montmulf.c | 286 + security/nss/lib/freebl/mpi/montmulf.h | 65 + security/nss/lib/freebl/mpi/montmulf.il | 108 + security/nss/lib/freebl/mpi/montmulf.s | 1938 ++ security/nss/lib/freebl/mpi/montmulfv8.il | 108 + security/nss/lib/freebl/mpi/montmulfv8.s | 1818 ++ security/nss/lib/freebl/mpi/montmulfv9.il | 93 + security/nss/lib/freebl/mpi/montmulfv9.s | 2346 +++ security/nss/lib/freebl/mpi/mp_comba.c | 3235 +++ .../nss/lib/freebl/mpi/mp_comba_amd64_masm.asm | 13066 ++++++++++++ security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s | 16097 ++++++++++++++ security/nss/lib/freebl/mpi/mp_gf2m-priv.h | 73 + security/nss/lib/freebl/mpi/mp_gf2m.c | 677 + security/nss/lib/freebl/mpi/mp_gf2m.h | 28 + security/nss/lib/freebl/mpi/mpcpucache.c | 788 + security/nss/lib/freebl/mpi/mpcpucache_amd64.s | 861 + security/nss/lib/freebl/mpi/mpcpucache_x86.s | 902 + security/nss/lib/freebl/mpi/mpi-config.h | 56 + security/nss/lib/freebl/mpi/mpi-priv.h | 243 + security/nss/lib/freebl/mpi/mpi.c | 4975 +++++ security/nss/lib/freebl/mpi/mpi.h | 322 + security/nss/lib/freebl/mpi/mpi_amd64.c | 32 + security/nss/lib/freebl/mpi/mpi_amd64_common.S | 409 + security/nss/lib/freebl/mpi/mpi_amd64_masm.asm | 388 + security/nss/lib/freebl/mpi/mpi_amd64_sun.s | 385 + security/nss/lib/freebl/mpi/mpi_arm.c | 175 + security/nss/lib/freebl/mpi/mpi_hp.c | 81 + security/nss/lib/freebl/mpi/mpi_i86pc.s | 313 + security/nss/lib/freebl/mpi/mpi_mips.s | 472 + security/nss/lib/freebl/mpi/mpi_sparc.c | 226 + security/nss/lib/freebl/mpi/mpi_sse2.s | 294 + security/nss/lib/freebl/mpi/mpi_x86.s | 541 + security/nss/lib/freebl/mpi/mpi_x86_asm.c | 531 + 
security/nss/lib/freebl/mpi/mpi_x86_os2.s | 538 + security/nss/lib/freebl/mpi/mplogic.c | 460 + security/nss/lib/freebl/mpi/mplogic.h | 55 + security/nss/lib/freebl/mpi/mpmontg.c | 1151 + security/nss/lib/freebl/mpi/mpprime.c | 610 + security/nss/lib/freebl/mpi/mpprime.h | 48 + security/nss/lib/freebl/mpi/mpv_sparc.c | 221 + security/nss/lib/freebl/mpi/mpv_sparcv8.s | 1607 ++ security/nss/lib/freebl/mpi/mpv_sparcv9.s | 1645 ++ security/nss/lib/freebl/mpi/mpvalpha.c | 183 + security/nss/lib/freebl/mpi/mulsqr.c | 84 + security/nss/lib/freebl/mpi/primes.c | 841 + security/nss/lib/freebl/mpi/vis_32.il | 1291 ++ security/nss/lib/freebl/mpi/vis_64.il | 997 + security/nss/lib/freebl/mpi/vis_proto.h | 234 + security/nss/lib/freebl/nsslowhash.c | 161 + security/nss/lib/freebl/nsslowhash.h | 33 + security/nss/lib/freebl/ppc-crypto.h | 31 + security/nss/lib/freebl/ppc-gcm-wrap.c | 458 + security/nss/lib/freebl/ppc-gcm.h | 76 + security/nss/lib/freebl/ppc-gcm.s | 1051 + security/nss/lib/freebl/pqg.c | 1926 ++ security/nss/lib/freebl/pqg.h | 25 + security/nss/lib/freebl/rawhash.c | 154 + security/nss/lib/freebl/ret_cr16.s | 27 + security/nss/lib/freebl/rijndael.c | 1265 ++ security/nss/lib/freebl/rijndael.h | 80 + security/nss/lib/freebl/rijndael32.tab | 1219 ++ security/nss/lib/freebl/rijndael_tables.c | 213 + security/nss/lib/freebl/rsa.c | 1710 ++ security/nss/lib/freebl/rsapkcs.c | 1705 ++ security/nss/lib/freebl/scripts/LICENSE | 36 + security/nss/lib/freebl/scripts/gen.sh | 19 + security/nss/lib/freebl/scripts/ppc-xlate.pl | 352 + security/nss/lib/freebl/scripts/sha512p8-ppc.pl | 413 + security/nss/lib/freebl/secmpi.c | 28 + security/nss/lib/freebl/secmpi.h | 63 + security/nss/lib/freebl/secrng.h | 65 + security/nss/lib/freebl/sha-fast-amd64-sun.s | 2151 ++ security/nss/lib/freebl/sha1-armv8.c | 264 + security/nss/lib/freebl/sha256-armv8.c | 203 + security/nss/lib/freebl/sha256-x86.c | 236 + security/nss/lib/freebl/sha256.h | 27 + security/nss/lib/freebl/sha512-p8.s | 851 
+ security/nss/lib/freebl/sha512.c | 1776 ++ security/nss/lib/freebl/sha_fast.c | 592 + security/nss/lib/freebl/sha_fast.h | 186 + security/nss/lib/freebl/shsign.h | 26 + security/nss/lib/freebl/shvfy.c | 664 + security/nss/lib/freebl/stubs.c | 835 + security/nss/lib/freebl/stubs.h | 71 + security/nss/lib/freebl/sysrand.c | 18 + security/nss/lib/freebl/tlsprfalg.c | 134 + security/nss/lib/freebl/unix_rand.c | 811 + security/nss/lib/freebl/unix_urandom.c | 84 + .../nss/lib/freebl/verified/Hacl_Bignum25519_51.h | 678 + security/nss/lib/freebl/verified/Hacl_Chacha20.c | 226 + security/nss/lib/freebl/verified/Hacl_Chacha20.h | 61 + .../freebl/verified/Hacl_Chacha20Poly1305_128.c | 1177 ++ .../freebl/verified/Hacl_Chacha20Poly1305_128.h | 67 + .../freebl/verified/Hacl_Chacha20Poly1305_256.c | 1179 ++ .../freebl/verified/Hacl_Chacha20Poly1305_256.h | 67 + .../lib/freebl/verified/Hacl_Chacha20Poly1305_32.c | 592 + .../lib/freebl/verified/Hacl_Chacha20Poly1305_32.h | 67 + .../nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c | 819 + .../nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h | 61 + .../nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c | 1207 ++ .../nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h | 61 + .../nss/lib/freebl/verified/Hacl_Curve25519_51.c | 296 + .../nss/lib/freebl/verified/Hacl_Curve25519_51.h | 50 + .../nss/lib/freebl/verified/Hacl_Curve25519_64.c | 400 + .../nss/lib/freebl/verified/Hacl_Curve25519_64.h | 49 + security/nss/lib/freebl/verified/Hacl_Krmllib.h | 81 + .../nss/lib/freebl/verified/Hacl_Poly1305_128.c | 1616 ++ .../nss/lib/freebl/verified/Hacl_Poly1305_128.h | 63 + .../nss/lib/freebl/verified/Hacl_Poly1305_256.c | 2087 ++ .../nss/lib/freebl/verified/Hacl_Poly1305_256.h | 63 + .../nss/lib/freebl/verified/Hacl_Poly1305_32.c | 574 + .../nss/lib/freebl/verified/Hacl_Poly1305_32.h | 55 + security/nss/lib/freebl/verified/config.h | 0 .../nss/lib/freebl/verified/curve25519-inline.h | 942 + .../nss/lib/freebl/verified/internal/Hacl_Bignum.h | 312 + 
.../lib/freebl/verified/internal/Hacl_Chacha20.h | 50 + .../freebl/verified/internal/Hacl_Curve25519_51.h | 53 + .../lib/freebl/verified/internal/Hacl_Hash_SHA1.h | 49 + .../lib/freebl/verified/internal/Hacl_Hash_SHA2.h | 65 + .../lib/freebl/verified/internal/Hacl_Krmllib.h | 45 + .../freebl/verified/internal/Hacl_Poly1305_128.h | 51 + .../freebl/verified/internal/Hacl_Poly1305_256.h | 51 + .../nss/lib/freebl/verified/internal/Hacl_Spec.h | 59 + security/nss/lib/freebl/verified/internal/Vale.h | 184 + .../verified/karamel/include/krml/c_endianness.h | 13 + .../verified/karamel/include/krml/fstar_int.h | 89 + .../karamel/include/krml/internal/builtin.h | 16 + .../karamel/include/krml/internal/callconv.h | 46 + .../karamel/include/krml/internal/compat.h | 32 + .../verified/karamel/include/krml/internal/debug.h | 57 + .../karamel/include/krml/internal/target.h | 333 + .../verified/karamel/include/krml/internal/types.h | 105 + .../karamel/include/krml/internal/wasmsupport.h | 5 + .../karamel/include/krml/lowstar_endianness.h | 242 + .../lib/freebl/verified/karamel/include/krmllib.h | 28 + .../karamel/krmllib/dist/minimal/FStar_UInt128.h | 75 + .../krmllib/dist/minimal/FStar_UInt128_Verified.h | 327 + .../krmllib/dist/minimal/FStar_UInt_8_16_32_64.h | 218 + .../krmllib/dist/minimal/LowStar_Endianness.h | 25 + .../karamel/krmllib/dist/minimal/Makefile.basic | 56 + .../karamel/krmllib/dist/minimal/Makefile.include | 5 + .../krmllib/dist/minimal/fstar_uint128_gcc64.h | 225 + .../krmllib/dist/minimal/fstar_uint128_msvc.h | 571 + .../dist/minimal/fstar_uint128_struct_endianness.h | 84 + .../karamel/krmllib/dist/minimal/libkrmllib.def | 11 + security/nss/lib/freebl/verified/libintvector.h | 915 + security/nss/lib/freebl/win_rand.c | 161 + 280 files changed, 189031 insertions(+) create mode 100644 security/nss/lib/freebl/Makefile create mode 100644 security/nss/lib/freebl/aes-armv8.c create mode 100644 security/nss/lib/freebl/aes-armv8.h create mode 100644 
security/nss/lib/freebl/aes-x86.c create mode 100644 security/nss/lib/freebl/aeskeywrap.c create mode 100644 security/nss/lib/freebl/alghmac.c create mode 100644 security/nss/lib/freebl/alghmac.h create mode 100644 security/nss/lib/freebl/altivec-types.h create mode 100644 security/nss/lib/freebl/arcfive.c create mode 100644 security/nss/lib/freebl/arcfour-amd64-gas.s create mode 100644 security/nss/lib/freebl/arcfour-amd64-masm.asm create mode 100644 security/nss/lib/freebl/arcfour-amd64-sun.s create mode 100644 security/nss/lib/freebl/arcfour.c create mode 100644 security/nss/lib/freebl/blake2b.c create mode 100644 security/nss/lib/freebl/blake2b.h create mode 100644 security/nss/lib/freebl/blapi.h create mode 100644 security/nss/lib/freebl/blapii.h create mode 100644 security/nss/lib/freebl/blapit.h create mode 100644 security/nss/lib/freebl/blinit.c create mode 100644 security/nss/lib/freebl/blname.c create mode 100644 security/nss/lib/freebl/camellia.c create mode 100644 security/nss/lib/freebl/camellia.h create mode 100644 security/nss/lib/freebl/chacha20-ppc64le.S create mode 100644 security/nss/lib/freebl/chacha20poly1305-ppc.c create mode 100644 security/nss/lib/freebl/chacha20poly1305.c create mode 100644 security/nss/lib/freebl/chacha20poly1305.h create mode 100644 security/nss/lib/freebl/cmac.c create mode 100644 security/nss/lib/freebl/cmac.h create mode 100644 security/nss/lib/freebl/config.mk create mode 100644 security/nss/lib/freebl/crypto_primitives.c create mode 100644 security/nss/lib/freebl/crypto_primitives.h create mode 100644 security/nss/lib/freebl/ctr.c create mode 100644 security/nss/lib/freebl/ctr.h create mode 100644 security/nss/lib/freebl/cts.c create mode 100644 security/nss/lib/freebl/cts.h create mode 100644 security/nss/lib/freebl/deprecated/alg2268.c create mode 100644 security/nss/lib/freebl/deprecated/seed.c create mode 100644 security/nss/lib/freebl/deprecated/seed.h create mode 100644 security/nss/lib/freebl/des.c create mode 
100644 security/nss/lib/freebl/des.h create mode 100644 security/nss/lib/freebl/desblapi.c create mode 100644 security/nss/lib/freebl/det_rng.c create mode 100644 security/nss/lib/freebl/det_rng.h create mode 100644 security/nss/lib/freebl/dh.c create mode 100644 security/nss/lib/freebl/drbg.c create mode 100644 security/nss/lib/freebl/dsa.c create mode 100644 security/nss/lib/freebl/ec.c create mode 100644 security/nss/lib/freebl/ec.h create mode 100644 security/nss/lib/freebl/ecdecode.c create mode 100644 security/nss/lib/freebl/ecl/README create mode 100644 security/nss/lib/freebl/ecl/curve25519_32.c create mode 100644 security/nss/lib/freebl/ecl/curve25519_64.c create mode 100644 security/nss/lib/freebl/ecl/ec_naf.c create mode 100644 security/nss/lib/freebl/ecl/ecl-curve.h create mode 100644 security/nss/lib/freebl/ecl/ecl-exp.h create mode 100644 security/nss/lib/freebl/ecl/ecl-priv.h create mode 100644 security/nss/lib/freebl/ecl/ecl.c create mode 100644 security/nss/lib/freebl/ecl/ecl.h create mode 100644 security/nss/lib/freebl/ecl/ecl_gf.c create mode 100644 security/nss/lib/freebl/ecl/ecl_mult.c create mode 100644 security/nss/lib/freebl/ecl/eclt.h create mode 100644 security/nss/lib/freebl/ecl/ecp.h create mode 100644 security/nss/lib/freebl/ecl/ecp_25519.c create mode 100644 security/nss/lib/freebl/ecl/ecp_256.c create mode 100644 security/nss/lib/freebl/ecl/ecp_256_32.c create mode 100644 security/nss/lib/freebl/ecl/ecp_384.c create mode 100644 security/nss/lib/freebl/ecl/ecp_521.c create mode 100644 security/nss/lib/freebl/ecl/ecp_aff.c create mode 100644 security/nss/lib/freebl/ecl/ecp_jac.c create mode 100644 security/nss/lib/freebl/ecl/ecp_jm.c create mode 100644 security/nss/lib/freebl/ecl/ecp_mont.c create mode 100644 security/nss/lib/freebl/ecl/ecp_secp384r1.c create mode 100644 security/nss/lib/freebl/ecl/ecp_secp521r1.c create mode 100644 security/nss/lib/freebl/exports.gyp create mode 100644 security/nss/lib/freebl/fipsfreebl.c create mode 
100644 security/nss/lib/freebl/freebl.def create mode 100644 security/nss/lib/freebl/freebl.gyp create mode 100644 security/nss/lib/freebl/freebl.rc create mode 100644 security/nss/lib/freebl/freebl_base.gypi create mode 100644 security/nss/lib/freebl/freebl_hash.def create mode 100644 security/nss/lib/freebl/freebl_hash_vector.def create mode 100644 security/nss/lib/freebl/freeblver.c create mode 100644 security/nss/lib/freebl/gcm-aarch64.c create mode 100644 security/nss/lib/freebl/gcm-arm32-neon.c create mode 100644 security/nss/lib/freebl/gcm-ppc.c create mode 100644 security/nss/lib/freebl/gcm-x86.c create mode 100644 security/nss/lib/freebl/gcm.c create mode 100644 security/nss/lib/freebl/gcm.h create mode 100644 security/nss/lib/freebl/genload.c create mode 100644 security/nss/lib/freebl/hmacct.c create mode 100644 security/nss/lib/freebl/hmacct.h create mode 100644 security/nss/lib/freebl/intel-aes-x64-masm.asm create mode 100644 security/nss/lib/freebl/intel-aes-x86-masm.asm create mode 100644 security/nss/lib/freebl/intel-aes.h create mode 100644 security/nss/lib/freebl/intel-aes.s create mode 100644 security/nss/lib/freebl/intel-gcm-wrap.c create mode 100644 security/nss/lib/freebl/intel-gcm-x64-masm.asm create mode 100644 security/nss/lib/freebl/intel-gcm-x86-masm.asm create mode 100644 security/nss/lib/freebl/intel-gcm.h create mode 100644 security/nss/lib/freebl/intel-gcm.s create mode 100644 security/nss/lib/freebl/jpake.c create mode 100644 security/nss/lib/freebl/ldvector.c create mode 100644 security/nss/lib/freebl/loader.c create mode 100644 security/nss/lib/freebl/loader.h create mode 100644 security/nss/lib/freebl/lowhash_vector.c create mode 100644 security/nss/lib/freebl/manifest.mn create mode 100644 security/nss/lib/freebl/md2.c create mode 100644 security/nss/lib/freebl/md5.c create mode 100644 security/nss/lib/freebl/mknewpc2.c create mode 100644 security/nss/lib/freebl/mksp.c create mode 100644 security/nss/lib/freebl/mpi/README create 
mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE create mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE-MPL create mode 100644 security/nss/lib/freebl/mpi/doc/basecvt.pod create mode 100755 security/nss/lib/freebl/mpi/doc/build create mode 100644 security/nss/lib/freebl/mpi/doc/div.txt create mode 100644 security/nss/lib/freebl/mpi/doc/expt.txt create mode 100644 security/nss/lib/freebl/mpi/doc/gcd.pod create mode 100644 security/nss/lib/freebl/mpi/doc/invmod.pod create mode 100644 security/nss/lib/freebl/mpi/doc/isprime.pod create mode 100644 security/nss/lib/freebl/mpi/doc/lap.pod create mode 100644 security/nss/lib/freebl/mpi/doc/mpi-test.pod create mode 100644 security/nss/lib/freebl/mpi/doc/mul.txt create mode 100644 security/nss/lib/freebl/mpi/doc/pi.txt create mode 100644 security/nss/lib/freebl/mpi/doc/prime.txt create mode 100644 security/nss/lib/freebl/mpi/doc/prng.pod create mode 100644 security/nss/lib/freebl/mpi/doc/redux.txt create mode 100644 security/nss/lib/freebl/mpi/doc/sqrt.txt create mode 100644 security/nss/lib/freebl/mpi/doc/square.txt create mode 100644 security/nss/lib/freebl/mpi/doc/timing.txt create mode 100644 security/nss/lib/freebl/mpi/hpma512.s create mode 100644 security/nss/lib/freebl/mpi/hppa20.s create mode 100644 security/nss/lib/freebl/mpi/logtab.h create mode 100644 security/nss/lib/freebl/mpi/montmulf.c create mode 100644 security/nss/lib/freebl/mpi/montmulf.h create mode 100644 security/nss/lib/freebl/mpi/montmulf.il create mode 100644 security/nss/lib/freebl/mpi/montmulf.s create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.il create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.s create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.il create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.s create mode 100644 security/nss/lib/freebl/mpi/mp_comba.c create mode 100644 security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm create mode 100644 security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s create mode 
100644 security/nss/lib/freebl/mpi/mp_gf2m-priv.h create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.c create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.h create mode 100644 security/nss/lib/freebl/mpi/mpcpucache.c create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_amd64.s create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_x86.s create mode 100644 security/nss/lib/freebl/mpi/mpi-config.h create mode 100644 security/nss/lib/freebl/mpi/mpi-priv.h create mode 100644 security/nss/lib/freebl/mpi/mpi.c create mode 100644 security/nss/lib/freebl/mpi/mpi.h create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64.c create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_common.S create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_masm.asm create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_sun.s create mode 100644 security/nss/lib/freebl/mpi/mpi_arm.c create mode 100644 security/nss/lib/freebl/mpi/mpi_hp.c create mode 100644 security/nss/lib/freebl/mpi/mpi_i86pc.s create mode 100644 security/nss/lib/freebl/mpi/mpi_mips.s create mode 100644 security/nss/lib/freebl/mpi/mpi_sparc.c create mode 100644 security/nss/lib/freebl/mpi/mpi_sse2.s create mode 100644 security/nss/lib/freebl/mpi/mpi_x86.s create mode 100644 security/nss/lib/freebl/mpi/mpi_x86_asm.c create mode 100644 security/nss/lib/freebl/mpi/mpi_x86_os2.s create mode 100644 security/nss/lib/freebl/mpi/mplogic.c create mode 100644 security/nss/lib/freebl/mpi/mplogic.h create mode 100644 security/nss/lib/freebl/mpi/mpmontg.c create mode 100644 security/nss/lib/freebl/mpi/mpprime.c create mode 100644 security/nss/lib/freebl/mpi/mpprime.h create mode 100644 security/nss/lib/freebl/mpi/mpv_sparc.c create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv8.s create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv9.s create mode 100644 security/nss/lib/freebl/mpi/mpvalpha.c create mode 100644 security/nss/lib/freebl/mpi/mulsqr.c create mode 100644 security/nss/lib/freebl/mpi/primes.c 
create mode 100644 security/nss/lib/freebl/mpi/vis_32.il create mode 100644 security/nss/lib/freebl/mpi/vis_64.il create mode 100644 security/nss/lib/freebl/mpi/vis_proto.h create mode 100644 security/nss/lib/freebl/nsslowhash.c create mode 100644 security/nss/lib/freebl/nsslowhash.h create mode 100644 security/nss/lib/freebl/ppc-crypto.h create mode 100644 security/nss/lib/freebl/ppc-gcm-wrap.c create mode 100644 security/nss/lib/freebl/ppc-gcm.h create mode 100644 security/nss/lib/freebl/ppc-gcm.s create mode 100644 security/nss/lib/freebl/pqg.c create mode 100644 security/nss/lib/freebl/pqg.h create mode 100644 security/nss/lib/freebl/rawhash.c create mode 100644 security/nss/lib/freebl/ret_cr16.s create mode 100644 security/nss/lib/freebl/rijndael.c create mode 100644 security/nss/lib/freebl/rijndael.h create mode 100644 security/nss/lib/freebl/rijndael32.tab create mode 100644 security/nss/lib/freebl/rijndael_tables.c create mode 100644 security/nss/lib/freebl/rsa.c create mode 100644 security/nss/lib/freebl/rsapkcs.c create mode 100644 security/nss/lib/freebl/scripts/LICENSE create mode 100755 security/nss/lib/freebl/scripts/gen.sh create mode 100644 security/nss/lib/freebl/scripts/ppc-xlate.pl create mode 100644 security/nss/lib/freebl/scripts/sha512p8-ppc.pl create mode 100644 security/nss/lib/freebl/secmpi.c create mode 100644 security/nss/lib/freebl/secmpi.h create mode 100644 security/nss/lib/freebl/secrng.h create mode 100644 security/nss/lib/freebl/sha-fast-amd64-sun.s create mode 100644 security/nss/lib/freebl/sha1-armv8.c create mode 100644 security/nss/lib/freebl/sha256-armv8.c create mode 100644 security/nss/lib/freebl/sha256-x86.c create mode 100644 security/nss/lib/freebl/sha256.h create mode 100644 security/nss/lib/freebl/sha512-p8.s create mode 100644 security/nss/lib/freebl/sha512.c create mode 100644 security/nss/lib/freebl/sha_fast.c create mode 100644 security/nss/lib/freebl/sha_fast.h create mode 100644 security/nss/lib/freebl/shsign.h 
create mode 100644 security/nss/lib/freebl/shvfy.c create mode 100644 security/nss/lib/freebl/stubs.c create mode 100644 security/nss/lib/freebl/stubs.h create mode 100644 security/nss/lib/freebl/sysrand.c create mode 100644 security/nss/lib/freebl/tlsprfalg.c create mode 100644 security/nss/lib/freebl/unix_rand.c create mode 100644 security/nss/lib/freebl/unix_urandom.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Bignum25519_51.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Curve25519_51.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Curve25519_51.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Curve25519_64.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Curve25519_64.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Krmllib.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Poly1305_128.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Poly1305_128.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Poly1305_256.c create mode 100644 
security/nss/lib/freebl/verified/Hacl_Poly1305_256.h create mode 100644 security/nss/lib/freebl/verified/Hacl_Poly1305_32.c create mode 100644 security/nss/lib/freebl/verified/Hacl_Poly1305_32.h create mode 100644 security/nss/lib/freebl/verified/config.h create mode 100644 security/nss/lib/freebl/verified/curve25519-inline.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Bignum.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA1.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA2.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h create mode 100644 security/nss/lib/freebl/verified/internal/Hacl_Spec.h create mode 100644 security/nss/lib/freebl/verified/internal/Vale.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h create mode 100644 
security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h create mode 100644 security/nss/lib/freebl/verified/karamel/include/krmllib.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h create mode 100644 security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def create mode 100644 security/nss/lib/freebl/verified/libintvector.h create mode 100644 security/nss/lib/freebl/win_rand.c (limited to 'security/nss/lib/freebl') diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile new file mode 100644 index 0000000000..74e8e65459 --- /dev/null +++ b/security/nss/lib/freebl/Makefile @@ -0,0 +1,810 @@ +#! gmake +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +####################################################################### +# (1) Include initial platform-independent assignments (MANDATORY). 
# +####################################################################### + +include manifest.mn + +####################################################################### +# (2) Include "global" configuration information. (OPTIONAL) # +####################################################################### + +include $(CORE_DEPTH)/coreconf/config.mk + +####################################################################### +# (3) Include "component" configuration information. (OPTIONAL) # +####################################################################### + + + +####################################################################### +# (4) Include "local" platform-dependent assignments (OPTIONAL). # +####################################################################### + +include config.mk + +# default for all platforms +# unset this on those that have multiple freebl libraries +FREEBL_BUILD_SINGLE_SHLIB = 1 + +ifdef USE_64 + DEFINES += -DNSS_USE_64 +endif + +ifdef USE_ABI32_FPU + DEFINES += -DNSS_USE_ABI32_FPU +endif + +ifeq ($(FREEBL_NO_DEPEND),1) + DEFINES += -DFREEBL_NO_DEPEND + STUBS_SRCS = stubs.c +endif + +ifeq ($(FREEBL_LOWHASH),1) + DEFINES += -DFREEBL_LOWHASH + LOWHASH_SRCS = nsslowhash.c + LOWHASH_EXPORTS = nsslowhash.h + MAPFILE_SOURCE = freebl_hash_vector.def + NEED_STUB_BUILD = 1 +else + MAPFILE_SOURCE = freebl.def +endif + +ifdef USE_STUB_BUILD + CSRCS = lowhash_vector.c + SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX)) + OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS)) + ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \ + $(NOSUCHFILE) so_locations + MAPFILE_SOURCE = freebl_hash.def +endif + +# FREEBL_USE_PRELINK +# +# Most modern version of Linux support a speed optimization scheme where an +# application called prelink modifies programs and shared libraries to quickly +# load if they fit into an already designed address space. 
In short, prelink +# scans the list of programs and libraries on your system, assigns them a +# predefined space in the address space, then provides the fixups to the +# library. +# +# The modification of the shared library is correctly detected by the freebl +# FIPS checksum scheme where we check a signed hash of the library against the +# library itself. +# +# The prelink command itself can reverse the process of modification and output +# the pristine shared library as it was before prelink made its changes. +# This option tells Freebl that it can use prelink to output the original copy of +# the shared library before prelink modified it. +# +# FREEBL_PRELINK_COMMAND +# +# This is an optional environment variable which can override the default +# prelink command. It could be used on systems that did something similar to +# prelink but used a different command and syntax. The only requirement is the +# program must take the library as the last argument, the program must output +# the original library to standard out, and the program does not need to take +# any quoted or imbedded spaces in its arguments (except the path to the +# library itself, which can have imbedded spaces or special characters).
+# +ifdef FREEBL_USE_PRELINK + DEFINES += -DFREEBL_USE_PRELINK +ifdef LINUX + DEFINES += -D__GNU_SOURCE=1 +endif +endif +ifdef NSS_NO_INIT_SUPPORT + DEFINES += -DNSS_NO_INIT_SUPPORT +endif +ifdef NSS_STRICT_INTEGRITY + DEFINES += -DNSS_STRICT_INTEGRITY_ +endif + +ifdef FREEBL_PRELINK_COMMAND + DEFINES +=-DFREEBL_PRELINK_COMMAND=\"$(FREEBL_PRELINK_COMMAND)\" +endif +# NSS_X86 means the target is a 32-bits x86 CPU architecture +# NSS_X64 means the target is a 64-bits 64 CPU architecture +# NSS_X86_OR_X64 means the target is either x86 or x64 +ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH))) + DEFINES += -DNSS_X86_OR_X64 + EXTRA_SRCS += gcm-x86.c aes-x86.c +$(OBJDIR)/gcm-x86.o: CFLAGS += -mpclmul -maes +$(OBJDIR)/aes-x86.o: CFLAGS += -mpclmul -maes +ifneq (,$(USE_64)$(USE_X32)) + DEFINES += -DNSS_X64 +else + DEFINES += -DNSS_X86 +endif + ifdef CC_IS_CLANG + EXTRA_SRCS += sha256-x86.c + DEFINES += -DUSE_HW_SHA2 + else ifeq (1,$(CC_IS_GCC)) + # Old compiler doesn't support Intel SHA extension + ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION)))) + EXTRA_SRCS += sha256-x86.c + DEFINES += -DUSE_HW_SHA2 + endif + ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) + EXTRA_SRCS += sha256-x86.c + DEFINES += -DUSE_HW_SHA2 + endif + endif +endif +ifeq ($(CPU_ARCH),aarch64) + ifdef CC_IS_CLANG + DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2 + EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c + else ifeq (1,$(CC_IS_GCC)) + # GCC versions older than 4.9 don't support ARM AES. The check + # is done in two parts, first allows "major.minor" == "4.9", + # and then rejects any major versions prior to 5. Note that + # there has been no GCC 4.10, as it was renamed to GCC 5. 
+ ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION)))) + DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2 + EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c + endif + ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) + DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2 + EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c + endif + endif +endif +ifeq ($(CPU_ARCH),arm) +ifndef NSS_DISABLE_ARM32_NEON + EXTRA_SRCS += gcm-arm32-neon.c +endif + ifdef CC_IS_CLANG + DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2 + EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c + else ifeq (1,$(CC_IS_GCC)) + # GCC versions older than 4.9 don't support ARM AES. The check + # is done in two parts, first allows "major.minor" == "4.9", + # and then rejects any major versions prior to 5. Note that + # there has been no GCC 4.10, as it was renamed to GCC 5. + ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION)))) + DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2 + EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c + endif + ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) + DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2 + EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c + endif + endif +endif + +ifeq (OS2,$(OS_TARGET)) + ASFILES = mpi_x86_os2.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D + DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD + DEFINES += -DMP_IS_LITTLE_ENDIAN +endif + +ifeq (,$(filter-out WINNT WIN95,$(OS_TARGET))) +ifndef USE_64 +# 32-bit Windows +ifdef NS_USE_GCC +# Ideally, we want to use assembler +# ASFILES = mpi_x86.s +# DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE \ +# -DMP_ASSEMBLY_DIV_2DX1D +# but we haven't figured out how to make it work, so we are not +# using assembler right now. 
+ ASFILES = + DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT +else +# MSVC + MPI_SRCS += mpi_x86_asm.c + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD + ifdef BUILD_OPT + OPTIMIZER += -Ox # maximum optimization for freebl + endif + # The Intel AES assembly code requires Visual C++ 2010. + # if $(_MSC_VER) >= 1600 (Visual C++ 2010) + ifeq ($(firstword $(sort $(_MSC_VER) 1600)),1600) + DEFINES += -DUSE_HW_AES -DINTEL_GCM + ASFILES += intel-aes-x86-masm.asm intel-gcm-x86-masm.asm + EXTRA_SRCS += intel-gcm-wrap.c + ifeq ($(CLANG_CL),1) + INTEL_GCM_CLANG_CL = 1 + endif + endif + # The Intel SHA extenstion requires Visual C++ 2015. + ifeq ($(_MSC_VER_GE_14),1) + DEFINES += -DUSE_HW_SHA2 + EXTRA_SRCS += sha256-x86.c + endif +endif +else + # -DMP_NO_MP_WORD + DEFINES += -DMP_IS_LITTLE_ENDIAN +ifdef NS_USE_GCC +# Ideally, we should use amd64 assembly code, but it's not yet mingw-w64 +# compatible. +else +# MSVC + ifdef BUILD_OPT + OPTIMIZER += -Ox # maximum optimization for freebl + endif +ifeq ($(CPU_ARCH),x86_64) + ASFILES = arcfour-amd64-masm.asm mpi_amd64_masm.asm mp_comba_amd64_masm.asm + DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DNSS_USE_COMBA + # The Intel AES assembly code requires Visual C++ 2010 (10.0). The _xgetbv + # compiler intrinsic function requires Visual C++ 2010 (10.0) SP1. + ifeq ($(_MSC_VER_GE_10SP1),1) + DEFINES += -DUSE_HW_AES -DINTEL_GCM + ASFILES += intel-aes-x64-masm.asm intel-gcm-x64-masm.asm + EXTRA_SRCS += intel-gcm-wrap.c + ifeq ($(CLANG_CL),1) + INTEL_GCM_CLANG_CL = 1 + endif + endif + # The Intel SHA extenstion requires Visual C++ 2015. 
+ ifeq ($(_MSC_VER_GE_14),1) + DEFINES += -DUSE_HW_SHA2 + EXTRA_SRCS += sha256-x86.c + endif + MPI_SRCS += mpi_amd64.c +endif +endif +endif +endif + +ifeq ($(OS_TARGET),Darwin) +ifeq ($(CPU_ARCH),x86_64) + ASFILES = mpi_amd64_common.s + DEFINES += -DMPI_AMD64 -DMP_IS_LITTLE_ENDIAN + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DNSS_USE_COMBA + MPI_SRCS += mpi_amd64.c mp_comba.c +else ifeq ($(CPU_ARCH),x86) + ASFILES = mpi_sse2.s + DEFINES += -DMP_USE_UINT_DIGIT + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D +endif +endif # Darwin + +ifeq ($(OS_TARGET),Linux) +ifeq ($(CPU_ARCH),x86_64) + # Lower case s on mpi_amd64_common due to make implicit rules. + ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s + ASFLAGS += -fPIC -Wa,--noexecstack + DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DNSS_USE_COMBA + DEFINES += -DMP_IS_LITTLE_ENDIAN +# DEFINES += -DMPI_AMD64_ADD + # comment the next four lines to turn off Intel HW acceleration. 
+ DEFINES += -DUSE_HW_AES -DINTEL_GCM + ASFILES += intel-aes.s intel-gcm.s + EXTRA_SRCS += intel-gcm-wrap.c + INTEL_GCM = 1 + MPI_SRCS += mpi_amd64.c mp_comba.c +endif +ifeq ($(CPU_ARCH),x86) + ASFILES = mpi_x86.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT + DEFINES += -DMP_IS_LITTLE_ENDIAN +endif +ifeq ($(CPU_ARCH),arm) + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_USE_UINT_DIGIT + DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512 + MPI_SRCS += mpi_arm.c +endif +ifeq ($(CPU_ARCH),ppc) + EXTRA_SRCS += gcm-ppc.c +ifdef USE_64 + DEFINES += -DNSS_NO_INIT_SUPPORT + PPC_ABI := $(shell $(CC) -dM -E - < /dev/null | awk '$$2 == "_CALL_ELF" {print $$3}') + ifeq ($(PPC_ABI),2) + ASFILES += sha512-p8.s + ifeq ($(OS_TEST),ppc64le) + DEFINES += -DPPC_GCM + EXTRA_SRCS += chacha20poly1305-ppc.c ppc-gcm-wrap.c + ASFILES += chacha20-ppc64le.s ppc-gcm.s + endif # ppc64le + endif +endif # USE_64 +endif # ppc +endif # Linux + +ifeq ($(OS_TARGET),AIX) + DEFINES += -DMP_USE_UINT_DIGIT + ifndef USE_64 + DEFINES += -DMP_NO_DIV_WORD -DMP_NO_ADD_WORD -DMP_NO_SUB_WORD + endif +endif # AIX + +ifeq ($(OS_TARGET), HP-UX) +ifneq ($(OS_TEST), ia64) +# PA-RISC +ASFILES += ret_cr16.s +ifndef USE_64 + FREEBL_BUILD_SINGLE_SHLIB = + HAVE_ABI32_INT32 = 1 + HAVE_ABI32_FPU = 1 +endif +ifdef FREEBL_CHILD_BUILD +ifdef USE_ABI32_INT32 +# build for DA1.1 (HP PA 1.1) 32-bit ABI build with 32-bit arithmetic + DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD + DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512 +else +ifdef USE_64 +# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits + MPI_SRCS += mpi_hp.c + ASFILES += hpma512.s hppa20.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +else +# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model +# (the 32-bit ABI with 64-bit registers) using 64-bit digits + MPI_SRCS += mpi_hp.c + ASFILES += 
hpma512.s hppa20.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +ifndef NS_USE_GCC + ARCHFLAG = -Aa +e +DA2.0 +DS2.0 +endif +endif +endif +endif +endif +endif + +# The blapi functions are defined not only in the freebl shared +# libraries but also in the shared libraries linked with loader.c +# (libsoftokn3.so and libssl3.so). We need to use GNU ld's +# -Bsymbolic option or the equivalent option for other linkers +# to bind the blapi function references in FREEBLVector vector +# (ldvector.c) to the blapi functions defined in the freebl +# shared libraries. +ifeq (,$(filter-out BSD_OS FreeBSD Linux NetBSD OpenBSD, $(OS_TARGET))) + MKSHLIB += -Wl,-Bsymbolic +endif + +ifeq ($(OS_TARGET),SunOS) + +ifdef NS_USE_GCC + ifdef GCC_USE_GNU_LD + MKSHLIB += -Wl,-Bsymbolic,-z,now,-z,text + else + MKSHLIB += -Wl,-B,symbolic,-z,now,-z,text + endif # GCC_USE_GNU_LD +else + MKSHLIB += -B symbolic -z now -z text +endif # NS_USE_GCC + +# Sun's WorkShop defines v8, v8plus and v9 architectures. +# gcc on Solaris defines v8 and v9 "cpus". +# gcc's v9 is equivalent to Workshop's v8plus. +# gcc's -m64 is equivalent to Workshop's v9 +# We always use Sun's assembler, which uses Sun's naming convention. 
+ifeq ($(CPU_ARCH),sparc) + FREEBL_BUILD_SINGLE_SHLIB= + ifdef USE_64 + HAVE_ABI64_INT = 1 + HAVE_ABI64_FPU = 1 + else + HAVE_ABI32_FPU = 1 + HAVE_ABI32_INT64 = 1 + endif + SYSV_SPARC = 1 + SOLARIS_AS = /usr/ccs/bin/as + #### set arch, asm, c flags + ifdef NS_USE_GCC + ifdef USE_ABI32_INT64 + ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plus + SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC + endif + ifdef USE_ABI32_FPU + ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plusa + SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC + endif # USE_ABI32_FPU + ifdef USE_ABI64_INT + # this builds for Sparc v9a pure 64-bit architecture + ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9 + SOLARIS_AS_FLAGS = -xarch=v9 -K PIC + endif + ifdef USE_ABI64_FPU + # this builds for Sparc v9a pure 64-bit architecture + # It uses floating point, and 32-bit word size + ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9a + SOLARIS_AS_FLAGS = -xarch=v9a -K PIC + endif + else # NS_USE_GCC + # FPU_TARGET_OPTIMIZER specifies the target processor and cache + # properties of the ABI32_FPU and ABI64_FPU architectures for use + # by the optimizer. + ifeq (,$(findstring Sun WorkShop 6,$(shell $(CC) -V 2>&1))) + # if the compiler is not Forte 6 + FPU_TARGET_OPTIMIZER = -xcache=64/32/4:1024/64/4 -xchip=ultra3 + else + # Forte 6 C compiler generates incorrect code for rijndael.c + # if -xchip=ultra3 is used (Bugzilla bug 333925). So we revert + # to what we used in NSS 3.10. + FPU_TARGET_OPTIMIZER = -xchip=ultra2 + endif + ifdef USE_ABI32_INT64 + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses 64-bit words, integer arithmetic, + # no FPU (non-VIS cpus). + # These flags were suggested by the compiler group for building + # with SunStudio 10. 
+ ifdef BUILD_OPT + SOL_CFLAGS += -xO4 + endif + SOL_CFLAGS += -xtarget=generic + ARCHFLAG = -xarch=v8plus + SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC + endif + ifdef USE_ABI32_FPU + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses FPU code, and 32-bit word size. + # these flags were determined by running cc -### -fast and copying + # the generated flag settings + SOL_CFLAGS += -fsingle -xmemalign=8s + ifdef BUILD_OPT + SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1 + SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all + SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend + SOL_CFLAGS += -xlibmil -xO5 + endif + ARCHFLAG = -xarch=v8plusa + SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC + endif + ifdef USE_ABI64_INT + # this builds for Sparc v9a pure 64-bit architecture, + # no FPU (non-VIS cpus). For building with SunStudio 10. + ifdef BUILD_OPT + SOL_CFLAGS += -xO4 + endif + SOL_CFLAGS += -xtarget=generic + ARCHFLAG = -xarch=v9 + SOLARIS_AS_FLAGS = -xarch=v9 -K PIC + endif + ifdef USE_ABI64_FPU + # this builds for Sparc v9a pure 64-bit architecture + # It uses floating point, and 32-bit word size. + # See comment for USE_ABI32_FPU. 
+ SOL_CFLAGS += -fsingle -xmemalign=8s + ifdef BUILD_OPT + SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1 + SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all + SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend + SOL_CFLAGS += -xlibmil -xO5 + endif + ARCHFLAG = -xarch=v9a + SOLARIS_AS_FLAGS = -xarch=v9a -K PIC + endif + endif # NS_USE_GCC + + ### set flags for both GCC and Sun cc + ifdef USE_ABI32_INT64 + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses 64-bit words, integer arithmetic, no FPU + # best times are with no MP_ flags specified + endif + ifdef USE_ABI32_FPU + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses FPU code, and 32-bit word size + MPI_SRCS += mpi_sparc.c + ASFILES = mpv_sparcv8.s montmulfv8.s + DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL + endif + ifdef USE_ABI64_INT + # this builds for Sparc v9a pure 64-bit architecture + # best times are with no MP_ flags specified + endif + ifdef USE_ABI64_FPU + # this builds for Sparc v9a pure 64-bit architecture + # It uses floating point, and 32-bit word size + MPI_SRCS += mpi_sparc.c + ASFILES = mpv_sparcv9.s montmulfv9.s + DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL + endif + +else + # Solaris for non-sparc family CPUs + ifdef NS_USE_GCC + LD = gcc + AS = gcc + ASFLAGS = -x assembler-with-cpp + endif + ifeq ($(USE_64),1) + # Solaris for AMD64 + ifdef NS_USE_GCC + ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s + ASFLAGS += -march=opteron -m64 -fPIC + MPI_SRCS += mp_comba.c + # comment the next four lines to turn off Intel HW acceleration + ASFILES += intel-gcm.s + EXTRA_SRCS += intel-gcm-wrap.c + INTEL_GCM = 1 + DEFINES += -DINTEL_GCM + else + ASFILES = arcfour-amd64-sun.s mpi_amd64_sun.s sha-fast-amd64-sun.s + ASFILES += mp_comba_amd64_sun.s 
mpcpucache_amd64.s + ASFLAGS += -xarch=generic64 -K PIC + SOL_CFLAGS += -xprefetch=no + SHA_SRCS = + MPCPU_SRCS = + # Intel acceleration for GCM does not build currently with Studio + endif + DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DNSS_USE_COMBA -DMP_IS_LITTLE_ENDIAN + # comment the next two lines to turn off Intel HW acceleration + DEFINES += -DUSE_HW_AES + ASFILES += intel-aes.s + MPI_SRCS += mpi_amd64.c + else + # Solaris x86 + DEFINES += -DMP_USE_UINT_DIGIT + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D + ASFILES = mpi_i86pc.s + ifndef NS_USE_GCC + MPCPU_SRCS = + ASFILES += mpcpucache_x86.s + endif + endif +endif # Solaris for non-sparc family CPUs +endif # target == SunO + +ifdef USE_64 +# no __int128 at least up to lcc 1.23 (pretending to be gcc5) +# NB: CC_NAME is not defined here +ifneq ($(shell $(CC) -? 2>&1 >/dev/null /dev/null && echo 1) +$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp) +$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp) +$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp) +ifndef NSS_DISABLE_ARM32_NEON +$(OBJDIR)/$(PROG_PREFIX)gcm-arm32-neon$(OBJ_SUFFIX): CFLAGS += -mfpu=neon$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp) +endif +endif + +ifeq ($(CPU_ARCH),aarch64) +$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto +$(OBJDIR)/$(PROG_PREFIX)gcm-aarch64$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto +$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto +$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto +endif + +ifeq ($(CPU_ARCH),ppc) 
+$(OBJDIR)/$(PROG_PREFIX)sha512$(OBJ_SUFFIX): CFLAGS += -funroll-loops -fpeel-loops +ifneq ($(NSS_DISABLE_ALTIVEC),1) +$(OBJDIR)/$(PROG_PREFIX)gcm-ppc$(OBJ_SUFFIX): CFLAGS += -maltivec +$(OBJDIR)/$(PROG_PREFIX)gcm$(OBJ_SUFFIX): CFLAGS += -maltivec +$(OBJDIR)/$(PROG_PREFIX)rijndael$(OBJ_SUFFIX): CFLAGS += -maltivec +$(OBJDIR)/$(PROG_PREFIX)sha512$(OBJ_SUFFIX): CFLAGS += -maltivec +$(OBJDIR)/$(PROG_PREFIX)chacha20poly1305-ppc$(OBJ_SUFFIX): CFLAGS += -maltivec +endif +ifneq ($(NSS_DISABLE_CRYPTO_VSX),1) +$(OBJDIR)/$(PROG_PREFIX)gcm-ppc$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx +$(OBJDIR)/$(PROG_PREFIX)gcm$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx +$(OBJDIR)/$(PROG_PREFIX)rijndael$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx +$(OBJDIR)/$(PROG_PREFIX)sha512$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx +$(OBJDIR)/$(PROG_PREFIX)chacha20poly1305-ppc$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx +endif +endif + +$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx +$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx +$(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx + +ifndef NSS_DISABLE_AVX2 +$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx2 +$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx -mavx2 +$(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx -mavx2 +endif diff --git a/security/nss/lib/freebl/aes-armv8.c b/security/nss/lib/freebl/aes-armv8.c new file mode 100644 index 0000000000..7be39ede89 --- /dev/null +++ b/security/nss/lib/freebl/aes-armv8.c @@ -0,0 +1,1169 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "secerr.h" +#include "rijndael.h" + +#if ((defined(__clang__) || \ + (defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \ + defined(IS_LITTLE_ENDIAN)) + +#ifndef __ARM_FEATURE_CRYPTO +#error "Compiler option is invalid" +#endif + +#include + +SECStatus +arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaeseq_u8(state, key1); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key2); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key3); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key4); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key5); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key6); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key7); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key8); + state = 
vaesmcq_u8(state); + state = vaeseq_u8(state, key9); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key10); + /* AddRoundKey */ + state = veorq_u8(state, key11); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + } + + return SECSuccess; +} + +SECStatus +arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (inputLen == 0) { + return SECSuccess; + } + + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaesdq_u8(state, key11); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key10); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key9); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key8); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key7); + state = vaesimcq_u8(state); + 
state = vaesdq_u8(state, key6); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key5); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key4); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key3); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key2); + /* AddRoundKey */ + state = veorq_u8(state, key1); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + } + + return SECSuccess; +} + +SECStatus +arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11; + uint8x16_t iv; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + /* iv */ + iv = vld1q_u8(cx->iv); + + /* expanedKey */ + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + state = veorq_u8(state, iv); + + /* Rounds */ + state = vaeseq_u8(state, key1); + state = 
vaesmcq_u8(state); + state = vaeseq_u8(state, key2); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key3); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key4); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key5); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key6); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key7); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key8); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key9); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key10); + /* AddRoundKey */ + state = veorq_u8(state, key11); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + iv = state; + } + vst1q_u8(cx->iv, iv); + + return SECSuccess; +} + +SECStatus +arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t iv; + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + /* iv */ + iv = vld1q_u8(cx->iv); + + /* expanedKey */ + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + + while (inputLen > 0) { + uint8x16_t state, old_state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if 
((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + old_state = state; + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaesdq_u8(state, key11); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key10); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key9); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key8); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key7); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key6); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key5); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key4); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key3); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key2); + /* AddRoundKey */ + state = veorq_u8(state, key1); + + state = veorq_u8(state, iv); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + + iv = old_state; + } + vst1q_u8(cx->iv, iv); + + return SECSuccess; +} + +SECStatus +arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11, key12, key13; + PRUint8 *key = (PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = 
vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaeseq_u8(state, key1); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key2); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key3); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key4); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key5); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key6); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key7); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key8); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key9); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key10); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key11); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key12); + /* AddRoundKey */ + state = veorq_u8(state, key13); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + } + + return SECSuccess; +} + +SECStatus +arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, 
key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11, key12, key13; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaesdq_u8(state, key13); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key12); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key11); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key10); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key9); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key8); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key7); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key6); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key5); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key4); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key3); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key2); + /* AddRoundKey */ + state = veorq_u8(state, key1); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 
16; + } + + return SECSuccess; +} + +SECStatus +arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11, key12, key13; + uint8x16_t iv; + PRUint8 *key = (PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + /* iv */ + iv = vld1q_u8(cx->iv); + + /* expanedKey */ + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + state = veorq_u8(state, iv); + + /* Rounds */ + state = vaeseq_u8(state, key1); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key2); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key3); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key4); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key5); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key6); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key7); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key8); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key9); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key10); + 
state = vaesmcq_u8(state); + state = vaeseq_u8(state, key11); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key12); + state = veorq_u8(state, key13); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + iv = state; + } + vst1q_u8(cx->iv, iv); + + return SECSuccess; +} + +SECStatus +arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t iv; + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11, key12, key13; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + /* iv */ + iv = vld1q_u8(cx->iv); + + /* expanedKey */ + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + + while (inputLen > 0) { + uint8x16_t state, old_state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + old_state = state; + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaesdq_u8(state, key13); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key12); + state = 
vaesimcq_u8(state); + state = vaesdq_u8(state, key11); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key10); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key9); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key8); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key7); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key6); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key5); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key4); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key3); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key2); + /* AddRoundKey */ + state = veorq_u8(state, key1); + + state = veorq_u8(state, iv); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + + iv = old_state; + } + vst1q_u8(cx->iv, iv); + + return SECSuccess; +} + +SECStatus +arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11, key12, key13, key14, key15; + PRUint8 *key = (PRUint8 *)cx->k.expandedKey; + + if (inputLen == 0) { + return SECSuccess; + } + + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + key14 = 
vld1q_u8(key + 208); + key15 = vld1q_u8(key + 224); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaeseq_u8(state, key1); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key2); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key3); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key4); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key5); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key6); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key7); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key8); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key9); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key10); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key11); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key12); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key13); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key14); + /* AddRoundKey */ + state = veorq_u8(state, key15); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + } + return SECSuccess; +} + +SECStatus +arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, 
key8, key9, key10; + uint8x16_t key11, key12, key13, key14, key15; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + key14 = vld1q_u8(key + 208); + key15 = vld1q_u8(key + 224); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaesdq_u8(state, key15); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key14); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key13); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key12); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key11); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key10); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key9); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key8); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key7); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key6); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key5); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key4); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key3); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key2); + /* AddRoundKey */ + state = veorq_u8(state, key1); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 
0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + } + + return SECSuccess; +} + +SECStatus +arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11, key12, key13, key14, key15; + uint8x16_t iv; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + /* iv */ + iv = vld1q_u8(cx->iv); + + /* expanedKey */ + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + key14 = vld1q_u8(key + 208); + key15 = vld1q_u8(key + 224); + + while (inputLen > 0) { + uint8x16_t state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + input += 16; + inputLen -= 16; + + state = veorq_u8(state, iv); + + /* Rounds */ + state = vaeseq_u8(state, key1); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key2); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key3); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key4); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key5); + state = vaesmcq_u8(state); + state = 
vaeseq_u8(state, key6); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key7); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key8); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key9); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key10); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key11); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key12); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key13); + state = vaesmcq_u8(state); + state = vaeseq_u8(state, key14); + /* AddRoundKey */ + state = veorq_u8(state, key15); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + iv = state; + } + vst1q_u8(cx->iv, iv); + + return SECSuccess; +} + +SECStatus +arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize) +{ +#if !defined(HAVE_UNALIGNED_ACCESS) + pre_align unsigned char buf[16] post_align; +#endif + uint8x16_t iv; + uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; + uint8x16_t key11, key12, key13, key14, key15; + const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; + + if (!inputLen) { + return SECSuccess; + } + + /* iv */ + iv = vld1q_u8(cx->iv); + + /* expanedKey */ + key1 = vld1q_u8(key); + key2 = vld1q_u8(key + 16); + key3 = vld1q_u8(key + 32); + key4 = vld1q_u8(key + 48); + key5 = vld1q_u8(key + 64); + key6 = vld1q_u8(key + 80); + key7 = vld1q_u8(key + 96); + key8 = vld1q_u8(key + 112); + key9 = vld1q_u8(key + 128); + key10 = vld1q_u8(key + 144); + key11 = vld1q_u8(key + 160); + key12 = vld1q_u8(key + 176); + key13 = vld1q_u8(key + 192); + key14 = vld1q_u8(key + 208); + key15 = vld1q_u8(key + 224); + + 
while (inputLen > 0) { + uint8x16_t state, old_state; +#if defined(HAVE_UNALIGNED_ACCESS) + state = vld1q_u8(input); +#else + if ((uintptr_t)input & 0x7) { + memcpy(buf, input, 16); + state = vld1q_u8(__builtin_assume_aligned(buf, 16)); + } else { + state = vld1q_u8(__builtin_assume_aligned(input, 8)); + } +#endif + old_state = state; + input += 16; + inputLen -= 16; + + /* Rounds */ + state = vaesdq_u8(state, key15); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key14); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key13); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key12); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key11); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key10); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key9); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key8); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key7); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key6); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key5); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key4); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key3); + state = vaesimcq_u8(state); + state = vaesdq_u8(state, key2); + /* AddRoundKey */ + state = veorq_u8(state, key1); + + state = veorq_u8(state, iv); + +#if defined(HAVE_UNALIGNED_ACCESS) + vst1q_u8(output, state); +#else + if ((uintptr_t)output & 0x7) { + vst1q_u8(__builtin_assume_aligned(buf, 16), state); + memcpy(output, buf, 16); + } else { + vst1q_u8(__builtin_assume_aligned(output, 8), state); + } +#endif + output += 16; + + iv = old_state; + } + vst1q_u8(cx->iv, iv); + + return SECSuccess; +} + +#endif diff --git a/security/nss/lib/freebl/aes-armv8.h b/security/nss/lib/freebl/aes-armv8.h new file mode 100644 index 0000000000..b0ef1c8708 --- /dev/null +++ b/security/nss/lib/freebl/aes-armv8.h @@ -0,0 +1,103 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * 
License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * AES bulk workers implemented in aes-armv8.c, one per direction
 * (encrypt/decrypt), mode (ECB/CBC) and key size (128/192/256 bits).
 * All twelve share the same signature:
 *
 *   cx            context; the round keys are read from cx->k.expandedKey
 *                 and the CBC variants also read and update cx->iv
 *   output        receives one 16-byte block per 16-byte input block
 *   outputLen     not referenced by the 192/256-bit implementations
 *   maxOutputLen  not referenced by the 192/256-bit implementations
 *   input         data to process
 *   inputLen      byte count; the loops consume 16 bytes per iteration
 *                 until it reaches 0, so it has to be a multiple of 16
 *   blocksize     not referenced by the 192/256-bit implementations
 *
 * The 192/256-bit implementations return SECSuccess, immediately so when
 * inputLen is 0.
 * NOTE(review): the 128-bit implementations were not part of the reviewed
 * range; presumably they follow the same contract - confirm.
 */
SECStatus arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);
SECStatus arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
                                  unsigned int *outputLen,
                                  unsigned int maxOutputLen,
                                  const unsigned char *input,
                                  unsigned int inputLen,
                                  unsigned int blocksize);

/*
 * Evaluates to the ECB worker matching the direction (a non-zero "encrypt"
 * selects the encrypt family) and the key size in bytes: 16 selects the
 * 128-bit worker, 24 the 192-bit worker and any other value the 256-bit one.
 */
#define native_aes_ecb_worker(encrypt, keysize)           \
    ((encrypt)                                            \
         ? ((keysize) == 16   ? arm_aes_encrypt_ecb_128   \
            : (keysize) == 24 ? arm_aes_encrypt_ecb_192   \
                              : arm_aes_encrypt_ecb_256)  \
         : ((keysize) == 16   ? arm_aes_decrypt_ecb_128   \
            : (keysize) == 24 ? arm_aes_decrypt_ecb_192   \
                              : arm_aes_decrypt_ecb_256))

/*
 * Same selection as native_aes_ecb_worker, for the CBC workers.
 */
#define native_aes_cbc_worker(encrypt, keysize)           \
    ((encrypt)                                            \
         ? ((keysize) == 16   ? arm_aes_encrypt_cbc_128   \
            : (keysize) == 24 ? arm_aes_encrypt_cbc_192   \
                              : arm_aes_encrypt_cbc_256)  \
         : ((keysize) == 16   ? arm_aes_decrypt_cbc_128   \
            : (keysize) == 24 ? arm_aes_decrypt_cbc_192   \
                              : arm_aes_decrypt_cbc_256))

/*
 * Runs the forward key expansion when "encrypt" is non-zero and the inverse
 * key expansion otherwise. The expansion refers to "cx", "key" and "Nk",
 * which are not macro parameters and therefore must be in scope wherever the
 * macro is used; the "keysize" parameter is not used by the expansion.
 */
#define native_aes_init(encrypt, keysize)           \
    do {                                            \
        if (encrypt) {                              \
            rijndael_key_expansion(cx, key, Nk);    \
        } else {                                    \
            rijndael_invkey_expansion(cx, key, Nk); \
        }                                           \
    } while (0)
diff --git a/security/nss/lib/freebl/aes-x86.c b/security/nss/lib/freebl/aes-x86.c
new file mode 100644
index 0000000000..0cebb202a8
--- /dev/null
+++ b/security/nss/lib/freebl/aes-x86.c
@@ -0,0 +1,184 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "rijndael.h" +#include "secerr.h" + +#include /* aes-ni */ + +#define EXPAND_KEY128(k, rcon, res) \ + tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ + tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ + tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + res = _mm_xor_si128(tmp, tmp_key) + +static void +native_key_expansion128(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->k.keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); + EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); + EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); + EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); + EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); + EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); + EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); + EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); + EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); + EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); +} + +#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ + tmp2 = _mm_slli_si128(k0, 4); \ + tmp1 = _mm_xor_si128(k0, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ + res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) + +#define EXPAND_KEY192_PART2(res, k1, k2) \ + tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ + res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) + +#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ + EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ + EXPAND_KEY192_PART2(carry, res1, tmp3); \ + res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ 
+ _mm_castsi128_pd(tmp3), 0)); \ + res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ + _mm_castsi128_pd(carry), 1)); \ + EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) + +static void +native_key_expansion192(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->k.keySchedule; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + pre_align __m128i tmp3 post_align; + pre_align __m128i carry post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], + keySchedule[3], carry, 0x1, 0x2); + EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); + EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], + keySchedule[6], carry, 0x4, 0x8); + EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); + EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], + keySchedule[9], carry, 0x10, 0x20); + EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); + EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], + keySchedule[12], carry, 0x40, 0x80); +} + +#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ + tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ + tmp2 = _mm_slli_si128(k1x, 4); \ + tmp1 = _mm_xor_si128(k1x, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + res = _mm_xor_si128(tmp1, tmp_key); + +#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ + EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ + EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) + +static void +native_key_expansion256(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->k.keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = 
_mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], + keySchedule[1], 0x01); + EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], + keySchedule[3], 0x02); + EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], + keySchedule[5], 0x04); + EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], + keySchedule[7], 0x08); + EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], + keySchedule[9], 0x10); + EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], + keySchedule[11], 0x20); + EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], + keySchedule[13], 0xFF); +} + +/* + * AES key expansion using aes-ni instructions. + */ +void +rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, + unsigned int Nk) +{ + switch (Nk) { + case 4: + native_key_expansion128(cx, key); + return; + case 6: + native_key_expansion192(cx, key); + return; + case 8: + native_key_expansion256(cx, key); + return; + default: + /* This shouldn't happen (checked by the caller). 
*/ + return; + } +} + +void +rijndael_native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + unsigned int i; + pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); + m = _mm_xor_si128(m, cx->k.keySchedule[0]); + for (i = 1; i < cx->Nr; ++i) { + m = _mm_aesenc_si128(m, cx->k.keySchedule[i]); + } + m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]); + _mm_storeu_si128((__m128i *)output, m); +} + +void +rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + int i; + pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); + m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]); + for (i = cx->Nr - 1; i > 0; --i) { + m = _mm_aesdec_si128(m, cx->k.keySchedule[i]); + } + m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]); + _mm_storeu_si128((__m128i *)output, m); +} + +// out = a ^ b +void +native_xorBlock(unsigned char *out, + const unsigned char *a, + const unsigned char *b) +{ + pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a)); + pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b)); + in1 = _mm_xor_si128(in1, in2); + _mm_storeu_si128((__m128i *)(out), in1); +} diff --git a/security/nss/lib/freebl/aeskeywrap.c b/security/nss/lib/freebl/aeskeywrap.c new file mode 100644 index 0000000000..09c0667c7a --- /dev/null +++ b/security/nss/lib/freebl/aeskeywrap.c @@ -0,0 +1,642 @@ +/* + * aeskeywrap.c - implement AES Key Wrap algorithm from RFC 3394 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include + +#include "prcpucfg.h" +#if defined(IS_LITTLE_ENDIAN) || defined(SHA_NO_LONG_LONG) +#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 0 +#else +#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 1 +#endif +#include "prtypes.h" /* for PRUintXX */ +#include "secport.h" /* for PORT_XXX */ +#include "secerr.h" +#include "blapi.h" /* for AES_ functions */ +#include "rijndael.h" + +struct AESKeyWrapContextStr { + AESContext aescx; + unsigned char iv[AES_KEY_WRAP_IV_BYTES]; + void *mem; /* Pointer to beginning of allocated memory. */ +}; + +/******************************************/ +/* +** AES key wrap algorithm, RFC 3394 +*/ + +AESKeyWrapContext * +AESKeyWrap_AllocateContext(void) +{ + /* aligned_alloc is C11 so we have to do it the old way. */ + AESKeyWrapContext *ctx = PORT_ZAlloc(sizeof(AESKeyWrapContext) + 15); + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return NULL; + } + ctx->mem = ctx; + return (AESKeyWrapContext *)(((uintptr_t)ctx + 15) & ~(uintptr_t)0x0F); +} + +SECStatus +AESKeyWrap_InitContext(AESKeyWrapContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int x1, + unsigned int encrypt, + unsigned int x2) +{ + SECStatus rv = SECFailure; + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (iv) { + memcpy(cx->iv, iv, sizeof cx->iv); + } else { + memset(cx->iv, 0xA6, sizeof cx->iv); + } + rv = AES_InitContext(&cx->aescx, key, keylen, NULL, NSS_AES, encrypt, + AES_BLOCK_SIZE); + return rv; +} + +/* +** Create a new AES context suitable for AES encryption/decryption. 
+** "key" raw key data +** "keylen" the number of bytes of key data (16, 24, or 32) +*/ +extern AESKeyWrapContext * +AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv, + int encrypt, unsigned int keylen) +{ + SECStatus rv; + AESKeyWrapContext *cx = AESKeyWrap_AllocateContext(); + if (!cx) + return NULL; /* error is already set */ + rv = AESKeyWrap_InitContext(cx, key, keylen, iv, 0, encrypt, 0); + if (rv != SECSuccess) { + PORT_Free(cx->mem); + cx = NULL; /* error should already be set */ + } + return cx; +} + +/* +** Destroy a AES KeyWrap context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit) +{ + if (cx) { + AES_DestroyContext(&cx->aescx, PR_FALSE); + /* memset(cx, 0, sizeof *cx); */ + if (freeit) { + PORT_Free(cx->mem); + } + } +} + +#if !BIG_ENDIAN_WITH_64_BIT_REGISTERS + +/* The AES Key Wrap algorithm has 64-bit values that are ALWAYS big-endian +** (Most significant byte first) in memory. The only ALU operations done +** on them are increment, decrement, and XOR. So, on little-endian CPUs, +** and on CPUs that lack 64-bit registers, these big-endian 64-bit operations +** are simulated in the following code. This is thought to be faster and +** simpler than trying to convert the data to little-endian and back. +*/ + +/* A and T point to two 64-bit values stored most signficant byte first +** (big endian). This function increments the 64-bit value T, and then +** XORs it with A, changing A. +*/ +static void +increment_and_xor(unsigned char *A, unsigned char *T) +{ + if (!++T[7]) + if (!++T[6]) + if (!++T[5]) + if (!++T[4]) + if (!++T[3]) + if (!++T[2]) + if (!++T[1]) + ++T[0]; + + A[0] ^= T[0]; + A[1] ^= T[1]; + A[2] ^= T[2]; + A[3] ^= T[3]; + A[4] ^= T[4]; + A[5] ^= T[5]; + A[6] ^= T[6]; + A[7] ^= T[7]; +} + +/* A and T point to two 64-bit values stored most signficant byte first +** (big endian). 
This function XORs T with A, giving a new A, then +** decrements the 64-bit value T. +*/ +static void +xor_and_decrement(PRUint64 *A, PRUint64 *T) +{ + unsigned char *TP = (unsigned char *)T; + const PRUint64 mask = 0xFF; + *A = ((*A & mask << 56) ^ (*T & mask << 56)) | + ((*A & mask << 48) ^ (*T & mask << 48)) | + ((*A & mask << 40) ^ (*T & mask << 40)) | + ((*A & mask << 32) ^ (*T & mask << 32)) | + ((*A & mask << 24) ^ (*T & mask << 23)) | + ((*A & mask << 16) ^ (*T & mask << 16)) | + ((*A & mask << 8) ^ (*T & mask << 8)) | + ((*A & mask) ^ (*T & mask)); + + if (!TP[7]--) + if (!TP[6]--) + if (!TP[5]--) + if (!TP[4]--) + if (!TP[3]--) + if (!TP[2]--) + if (!TP[1]--) + TP[0]--; +} + +/* Given an unsigned long t (in host byte order), store this value as a +** 64-bit big-endian value (MSB first) in *pt. +*/ +static void +set_t(unsigned char *pt, unsigned long t) +{ + pt[7] = (unsigned char)t; + t >>= 8; + pt[6] = (unsigned char)t; + t >>= 8; + pt[5] = (unsigned char)t; + t >>= 8; + pt[4] = (unsigned char)t; + t >>= 8; + pt[3] = (unsigned char)t; + t >>= 8; + pt[2] = (unsigned char)t; + t >>= 8; + pt[1] = (unsigned char)t; + t >>= 8; + pt[0] = (unsigned char)t; +} + +#endif + +static void +encode_PRUint32_BE(unsigned char *data, PRUint32 val) +{ + size_t i; + for (i = 0; i < sizeof(PRUint32); i++) { + data[i] = PORT_GET_BYTE_BE(val, i, sizeof(PRUint32)); + } +} + +static PRUint32 +decode_PRUint32_BE(unsigned char *data) +{ + PRUint32 val = 0; + size_t i; + + for (i = 0; i < sizeof(PRUint32); i++) { + val = (val << PR_BITS_PER_BYTE) | data[i]; + } + return val; +} + +/* +** Perform AES key wrap W function. +** "cx" the context +** "iv" the iv is concatenated to the plain text for for executing the function +** "output" the output buffer to store the encrypted data. +** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. 
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_W(AESKeyWrapContext *cx, unsigned char *iv, unsigned char *output, + unsigned int *pOutputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRUint64 *R = NULL; + unsigned int nBlocks; + unsigned int i, j; + unsigned int aesLen = AES_BLOCK_SIZE; + unsigned int outLen = inputLen + AES_KEY_WRAP_BLOCK_SIZE; + SECStatus s = SECFailure; + /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */ + PRUint64 t; + PRUint64 B[2]; + +#define A B[0] + + /* Check args */ + if (inputLen < 2 * AES_KEY_WRAP_BLOCK_SIZE || + 0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return s; + } +#ifdef maybe + if (!output && pOutputLen) { /* caller is asking for output size */ + *pOutputLen = outLen; + return SECSuccess; + } +#endif + if (maxOutputLen < outLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return s; + } + if (cx == NULL || output == NULL || input == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return s; + } + nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE; + R = PORT_NewArray(PRUint64, nBlocks + 1); + if (!R) + return s; /* error is already set. */ + /* + ** 1) Initialize variables. + */ + memcpy(&A, iv, AES_KEY_WRAP_IV_BYTES); + memcpy(&R[1], input, inputLen); +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + t = 0; +#else + memset(&t, 0, sizeof t); +#endif + /* + ** 2) Calculate intermediate values. 
+ */ + for (j = 0; j < 6; ++j) { + for (i = 1; i <= nBlocks; ++i) { + B[1] = R[i]; + s = AES_Encrypt(&cx->aescx, (unsigned char *)B, &aesLen, + sizeof B, (unsigned char *)B, sizeof B); + if (s != SECSuccess) + break; + R[i] = B[1]; +/* here, increment t and XOR A with t (in big endian order); */ +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + A ^= ++t; +#else + increment_and_xor((unsigned char *)&A, (unsigned char *)&t); +#endif + } + } + /* + ** 3) Output the results. + */ + if (s == SECSuccess) { + R[0] = A; + memcpy(output, &R[0], outLen); + if (pOutputLen) + *pOutputLen = outLen; + } else if (pOutputLen) { + *pOutputLen = 0; + } + PORT_ZFree(R, outLen); + return s; +} +#undef A + +/* +** Perform AES key wrap W^-1 function. +** "cx" the context +** "iv" the input IV to verify against. If NULL, then skip verification. +** "ivOut" the output buffer to store the IV (optional). +** "output" the output buffer to store the decrypted data. +** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Winv(AESKeyWrapContext *cx, unsigned char *iv, + unsigned char *ivOut, unsigned char *output, + unsigned int *pOutputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRUint64 *R = NULL; + unsigned int nBlocks; + unsigned int i, j; + unsigned int aesLen = AES_BLOCK_SIZE; + unsigned int outLen; + SECStatus s = SECFailure; + /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. 
*/ + PRUint64 t; + PRUint64 B[2]; + + /* Check args */ + if (inputLen < 3 * AES_KEY_WRAP_BLOCK_SIZE || + 0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return s; + } + outLen = inputLen - AES_KEY_WRAP_BLOCK_SIZE; +#ifdef maybe + if (!output && pOutputLen) { /* caller is asking for output size */ + *pOutputLen = outLen; + return SECSuccess; + } +#endif + if (maxOutputLen < outLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return s; + } + if (cx == NULL || output == NULL || input == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return s; + } + nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE; + R = PORT_NewArray(PRUint64, nBlocks); + if (!R) + return s; /* error is already set. */ + nBlocks--; + /* + ** 1) Initialize variables. + */ + memcpy(&R[0], input, inputLen); + B[0] = R[0]; +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + t = 6UL * nBlocks; +#else + set_t((unsigned char *)&t, 6UL * nBlocks); +#endif + /* + ** 2) Calculate intermediate values. + */ + for (j = 0; j < 6; ++j) { + for (i = nBlocks; i; --i) { +/* here, XOR A with t (in big endian order) and decrement t; */ +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + B[0] ^= t--; +#else + xor_and_decrement(&B[0], &t); +#endif + B[1] = R[i]; + s = AES_Decrypt(&cx->aescx, (unsigned char *)B, &aesLen, + sizeof B, (unsigned char *)B, sizeof B); + if (s != SECSuccess) + break; + R[i] = B[1]; + } + } + /* + ** 3) Output the results. + */ + if (s == SECSuccess) { + int bad = (iv) && memcmp(&B[0], iv, AES_KEY_WRAP_IV_BYTES); + if (!bad) { + memcpy(output, &R[1], outLen); + if (pOutputLen) + *pOutputLen = outLen; + if (ivOut) { + memcpy(ivOut, &B[0], AES_KEY_WRAP_IV_BYTES); + } + } else { + s = SECFailure; + PORT_SetError(SEC_ERROR_BAD_DATA); + if (pOutputLen) + *pOutputLen = 0; + } + } else if (pOutputLen) { + *pOutputLen = 0; + } + PORT_ZFree(R, inputLen); + return s; +} +#undef A + +/* +** Perform AES key wrap. +** "cx" the context +** "output" the output buffer to store the encrypted data. 
+** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *pOutputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + return AESKeyWrap_W(cx, cx->iv, output, pOutputLen, maxOutputLen, + input, inputLen); +} + +/* +** Perform AES key unwrap. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *pOutputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + return AESKeyWrap_Winv(cx, cx->iv, NULL, output, pOutputLen, maxOutputLen, + input, inputLen); +} + +#define BLOCK_PAD_POWER2(x, bs) (((bs) - ((x) & ((bs)-1))) & ((bs)-1)) +#define AES_KEY_WRAP_ICV2 0xa6, 0x59, 0x59, 0xa6 +#define AES_KEY_WRAP_ICV2_INT32 0xa65959a6 +#define AES_KEY_WRAP_ICV2_LEN 4 + +/* +** Perform AES key wrap with padding. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. 
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** The input is zero-padded to a multiple of AES_KEY_WRAP_BLOCK_SIZE and the
+** 8-byte IV carries the ICV2 constant followed by the big-endian "inputLen".
+** "*pOutputLen" receives the required size even when "maxOutputLen" is too
+** small.
+*/
+extern SECStatus
+AESKeyWrap_EncryptKWP(AESKeyWrapContext *cx, unsigned char *output,
+                      unsigned int *pOutputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen)
+{
+    unsigned int padLen = BLOCK_PAD_POWER2(inputLen, AES_KEY_WRAP_BLOCK_SIZE);
+    unsigned int paddedInputLen = inputLen + padLen;
+    unsigned int outLen = paddedInputLen + AES_KEY_WRAP_BLOCK_SIZE;
+    unsigned char iv[AES_BLOCK_SIZE] = { AES_KEY_WRAP_ICV2 };
+    unsigned char *newBuf;
+    SECStatus rv;
+
+    /* both pointers are dereferenced unconditionally below */
+    if (cx == NULL || pOutputLen == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* padLen plus one block is less than 16, so the unsigned sums above
+     * wrapped around exactly when outLen ends up below inputLen. Reject
+     * that case, or the buffers below would be sized from a truncated
+     * length. */
+    if (outLen < inputLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    *pOutputLen = outLen;
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    PORT_Assert((AES_KEY_WRAP_ICV2_LEN + sizeof(PRUint32)) == AES_KEY_WRAP_BLOCK_SIZE);
+    encode_PRUint32_BE(iv + AES_KEY_WRAP_ICV2_LEN, inputLen);
+
+    /* If we can fit in an AES Block, just do an AES Encrypt,
+     * iv is big enough to handle this on the stack, so no need to allocate
+     */
+    if (outLen == AES_BLOCK_SIZE) {
+        PORT_Assert(inputLen <= AES_KEY_WRAP_BLOCK_SIZE);
+        PORT_Memset(iv + AES_KEY_WRAP_BLOCK_SIZE, 0, AES_KEY_WRAP_BLOCK_SIZE);
+        PORT_Memcpy(iv + AES_KEY_WRAP_BLOCK_SIZE, input, inputLen);
+        rv = AES_Encrypt(&cx->aescx, output, pOutputLen, maxOutputLen, iv,
+                         outLen);
+        PORT_Memset(iv, 0, sizeof(iv));
+        return rv;
+    }
+
+    /* add padding to our input block */
+    newBuf = PORT_ZAlloc(paddedInputLen);
+    if (newBuf == NULL) {
+        return SECFailure;
+    }
+    PORT_Memcpy(newBuf, input, inputLen);
+
+    rv = AESKeyWrap_W(cx, iv, output, pOutputLen, maxOutputLen,
+                      newBuf, paddedInputLen);
+    PORT_ZFree(newBuf, paddedInputLen);
+    /* a little overkill, we only need to clear out the length, but this
+     * is easier to verify we got it all */
+    PORT_Memset(iv, 0, sizeof(iv));
+    return rv;
+}
+
+/*
+** Perform AES key unwrap with padding.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_DecryptKWP(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *pOutputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + unsigned int padLen; + unsigned int padLen2; + unsigned int outLen; + unsigned int paddedLen; + unsigned int good; + unsigned char *newBuf = NULL; + unsigned char *allocBuf = NULL; + int i; + unsigned char iv[AES_BLOCK_SIZE]; + PRUint32 magic; + SECStatus rv = SECFailure; + + paddedLen = inputLen - AES_KEY_WRAP_BLOCK_SIZE; + /* unwrap the padded result */ + if (inputLen == AES_BLOCK_SIZE) { + rv = AES_Decrypt(&cx->aescx, iv, &outLen, inputLen, input, inputLen); + newBuf = &iv[AES_KEY_WRAP_BLOCK_SIZE]; + outLen -= AES_KEY_WRAP_BLOCK_SIZE; + } else { + /* if the caller supplied enough space to hold the unpadded buffer, + * we can unwrap directly into that unpadded buffer. Otherwise + * we allocate a buffer that can hold the padding, and we'll copy + * the result in a later step */ + newBuf = output; + if (maxOutputLen < paddedLen) { + allocBuf = newBuf = PORT_Alloc(paddedLen); + if (!allocBuf) { + return SECFailure; + } + } + /* We pass NULL for the first IV argument because we don't know + * what the IV has since in includes the length, so we don't have + * Winv verify it. 
We pass iv in the second argument to get the + * iv, which we verify below before we return anything */ + rv = AESKeyWrap_Winv(cx, NULL, iv, newBuf, &outLen, + paddedLen, input, inputLen); + } + if (rv != SECSuccess) { + goto loser; + } + rv = SECFailure; + if (outLen != paddedLen) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + goto loser; + } + + /* we verify the result in a constant time manner */ + /* verify ICV magic */ + magic = decode_PRUint32_BE(iv); + good = PORT_CT_EQ(magic, AES_KEY_WRAP_ICV2_INT32); + /* fetch and verify plain text length */ + outLen = decode_PRUint32_BE(iv + AES_KEY_WRAP_ICV2_LEN); + good &= PORT_CT_LE(outLen, paddedLen); + /* now verify the padding */ + padLen = paddedLen - outLen; + padLen2 = BLOCK_PAD_POWER2(outLen, AES_KEY_WRAP_BLOCK_SIZE); + good &= PORT_CT_EQ(padLen, padLen2); + for (i = 0; i < AES_KEY_WRAP_BLOCK_SIZE; i++) { + unsigned int doTest = PORT_CT_GT(padLen, i); + unsigned int result = PORT_CT_ZERO(newBuf[paddedLen - i - 1]); + good &= PORT_CT_SEL(doTest, result, PORT_CT_TRUE); + } + + /* now if anything was wrong, fail. At this point we will leak timing + * information, but we also 'leak' the error code as well. 
*/ + if (!good) { + PORT_SetError(SEC_ERROR_BAD_DATA); + goto loser; + } + + /* now copy out the result */ + *pOutputLen = outLen; + if (maxOutputLen < outLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + goto loser; + } + if (output != newBuf) { + PORT_Memcpy(output, newBuf, outLen); + } + rv = SECSuccess; +loser: + /* if we failed, make sure we don't return any data to the user */ + if ((rv != SECSuccess) && (output == newBuf)) { + PORT_Memset(newBuf, 0, paddedLen); + } + /* clear out CSP sensitive data from the heap and stack */ + if (allocBuf) { + PORT_ZFree(allocBuf, paddedLen); + } + PORT_Memset(iv, 0, sizeof(iv)); + return rv; +} diff --git a/security/nss/lib/freebl/alghmac.c b/security/nss/lib/freebl/alghmac.c new file mode 100644 index 0000000000..58bbaa9ec8 --- /dev/null +++ b/security/nss/lib/freebl/alghmac.c @@ -0,0 +1,209 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secport.h" +#include "hasht.h" +#include "blapit.h" +#include "alghmac.h" +#include "secerr.h" + +#define HMAC_PAD_SIZE HASH_BLOCK_LENGTH_MAX + +struct HMACContextStr { + void *hash; + const SECHashObject *hashobj; + PRBool wasAllocated; + unsigned char ipad[HMAC_PAD_SIZE]; + unsigned char opad[HMAC_PAD_SIZE]; +}; + +void +HMAC_Destroy(HMACContext *cx, PRBool freeit) +{ + if (cx == NULL) + return; + + PORT_Assert(!freeit == !cx->wasAllocated); + if (cx->hash != NULL) { + cx->hashobj->destroy(cx->hash, PR_TRUE); + PORT_Memset(cx, 0, sizeof *cx); + } + if (freeit) + PORT_Free(cx); +} + +static SECStatus +hmac_initKey(HMACContext *cx, const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS) +{ + unsigned int i; + unsigned char hashed_secret[HASH_LENGTH_MAX]; + + /* required by FIPS 198 Section 3 */ + if (isFIPS && secret_len < cx->hashobj->length / 2) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (secret_len > cx->hashobj->blocklength) { + cx->hashobj->begin(cx->hash); + cx->hashobj->update(cx->hash, secret, secret_len); + PORT_Assert(cx->hashobj->length <= sizeof hashed_secret); + cx->hashobj->end(cx->hash, hashed_secret, &secret_len, + sizeof hashed_secret); + if (secret_len != cx->hashobj->length) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + goto loser; + } + secret = (const unsigned char *)&hashed_secret[0]; + } + + PORT_Memset(cx->ipad, 0x36, cx->hashobj->blocklength); + PORT_Memset(cx->opad, 0x5c, cx->hashobj->blocklength); + + /* fold secret into padding */ + for (i = 0; i < secret_len; i++) { + cx->ipad[i] ^= secret[i]; + cx->opad[i] ^= secret[i]; + } + PORT_Memset(hashed_secret, 0, sizeof hashed_secret); + return SECSuccess; + +loser: + PORT_Memset(hashed_secret, 0, sizeof hashed_secret); + return SECFailure; +} + +SECStatus +HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj, + const unsigned char *secret, unsigned int secret_len, PRBool 
isFIPS) +{ + SECStatus rv; + + if (cx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + cx->wasAllocated = PR_FALSE; + cx->hashobj = hash_obj; + cx->hash = cx->hashobj->create(); + if (cx->hash == NULL) + goto loser; + + rv = hmac_initKey(cx, secret, secret_len, isFIPS); + if (rv != SECSuccess) + goto loser; + + return rv; +loser: + if (cx->hash != NULL) + cx->hashobj->destroy(cx->hash, PR_TRUE); + return SECFailure; +} + +HMACContext * +HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS) +{ + SECStatus rv; + HMACContext *cx = PORT_ZNew(HMACContext); + if (cx == NULL) + return NULL; + rv = HMAC_Init(cx, hash_obj, secret, secret_len, isFIPS); + cx->wasAllocated = PR_TRUE; + if (rv != SECSuccess) { + PORT_Free(cx); /* contains no secret info */ + cx = NULL; + } + return cx; +} + +/* this allows us to reuse an existing HMACContext with a new key and + * Hash function */ +SECStatus +HMAC_ReInit(HMACContext *cx, const SECHashObject *hash_obj, + const unsigned char *secret, unsigned int secret_len, PRBool isFIPS) +{ + PRBool wasAllocated; + SECStatus rv; + + /* if we are using the same hash, keep the hash contexts and only + * init the key */ + if ((cx->hashobj == hash_obj) && (cx->hash != NULL)) { + return hmac_initKey(cx, secret, secret_len, isFIPS); + } + /* otherwise we destroy the contents of the context and + * initalize it from scratch. 
We need to preseve the current state + * of wasAllocated to the final destroy works correctly */ + wasAllocated = cx->wasAllocated; + cx->wasAllocated = PR_FALSE; + HMAC_Destroy(cx, PR_FALSE); + rv = HMAC_Init(cx, hash_obj, secret, secret_len, isFIPS); + if (rv != SECSuccess) { + return rv; + } + cx->wasAllocated = wasAllocated; + return SECSuccess; +} + +void +HMAC_Begin(HMACContext *cx) +{ + /* start inner hash */ + cx->hashobj->begin(cx->hash); + cx->hashobj->update(cx->hash, cx->ipad, cx->hashobj->blocklength); +} + +void +HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len) +{ + cx->hashobj->update(cx->hash, data, data_len); +} + +SECStatus +HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len, + unsigned int max_result_len) +{ + if (max_result_len < cx->hashobj->length) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + cx->hashobj->end(cx->hash, result, result_len, max_result_len); + if (*result_len != cx->hashobj->length) + return SECFailure; + + cx->hashobj->begin(cx->hash); + cx->hashobj->update(cx->hash, cx->opad, cx->hashobj->blocklength); + cx->hashobj->update(cx->hash, result, *result_len); + cx->hashobj->end(cx->hash, result, result_len, max_result_len); + return SECSuccess; +} + +HMACContext * +HMAC_Clone(HMACContext *cx) +{ + HMACContext *newcx; + + newcx = (HMACContext *)PORT_ZAlloc(sizeof(HMACContext)); + if (newcx == NULL) + goto loser; + + newcx->wasAllocated = PR_TRUE; + newcx->hashobj = cx->hashobj; + newcx->hash = cx->hashobj->clone(cx->hash); + if (newcx->hash == NULL) + goto loser; + PORT_Memcpy(newcx->ipad, cx->ipad, cx->hashobj->blocklength); + PORT_Memcpy(newcx->opad, cx->opad, cx->hashobj->blocklength); + return newcx; + +loser: + HMAC_Destroy(newcx, PR_TRUE); + return NULL; +} diff --git a/security/nss/lib/freebl/alghmac.h b/security/nss/lib/freebl/alghmac.h new file mode 100644 index 0000000000..0e0d66a344 --- /dev/null +++ b/security/nss/lib/freebl/alghmac.h @@ 
-0,0 +1,70 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _ALGHMAC_H_
+#define _ALGHMAC_H_
+
+/* opaque HMAC state; the structure is defined in alghmac.c */
+typedef struct HMACContextStr HMACContext;
+
+SEC_BEGIN_PROTOS
+
+/* destroy HMAC context
+ *  cx          the context (NULL is ignored)
+ *  freeit      if true, the context structure itself is freed as well
+ */
+extern void
+HMAC_Destroy(HMACContext *cx, PRBool freeit);
+
+/* create HMAC context
+ *  hash_obj    hash object from SECRawHashObjects[]
+ *  secret      the secret with which the HMAC is performed.
+ *  secret_len  the length of the secret.
+ *  isFIPS      true if conforming to FIPS 198.
+ *
+ * NULL is returned if an error occurs.
+ */
+extern HMACContext *
+HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret,
+            unsigned int secret_len, PRBool isFIPS);
+
+/* like HMAC_Create, except caller allocates HMACContext.
+ * Returns SECFailure if cx is NULL or initialization fails. */
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj,
+          const unsigned char *secret, unsigned int secret_len, PRBool isFIPS);
+
+/* like HMAC_Init, except caller passes in an existing context
+ * previously used by either HMAC_Create or HMAC_Init. */
+SECStatus
+HMAC_ReInit(HMACContext *cx, const SECHashObject *hash_obj,
+            const unsigned char *secret, unsigned int secret_len, PRBool isFIPS);
+
+/* reset HMAC for a fresh round: restarts the hash and feeds it the inner pad */
+extern void
+HMAC_Begin(HMACContext *cx);
+
+/* update HMAC
+ *  cx          HMAC Context
+ *  data        the data to perform HMAC on
+ *  data_len    the length of the data to process
+ */
+extern void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len);
+
+/* Finish HMAC -- place the results within result
+ *  cx          HMAC context
+ *  result      buffer for resulting hmac'd data
+ *  result_len  where the resultant hmac length is stored
+ *  max_result_len  maximum possible length that can be stored in result
+ *
+ * SECFailure is returned if max_result_len is smaller than the hash length.
+ */
+extern SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+            unsigned int max_result_len);
+
+/* clone a copy of the HMAC state. This is useful when you
+ * need to keep a running hmac but also need to extract portions
+ * partway through the process. NULL is returned if an error occurs.
+ */
+extern HMACContext *
+HMAC_Clone(HMACContext *cx);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/altivec-types.h b/security/nss/lib/freebl/altivec-types.h
new file mode 100644
index 0000000000..118a7e0879
--- /dev/null
+++ b/security/nss/lib/freebl/altivec-types.h
@@ -0,0 +1,25 @@
+/*
+ * altivec-types.h - shorter vector typedefs
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef _ALTIVEC_TYPES_H_ +#define _ALTIVEC_TYPES_H_ 1 + +#include + +typedef __vector unsigned char vec_u8; +typedef __vector signed char vec_s8; +typedef __vector unsigned short vec_u16; +typedef __vector signed short vec_s16; +typedef __vector unsigned int vec_u32; +typedef __vector signed int vec_s32; +#ifdef __VSX__ +typedef __vector unsigned long long vec_u64; +typedef __vector signed long long vec_s64; +#endif +typedef __vector float vec_f; + +#endif diff --git a/security/nss/lib/freebl/arcfive.c b/security/nss/lib/freebl/arcfive.c new file mode 100644 index 0000000000..dda77710fe --- /dev/null +++ b/security/nss/lib/freebl/arcfive.c @@ -0,0 +1,87 @@ +/* + * arcfive.c - stubs for RC5 - NOT a working implementation! + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "prerror.h" + +/******************************************/ +/* +** RC5 symmetric block cypher -- 64-bit block size +*/ + +/* +** Create a new RC5 context suitable for RC5 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC) +** "mode" one of NSS_RC5 or NSS_RC5_CBC +** +** When mode is set to NSS_RC5_CBC the RC5 cipher is run in "cipher block +** chaining" mode. +*/ +RC5Context * +RC5_CreateContext(const SECItem *key, unsigned int rounds, + unsigned int wordSize, const unsigned char *iv, int mode) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); + return NULL; +} + +/* +** Destroy an RC5 encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +void +RC5_DestroyContext(RC5Context *cx, PRBool freeit) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); +} + +/* +** Perform RC5 encryption. 
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+SECStatus
+RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    /* stub: nothing is written to output or outputLen; always fails */
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return SECFailure;
+}
+
+/*
+** Perform RC5 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+SECStatus
+RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    /* stub: nothing is written to output or outputLen; always fails */
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/arcfour-amd64-gas.s b/security/nss/lib/freebl/arcfour-amd64-gas.s
new file mode 100644
index 0000000000..7c4f5358f1
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-gas.s
@@ -0,0 +1,88 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# ** ARCFOUR implementation optimized for AMD64.
+# **
+# ** The throughput achieved by this code is about 320 MBytes/sec, on
+# ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+# Arguments as consumed below: %rdi = key (context), %rsi = len,
+# %rdx = in, %rcx = out. The context is read as x at offset 0, y at
+# offset 8 and the state array d from offset 16, one entry per 8 bytes.
+# %rbp and %rbx are saved on entry and restored before returning.
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR:
+        pushq   %rbp
+        pushq   %rbx
+        movq    %rdi, %rbp              # key = ARG(key)
+        movq    %rsi, %rbx              # rbx = ARG(len)
+        movq    %rdx, %rsi              # in = ARG(in)
+        movq    %rcx, %rdi              # out = ARG(out)
+        movq    (%rbp), %rcx            # x = key->x
+        movq    8(%rbp), %rdx           # y = key->y
+        addq    $16, %rbp               # d = key->data
+        incq    %rcx                    # x++
+        andq    $255, %rcx              # x &= 0xff
+        leaq    -8(%rbx,%rsi), %rbx     # rbx = in+len-8
+        movq    %rbx, %r9               # tmp = in+len-8
+        movq    0(%rbp,%rcx,8), %rax    # tx = d[x]
+        cmpq    %rsi, %rbx              # cmp in with in+len-8
+        jl      .Lend                   # jump if (in+len-8 < in)
+
+# main loop: 8 bytes of input per iteration
+.Lstart:
+        addq    $8, %rsi                # increment in
+        addq    $8, %rdi                # increment out
+
+        # generate the next 8 bytes of the rc4 stream into %r8
+        movq    $8, %r11                # byte counter
+1:      addb    %al, %dl                # y += tx
+        movl    0(%rbp,%rdx,8), %ebx    # ty = d[y]
+        movl    %ebx, 0(%rbp,%rcx,8)    # d[x] = ty
+        addb    %al, %bl                # val = ty + tx
+        movl    %eax, 0(%rbp,%rdx,8)    # d[y] = tx
+        incb    %cl                     # x++ (NEXT ROUND)
+        movl    0(%rbp,%rcx,8), %eax    # tx = d[x] (NEXT ROUND)
+        movb    0(%rbp,%rbx,8), %r8b    # val = d[val]
+        decb    %r11b                   # byte counter--; sets ZF for the jnz below
+        rorq    $8, %r8                 # (ror does not change ZF)
+        jnz     1b
+
+        # xor 8 bytes
+        xorq    -8(%rsi), %r8
+        cmpq    %r9, %rsi               # cmp in+len-8 with in
+        movq    %r8, -8(%rdi)
+        jle     .Lstart                 # jump if (in <= in+len-8)
+
+.Lend:
+        addq    $8, %r9                 # tmp = in+len
+
+        # handle the last bytes, one by one
+1:      cmpq    %rsi, %r9               # cmp in with in+len
+        jle     .Lfinished              # jump if (in+len <= in)
+        addb    %al, %dl                # y += tx
+        movl    0(%rbp,%rdx,8), %ebx    # ty = d[y]
+        movl    %ebx, 0(%rbp,%rcx,8)    # d[x] = ty
+        addb    %al, %bl                # val = ty + tx
+        movl    %eax, 0(%rbp,%rdx,8)    # d[y] = tx
+        incb    %cl                     # x++ (NEXT ROUND)
+        movl    0(%rbp,%rcx,8), %eax    # tx = d[x] (NEXT ROUND)
+        movb    0(%rbp,%rbx,8), %r8b    # val = d[val]
+        xorb    (%rsi), %r8b            # xor 1 byte
+        movb    %r8b, (%rdi)
+        incq    %rsi                    # in++
+        incq    %rdi                    # out++
+        jmp     1b
+
+# x was advanced for a round that did not run; step it back, then store x, y
+.Lfinished:
+        decq    %rcx                    # x--
+        movb    %dl, -8(%rbp)           # key->y = y
+        movb    %cl, -16(%rbp)          # key->x = x
+        popq    %rbx
+        popq    %rbp
+        ret
+.L_ARCFOUR_end:
+.size
ARCFOUR,.L_ARCFOUR_end-ARCFOUR
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack,"",@progbits
+.previous
diff --git a/security/nss/lib/freebl/arcfour-amd64-masm.asm b/security/nss/lib/freebl/arcfour-amd64-masm.asm
new file mode 100644
index 0000000000..1601c4f899
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-masm.asm
@@ -0,0 +1,107 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+; ** ARCFOUR implementation optimized for AMD64.
+; **
+; ** The throughput achieved by this code is about 320 MBytes/sec, on
+; ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.CODE
+
+; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen,
+;                     const unsigned char *input, unsigned char *output);
+;
+; Arguments as consumed below: rcx = cx (key), rdx = inputLen,
+; r8 = input, r9 = output. rbp, rbx, rsi and rdi are saved on entry and
+; restored before returning.
+
+
+ARCFOUR PROC
+
+        push    rbp
+        push    rbx
+        push    rsi
+        push    rdi
+
+        mov     rbp, rcx                ; key = ARG(key)
+        mov     rbx, rdx                ; rbx = ARG(len)
+        mov     rsi, r8                 ; in = ARG(in)
+        mov     rdi, r9                 ; out = ARG(out)
+        mov     rcx, [rbp]              ; x = key->x
+        mov     rdx, [rbp+8]            ; y = key->y
+        add     rbp, 16                 ; d = key->data
+        inc     rcx                     ; x++
+        and     rcx, 0ffh               ; x &= 0xff
+        lea     rbx, [rbx+rsi-8]        ; rbx = in+len-8
+        mov     r9, rbx                 ; tmp = in+len-8
+        mov     rax, [rbp+rcx*8]        ; tx = d[x]
+        cmp     rbx, rsi                ; cmp in with in+len-8
+        jl      Lend                    ; jump if (in+len-8 < in)
+
+Lstart:
+        add     rsi, 8                  ; increment in
+        add     rdi, 8                  ; increment out
+
+        ;
+        ; generate the next 8 bytes of the rc4 stream into r8
+        ;
+
+        mov     r11, 8                  ; byte counter
+
+@@:
+        add     dl, al                  ; y += tx
+        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
+        mov     [rbp+rcx*8], ebx        ; d[x] = ty
+        add     bl, al                  ; val = ty + tx
+        mov     [rbp+rdx*8], eax        ; d[y] = tx
+        inc     cl                      ; x++ (NEXT ROUND)
+        mov     eax, [rbp+rcx*8]        ; tx = d[x] (NEXT ROUND)
+        mov     r8b, [rbp+rbx*8]        ; val = d[val]
+        dec     r11b                    ; byte counter--; sets ZF for the jnz below
+        ror     r8, 8                   ; (ror does not change ZF)
+        jnz     @b
+
+        ;
+        ; xor 8 bytes
+        ;
+
+        xor     r8, [rsi-8]
+        cmp     rsi, r9                 ; cmp in+len-8 with in
+        mov     [rdi-8], r8
+        jle     Lstart                  ; jump if (in <= in+len-8)
+
+Lend:
+        add     r9, 8                   ; tmp = in+len
+
+        ;
+        ; handle the last bytes, one by one
+        ;
+
+@@:
+        cmp     r9, rsi                 ; cmp in with in+len
+        jle     Lfinished               ; jump if (in+len <= in)
+        add     dl, al                  ; y += tx
+        mov     ebx, [rbp+rdx*8]        ; ty = d[y]
+        mov     [rbp+rcx*8], ebx        ; d[x] = ty
+        add     bl, al                  ; val = ty + tx
+        mov     [rbp+rdx*8], eax        ; d[y] = tx
+        inc     cl                      ; x++ (NEXT ROUND)
+        mov     eax, [rbp+rcx*8]        ; tx = d[x] (NEXT ROUND)
+        mov     r8b, [rbp+rbx*8]        ; val = d[val]
+        xor     r8b, [rsi]              ; xor 1 byte
+        mov     [rdi], r8b
+        inc     rsi                     ; in++
+        inc     rdi                     ; out++
+        jmp     @b
+
+Lfinished:
+        dec     rcx                     ; x--
+        mov     [rbp-8], dl             ; key->y = y
+        mov     [rbp-16], cl            ; key->x = x
+
+        pop     rdi
+        pop     rsi
+        pop     rbx
+        pop     rbp
+        ret
+
+ARCFOUR ENDP
+
+END
diff --git a/security/nss/lib/freebl/arcfour-amd64-sun.s b/security/nss/lib/freebl/arcfour-amd64-sun.s
new file mode 100644
index 0000000000..8b649f9014
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-sun.s
@@ -0,0 +1,84 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/ ** ARCFOUR implementation optimized for AMD64.
+/ **
+/ ** The throughput achieved by this code is about 320 MBytes/sec, on
+/ ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+/ Arguments as consumed below: %rdi = key (context), %rsi = len,
+/ %rdx = in, %rcx = out. The context is read as x at offset 0, y at
+/ offset 8 and the state array d from offset 16, one entry per 8 bytes.
+/ %rbp and %rbx are saved on entry and restored before returning.
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR:
+        pushq   %rbp
+        pushq   %rbx
+        movq    %rdi, %rbp              / key = ARG(key)
+        movq    %rsi, %rbx              / rbx = ARG(len)
+        movq    %rdx, %rsi              / in = ARG(in)
+        movq    %rcx, %rdi              / out = ARG(out)
+        movq    (%rbp), %rcx            / x = key->x
+        movq    8(%rbp), %rdx           / y = key->y
+        addq    $16, %rbp               / d = key->data
+        incq    %rcx                    / x++
+        andq    $255, %rcx              / x &= 0xff
+        leaq    -8(%rbx,%rsi), %rbx     / rbx = in+len-8
+        movq    %rbx, %r9               / tmp = in+len-8
+        movq    0(%rbp,%rcx,8), %rax    / tx = d[x]
+        cmpq    %rsi, %rbx              / cmp in with in+len-8
+        jl      .Lend                   / jump if (in+len-8 < in)
+
+/ main loop: 8 bytes of input per iteration
+.Lstart:
+        addq    $8, %rsi                / increment in
+        addq    $8, %rdi                / increment out
+
+        / generate the next 8 bytes of the rc4 stream into %r8
+        movq    $8, %r11                / byte counter
+1:      addb    %al, %dl                / y += tx
+        movl    0(%rbp,%rdx,8), %ebx    / ty = d[y]
+        movl    %ebx, 0(%rbp,%rcx,8)    / d[x] = ty
+        addb    %al, %bl                / val = ty + tx
+        movl    %eax, 0(%rbp,%rdx,8)    / d[y] = tx
+        incb    %cl                     / x++ (NEXT ROUND)
+        movl    0(%rbp,%rcx,8), %eax    / tx = d[x] (NEXT ROUND)
+        movb    0(%rbp,%rbx,8), %r8b    / val = d[val]
+        decb    %r11b                   / byte counter--; sets ZF for the jnz below
+        rorq    $8, %r8                 / (ror does not change ZF)
+        jnz     1b
+
+        / xor 8 bytes
+        xorq    -8(%rsi), %r8
+        cmpq    %r9, %rsi               / cmp in+len-8 with in
+        movq    %r8, -8(%rdi)
+        jle     .Lstart                 / jump if (in <= in+len-8)
+
+.Lend:
+        addq    $8, %r9                 / tmp = in+len
+
+        / handle the last bytes, one by one
+1:      cmpq    %rsi, %r9               / cmp in with in+len
+        jle     .Lfinished              / jump if (in+len <= in)
+        addb    %al, %dl                / y += tx
+        movl    0(%rbp,%rdx,8), %ebx    / ty = d[y]
+        movl    %ebx, 0(%rbp,%rcx,8)    / d[x] = ty
+        addb    %al, %bl                / val = ty + tx
+        movl    %eax, 0(%rbp,%rdx,8)    / d[y] = tx
+        incb    %cl                     / x++ (NEXT ROUND)
+        movl    0(%rbp,%rcx,8), %eax    / tx = d[x] (NEXT ROUND)
+        movb    0(%rbp,%rbx,8), %r8b    / val = d[val]
+        xorb    (%rsi), %r8b            / xor 1 byte
+        movb    %r8b, (%rdi)
+        incq    %rsi                    / in++
+        incq    %rdi                    / out++
+        jmp     1b
+
+/ x was advanced for a round that did not run; step it back, then store x, y
+.Lfinished:
+        decq    %rcx                    / x--
+        movb    %dl, -8(%rbp)           / key->y = y
+        movb    %cl, -16(%rbp)          / key->x = x
+        popq    %rbx
+        popq    %rbp
+        ret
+.L_ARCFOUR_end:
+.size
ARCFOUR,.L_ARCFOUR_end-ARCFOUR diff --git a/security/nss/lib/freebl/arcfour.c b/security/nss/lib/freebl/arcfour.c new file mode 100644 index 0000000000..72e696e523 --- /dev/null +++ b/security/nss/lib/freebl/arcfour.c @@ -0,0 +1,594 @@ +/* arcfour.c - the arc four algorithm. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerr.h" +#include "secerr.h" + +#include "prtypes.h" +#include "blapi.h" + +/* Architecture-dependent defines */ + +#if defined(SOLARIS) || defined(HPUX) || defined(NSS_X86) || \ + defined(_WIN64) +/* Convert the byte-stream to a word-stream */ +#define CONVERT_TO_WORDS +#endif + +#if defined(AIX) || defined(NSS_BEVAND_ARCFOUR) +/* Treat array variables as words, not bytes, on CPUs that take + * much longer to write bytes than to write words, or when using + * assembler code that required it. + */ +#define USE_WORD +#endif + +#if defined(IS_64) || defined(NSS_BEVAND_ARCFOUR) +typedef PRUint64 WORD; +#else +typedef PRUint32 WORD; +#endif +#define WORDSIZE sizeof(WORD) + +#if defined(USE_WORD) +typedef WORD Stype; +#else +typedef PRUint8 Stype; +#endif + +#define ARCFOUR_STATE_SIZE 256 + +#define MASK1BYTE (WORD)(0xff) + +#define SWAP(a, b) \ + tmp = a; \ + a = b; \ + b = tmp; + +/* + * State information for stream cipher. + */ +struct RC4ContextStr { +#if defined(NSS_ARCFOUR_IJ_B4_S) || defined(NSS_BEVAND_ARCFOUR) + Stype i; + Stype j; + Stype S[ARCFOUR_STATE_SIZE]; +#else + Stype S[ARCFOUR_STATE_SIZE]; + Stype i; + Stype j; +#endif +}; + +/* + * array indices [0..255] to initialize cx->S array (faster than loop). 
+ */ +static const Stype Kinit[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + +RC4Context * +RC4_AllocateContext(void) +{ + return PORT_ZNew(RC4Context); +} + +SECStatus +RC4_InitContext(RC4Context *cx, const unsigned char *key, unsigned int len, + const unsigned char *unused1, int unused2, + unsigned int unused3, unsigned int unused4) +{ + unsigned int i; + PRUint8 j, tmp; + PRUint8 K[256]; + PRUint8 *L; + + /* verify the key 
length. */ + PORT_Assert(len > 0 && len < ARCFOUR_STATE_SIZE); + if (len == 0 || len >= ARCFOUR_STATE_SIZE) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + if (cx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* Initialize the state using array indices. */ + memcpy(cx->S, Kinit, sizeof cx->S); + /* Fill in K repeatedly with values from key. */ + L = K; + for (i = sizeof K; i > len; i -= len) { + memcpy(L, key, len); + L += len; + } + memcpy(L, key, i); + /* Stir the state of the generator. At this point it is assumed + * that the key is the size of the state buffer. If this is not + * the case, the key bytes are repeated to fill the buffer. + */ + j = 0; +#define ARCFOUR_STATE_STIR(ii) \ + j = j + cx->S[ii] + K[ii]; \ + SWAP(cx->S[ii], cx->S[j]); + for (i = 0; i < ARCFOUR_STATE_SIZE; i++) { + ARCFOUR_STATE_STIR(i); + } + cx->i = 0; + cx->j = 0; + return SECSuccess; +} + +/* + * Initialize a new generator. + */ +RC4Context * +RC4_CreateContext(const unsigned char *key, int len) +{ + RC4Context *cx = RC4_AllocateContext(); + if (cx) { + SECStatus rv = RC4_InitContext(cx, key, len, NULL, 0, 0, 0); + if (rv != SECSuccess) { + PORT_ZFree(cx, sizeof(*cx)); + cx = NULL; + } + } + return cx; +} + +void +RC4_DestroyContext(RC4Context *cx, PRBool freeit) +{ + if (freeit) + PORT_ZFree(cx, sizeof(*cx)); +} + +#if defined(NSS_BEVAND_ARCFOUR) +extern void ARCFOUR(RC4Context *cx, WORD inputLen, + const unsigned char *input, unsigned char *output); +#else +/* + * Generate the next byte in the stream. + */ +#define ARCFOUR_NEXT_BYTE() \ + tmpSi = cx->S[++tmpi]; \ + tmpj += tmpSi; \ + tmpSj = cx->S[tmpj]; \ + cx->S[tmpi] = tmpSj; \ + cx->S[tmpj] = tmpSi; \ + t = tmpSi + tmpSj; + +#ifdef CONVERT_TO_WORDS +/* + * Straight ARCFOUR op. No optimization. 
 */
/*
 * rc4_no_opt: XOR inputLen bytes of input with the key stream, one byte per
 * loop iteration, and store them in output.  *outputLen is set to inputLen.
 * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen.  The stream
 * indices are worked on in locals and written back to cx at the end.
 */
static SECStatus
rc4_no_opt(RC4Context *cx, unsigned char *output,
           unsigned int *outputLen, unsigned int maxOutputLen,
           const unsigned char *input, unsigned int inputLen)
{
    PRUint8 t;
    Stype tmpSi, tmpSj;
    register PRUint8 tmpi = cx->i;
    register PRUint8 tmpj = cx->j;
    unsigned int index;
    PORT_Assert(maxOutputLen >= inputLen);
    if (maxOutputLen < inputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    for (index = 0; index < inputLen; index++) {
        /* Generate next byte from stream. */
        ARCFOUR_NEXT_BYTE();
        /* output = next stream byte XOR next input byte */
        output[index] = cx->S[t] ^ input[index];
    }
    *outputLen = inputLen;
    cx->i = tmpi;
    cx->j = tmpj;
    return SECSuccess;
}

#else
/* !CONVERT_TO_WORDS */

/*
 * Byte-at-a-time ARCFOUR, unrolling the loop into 8 pieces.
 *
 * Same contract as the byte loop: *outputLen is set to inputLen, and the
 * call fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen.
 * Whole groups of 8 bytes go through the unrolled loop; the remaining
 * inputLen % 8 bytes are handled by the fall-through switch below.
 */
static SECStatus
rc4_unrolled(RC4Context *cx, unsigned char *output,
             unsigned int *outputLen, unsigned int maxOutputLen,
             const unsigned char *input, unsigned int inputLen)
{
    PRUint8 t;
    Stype tmpSi, tmpSj;
    register PRUint8 tmpi = cx->i;
    register PRUint8 tmpj = cx->j;
    int index;
    PORT_Assert(maxOutputLen >= inputLen);
    if (maxOutputLen < inputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    for (index = inputLen / 8; index-- > 0; input += 8, output += 8) {
        ARCFOUR_NEXT_BYTE();
        output[0] = cx->S[t] ^ input[0];
        ARCFOUR_NEXT_BYTE();
        output[1] = cx->S[t] ^ input[1];
        ARCFOUR_NEXT_BYTE();
        output[2] = cx->S[t] ^ input[2];
        ARCFOUR_NEXT_BYTE();
        output[3] = cx->S[t] ^ input[3];
        ARCFOUR_NEXT_BYTE();
        output[4] = cx->S[t] ^ input[4];
        ARCFOUR_NEXT_BYTE();
        output[5] = cx->S[t] ^ input[5];
        ARCFOUR_NEXT_BYTE();
        output[6] = cx->S[t] ^ input[6];
        ARCFOUR_NEXT_BYTE();
        output[7] = cx->S[t] ^ input[7];
    }
    index = inputLen % 8;
    if (index) {
        /* Point both buffers one past the tail so that the cases can use
         * negative offsets and fall through in stream order. */
        input += index;
        output += index;
        switch (index) {
            case 7:
                ARCFOUR_NEXT_BYTE();
                output[-7] = cx->S[t] ^ input[-7]; /* FALLTHRU */
            case 6:
                ARCFOUR_NEXT_BYTE();
                output[-6] = cx->S[t] ^ input[-6]; /* FALLTHRU */
            case 5:
                ARCFOUR_NEXT_BYTE();
                output[-5] = cx->S[t] ^ input[-5]; /* FALLTHRU */
            case 4:
                ARCFOUR_NEXT_BYTE();
                output[-4] = cx->S[t] ^ input[-4]; /* FALLTHRU */
            case 3:
                ARCFOUR_NEXT_BYTE();
                output[-3] = cx->S[t] ^ input[-3]; /* FALLTHRU */
            case 2:
                ARCFOUR_NEXT_BYTE();
                output[-2] = cx->S[t] ^ input[-2]; /* FALLTHRU */
            case 1:
                ARCFOUR_NEXT_BYTE();
                output[-1] = cx->S[t] ^ input[-1]; /* FALLTHRU */
            default:
                /* FALLTHRU */
                ; /* hp-ux build breaks without this */
        }
    }
    cx->i = tmpi;
    cx->j = tmpj;
    *outputLen = inputLen;
    return SECSuccess;
}
#endif

/*
 * ARCFOUR_NEXT4BYTES_L / _B: generate four stream bytes and OR them into
 * streamWord starting at bit offset n.  The _L form places the first byte
 * at the lowest shift, the _B form at the highest, so that in both cases
 * the first stream byte lands on the first byte of the word in memory.
 */
#ifdef IS_LITTLE_ENDIAN
#define ARCFOUR_NEXT4BYTES_L(n)               \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n);      \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n + 8);  \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n + 16); \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n + 24);
#else
#define ARCFOUR_NEXT4BYTES_B(n)               \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n + 24); \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n + 16); \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n + 8);  \
    ARCFOUR_NEXT_BYTE();                      \
    streamWord |= (WORD)cx->S[t] << (n);
#endif

/*
 * ARCFOUR_NEXT_WORD: clear streamWord and fill it with the next stream
 * bytes, eight of them in the 64-bit variants and four in the 32-bit ones.
 */
#if (defined(IS_64) && !defined(__sparc)) || defined(NSS_USE_64)
/* 64-bit wordsize */
#ifdef IS_LITTLE_ENDIAN
#define ARCFOUR_NEXT_WORD()       \
    {                             \
        streamWord = 0;           \
        ARCFOUR_NEXT4BYTES_L(0);  \
        ARCFOUR_NEXT4BYTES_L(32); \
    }
#else
#define ARCFOUR_NEXT_WORD()       \
    {                             \
        streamWord = 0;           \
        ARCFOUR_NEXT4BYTES_B(32); \
        ARCFOUR_NEXT4BYTES_B(0);  \
    }
#endif
#else
/* 32-bit wordsize */
#ifdef IS_LITTLE_ENDIAN
#define ARCFOUR_NEXT_WORD()      \
    {                            \
        streamWord = 0;          \
        ARCFOUR_NEXT4BYTES_L(0); \
    }
#else
#define ARCFOUR_NEXT_WORD()      \
    {                            \
        streamWord = 0;          \
        ARCFOUR_NEXT4BYTES_B(0); \
    }
#endif
#endif

#ifdef IS_LITTLE_ENDIAN
/* RSH / LSH move bytes right / left in memory order, so the C operators
 * are swapped on little-endian targets. */
#define RSH <<
#define LSH >>
#else
#define RSH >>
#define LSH <<
#endif

/* LEFTMOST_BYTE_SHIFT is the shift that places a byte at the first memory
 * position of a WORD; NEXT_BYTE_SHIFT steps to the following position. */
#ifdef IS_LITTLE_ENDIAN
#define LEFTMOST_BYTE_SHIFT 0
#define NEXT_BYTE_SHIFT(shift) shift + 8
#else
#define LEFTMOST_BYTE_SHIFT 8 * (WORDSIZE - 1)
#define NEXT_BYTE_SHIFT(shift) shift - 8
#endif

#ifdef CONVERT_TO_WORDS
/*
 * rc4_wordconv: XOR input with the key stream one WORD at a time.
 *
 * Output is written through word-aligned stores; input that is misaligned
 * relative to the output is re-assembled from two consecutive word loads
 * with the RSH/LSH shifts.  Inputs shorter than two words are handed to
 * rc4_no_opt.  *outputLen is set to inputLen; the call fails with
 * SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen.
 */
static SECStatus
rc4_wordconv(RC4Context *cx, unsigned char *output,
             unsigned int *outputLen, unsigned int maxOutputLen,
             const unsigned char *input, unsigned int inputLen)
{
    PR_STATIC_ASSERT(sizeof(PRUword) == sizeof(ptrdiff_t));
    /* Byte offsets of each buffer within its containing WORD. */
    unsigned int inOffset = (PRUword)input % WORDSIZE;
    unsigned int outOffset = (PRUword)output % WORDSIZE;
    register WORD streamWord;
    register const WORD *pInWord;
    register WORD *pOutWord;
    register WORD inWord, nextInWord;
    PRUint8 t;
    register Stype tmpSi, tmpSj;
    register PRUint8 tmpi = cx->i;
    register PRUint8 tmpj = cx->j;
    unsigned int bufShift, invBufShift;
    unsigned int i;
    const unsigned char *finalIn;
    unsigned char *finalOut;

    PORT_Assert(maxOutputLen >= inputLen);
    if (maxOutputLen < inputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    if (inputLen < 2 * WORDSIZE) {
        /* Ignore word conversion, do byte-at-a-time */
        return rc4_no_opt(cx, output, outputLen, maxOutputLen, input, inputLen);
    }
    *outputLen = inputLen;
    /* Round both pointers down to the start of their containing word. */
    pInWord = (const WORD *)(input - inOffset);
    pOutWord = (WORD *)(output - outOffset);
    /* bufShift is the bit distance between the two alignments and
     * invBufShift its complement within a word.  With equal offsets
     * bufShift is 0 and none of the shifted paths below are taken. */
    if (inOffset <= outOffset) {
        bufShift = 8 * (outOffset - inOffset);
        invBufShift = 8 * WORDSIZE - bufShift;
    } else {
        invBufShift = 8 * (inOffset - outOffset);
        bufShift = 8 * WORDSIZE - invBufShift;
    }
    /*****************************************************************/
    /* Step 1:                                                       */
    /* If the first output word is partial, consume the bytes in the */
    /* first partial output word by loading one or two words of      */
    /* input and shifting them accordingly.  Otherwise, just load    */
    /* in the first word of input.  At the end of this block, at     */
    /* least one partial word of input should ALWAYS be loaded.      */
    /*****************************************************************/
    if (outOffset) {
        unsigned int byteCount = WORDSIZE - outOffset;
        for (i = 0; i < byteCount; i++) {
            ARCFOUR_NEXT_BYTE();
            output[i] = cx->S[t] ^ input[i];
        }
        /* Consumed byteCount bytes of input */
        inputLen -= byteCount;
        pInWord++;

        /* move to next word of output */
        pOutWord++;

        /* If buffers are relatively misaligned, shift the bytes in inWord
         * to be aligned to the output buffer.
         */
        if (inOffset < outOffset) {
            /* The first input word (which may be partial) has more bytes
             * than needed.  Copy the remainder to inWord.
             */
            unsigned int shift = LEFTMOST_BYTE_SHIFT;
            inWord = 0;
            for (i = 0; i < outOffset - inOffset; i++) {
                inWord |= (WORD)input[byteCount + i] << shift;
                shift = NEXT_BYTE_SHIFT(shift);
            }
        } else if (inOffset > outOffset) {
            /* Consumed some bytes in the second input word.  Copy the
             * remainder to inWord.
             */
            inWord = *pInWord++;
            inWord = inWord LSH invBufShift;
        } else {
            inWord = 0;
        }
    } else {
        /* output is word-aligned */
        if (inOffset) {
            /* Input is not word-aligned.  The first word load of input
             * will not produce a full word of input bytes, so one word
             * must be pre-loaded.  The main loop below will load in the
             * next input word and shift some of its bytes into inWord
             * in order to create a full input word.  Note that the main
             * loop must execute at least once because the input must
             * be at least two words.
             */
            unsigned int shift = LEFTMOST_BYTE_SHIFT;
            inWord = 0;
            for (i = 0; i < WORDSIZE - inOffset; i++) {
                inWord |= (WORD)input[i] << shift;
                shift = NEXT_BYTE_SHIFT(shift);
            }
            pInWord++;
        } else {
            /* Input is word-aligned.  The first word load of input
             * will produce a full word of input bytes, so nothing
             * needs to be loaded here.
             */
            inWord = 0;
        }
    }
    /*****************************************************************/
    /* Step 2: main loop                                             */
    /* At this point the output buffer is word-aligned.  Any unused  */
    /* bytes from above will be in inWord (shifted correctly).  If   */
    /* the input buffer is unaligned relative to the output buffer,  */
    /* shifting has to be done.                                      */
    /*****************************************************************/
    if (bufShift) {
        /* preloadedByteCount is the number of input bytes pre-loaded
         * in inWord.
         */
        unsigned int preloadedByteCount = bufShift / 8;
        for (; inputLen >= preloadedByteCount + WORDSIZE;
             inputLen -= WORDSIZE) {
            nextInWord = *pInWord++;
            inWord |= nextInWord RSH bufShift;
            nextInWord = nextInWord LSH invBufShift;
            ARCFOUR_NEXT_WORD();
            *pOutWord++ = inWord ^ streamWord;
            inWord = nextInWord;
        }
        if (inputLen == 0) {
            /* Nothing left to do. */
            cx->i = tmpi;
            cx->j = tmpj;
            return SECSuccess;
        }
        /* The pre-loaded bytes have not been output yet; step back so
         * that step 3 re-reads them from the input buffer. */
        finalIn = (const unsigned char *)pInWord - preloadedByteCount;
    } else {
        for (; inputLen >= WORDSIZE; inputLen -= WORDSIZE) {
            inWord = *pInWord++;
            ARCFOUR_NEXT_WORD();
            *pOutWord++ = inWord ^ streamWord;
        }
        if (inputLen == 0) {
            /* Nothing left to do. */
            cx->i = tmpi;
            cx->j = tmpj;
            return SECSuccess;
        }
        finalIn = (const unsigned char *)pInWord;
    }
    /*****************************************************************/
    /* Step 3:                                                       */
    /* Do the remaining partial word of input one byte at a time.    */
    /*****************************************************************/
    finalOut = (unsigned char *)pOutWord;
    for (i = 0; i < inputLen; i++) {
        ARCFOUR_NEXT_BYTE();
        finalOut[i] = cx->S[t] ^ finalIn[i];
    }
    cx->i = tmpi;
    cx->j = tmpj;
    return SECSuccess;
}
#endif
#endif /* NSS_BEVAND_ARCFOUR */

/*
 * RC4_Encrypt: XOR inputLen bytes of input with the key stream of cx into
 * output and set *outputLen to inputLen.  Fails with SEC_ERROR_OUTPUT_LEN
 * when maxOutputLen < inputLen.  The implementation is chosen at compile
 * time: the assembler routine ARCFOUR, the word-stream version or the
 * unrolled byte version.
 */
SECStatus
RC4_Encrypt(RC4Context *cx, unsigned char *output,
            unsigned int *outputLen, unsigned int maxOutputLen,
            const unsigned char *input, unsigned int inputLen)
{
    PORT_Assert(maxOutputLen >= inputLen);
    if (maxOutputLen < inputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
#if defined(NSS_BEVAND_ARCFOUR)
    ARCFOUR(cx, inputLen, input, output);
    *outputLen = inputLen;
    return SECSuccess;
#elif defined(CONVERT_TO_WORDS)
    /* Convert the byte-stream to a word-stream */
    return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen);
#else
    /* Operate on bytes, but unroll the main loop */
    return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen);
#endif
}

SECStatus
RC4_Decrypt(RC4Context *cx, unsigned char *output,
            unsigned int *outputLen, unsigned int maxOutputLen,
            const unsigned char *input, unsigned int inputLen)
{
    PORT_Assert(maxOutputLen >= inputLen);
    if (maxOutputLen < inputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
/* decrypt and encrypt are same operation.
*/ +#if defined(NSS_BEVAND_ARCFOUR) + ARCFOUR(cx, inputLen, input, output); + *outputLen = inputLen; + return SECSuccess; +#elif defined(CONVERT_TO_WORDS) + /* Convert the byte-stream to a word-stream */ + return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen); +#else + /* Operate on bytes, but unroll the main loop */ + return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen); +#endif +} + +#undef CONVERT_TO_WORDS +#undef USE_WORD diff --git a/security/nss/lib/freebl/blake2b.c b/security/nss/lib/freebl/blake2b.c new file mode 100644 index 0000000000..2f14bfc978 --- /dev/null +++ b/security/nss/lib/freebl/blake2b.c @@ -0,0 +1,428 @@ +/* + * blake2b.c - definitions for the blake2b hash function + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secerr.h" +#include "blapi.h" +#include "blake2b.h" +#include "crypto_primitives.h" + +/** + * This contains the BLAKE2b initialization vectors. + */ +static const uint64_t iv[8] = { + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL, + 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/** + * This contains the table of permutations for blake2b compression function. 
+ */ +static const uint8_t sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + +/** + * This function increments the blake2b ctx counter. + */ +void +blake2b_IncrementCounter(BLAKE2BContext* ctx, const uint64_t inc) +{ + ctx->t[0] += inc; + ctx->t[1] += ctx->t[0] < inc; +} + +/** + * This macro implements the blake2b mixing function which mixes two 8-byte + * words from the message into the hash. 
+ */ +#define G(a, b, c, d, x, y) \ + a += b + x; \ + d = ROTR64(d ^ a, 32); \ + c += d; \ + b = ROTR64(b ^ c, 24); \ + a += b + y; \ + d = ROTR64(d ^ a, 16); \ + c += d; \ + b = ROTR64(b ^ c, 63) + +#define ROUND(i) \ + G(v[0], v[4], v[8], v[12], m[sigma[i][0]], m[sigma[i][1]]); \ + G(v[1], v[5], v[9], v[13], m[sigma[i][2]], m[sigma[i][3]]); \ + G(v[2], v[6], v[10], v[14], m[sigma[i][4]], m[sigma[i][5]]); \ + G(v[3], v[7], v[11], v[15], m[sigma[i][6]], m[sigma[i][7]]); \ + G(v[0], v[5], v[10], v[15], m[sigma[i][8]], m[sigma[i][9]]); \ + G(v[1], v[6], v[11], v[12], m[sigma[i][10]], m[sigma[i][11]]); \ + G(v[2], v[7], v[8], v[13], m[sigma[i][12]], m[sigma[i][13]]); \ + G(v[3], v[4], v[9], v[14], m[sigma[i][14]], m[sigma[i][15]]) + +/** + * The blake2b compression function which takes a full 128-byte chunk of the + * input message and mixes it into the ongoing ctx array, i.e., permute the + * ctx while xoring in the block of data. + */ +void +blake2b_Compress(BLAKE2BContext* ctx, const uint8_t* block) +{ + size_t i; + uint64_t v[16], m[16]; + + PORT_Memcpy(m, block, BLAKE2B_BLOCK_LENGTH); +#if !defined(IS_LITTLE_ENDIAN) + for (i = 0; i < 16; ++i) { + m[i] = FREEBL_HTONLL(m[i]); + } +#endif + + PORT_Memcpy(v, ctx->h, 8 * 8); + PORT_Memcpy(v + 8, iv, 8 * 8); + + v[12] ^= ctx->t[0]; + v[13] ^= ctx->t[1]; + v[14] ^= ctx->f; + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + ROUND(10); + ROUND(11); + + for (i = 0; i < 8; i++) { + ctx->h[i] ^= v[i] ^ v[i + 8]; + } +} + +/** + * This function can be used for both keyed and unkeyed version. + */ +BLAKE2BContext* +BLAKE2B_NewContext() +{ + return PORT_ZNew(BLAKE2BContext); +} + +/** + * Zero and free the context and can be used for both keyed and unkeyed version. 
+ */ +void +BLAKE2B_DestroyContext(BLAKE2BContext* ctx, PRBool freeit) +{ + PORT_Memset(ctx, 0, sizeof(*ctx)); + if (freeit) { + PORT_Free(ctx); + } +} + +/** + * This function initializes blake2b ctx and can be used for both keyed and + * unkeyed version. It also checks ctx and sets error states. + */ +static SECStatus +blake2b_Begin(BLAKE2BContext* ctx, uint8_t outlen, const uint8_t* key, + size_t keylen) +{ + if (!ctx) { + goto failure_noclean; + } + if (outlen == 0 || outlen > BLAKE2B512_LENGTH) { + goto failure; + } + if (key && keylen > BLAKE2B_KEY_SIZE) { + goto failure; + } + /* Note: key can be null if it's unkeyed. */ + if ((key == NULL && keylen > 0) || keylen > BLAKE2B_KEY_SIZE || + (key != NULL && keylen == 0)) { + goto failure; + } + + /* Mix key size(keylen) and desired hash length(outlen) into h0 */ + uint64_t param = outlen ^ (keylen << 8) ^ (1 << 16) ^ (1 << 24); + PORT_Memcpy(ctx->h, iv, 8 * 8); + ctx->h[0] ^= param; + ctx->outlen = outlen; + + /* This updates the context for only the keyed version */ + if (keylen > 0 && keylen <= BLAKE2B_KEY_SIZE && key) { + uint8_t block[BLAKE2B_BLOCK_LENGTH] = { 0 }; + PORT_Memcpy(block, key, keylen); + BLAKE2B_Update(ctx, block, BLAKE2B_BLOCK_LENGTH); + PORT_Memset(block, 0, BLAKE2B_BLOCK_LENGTH); + } + + return SECSuccess; + +failure: + PORT_Memset(ctx, 0, sizeof(*ctx)); +failure_noclean: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; +} + +SECStatus +BLAKE2B_Begin(BLAKE2BContext* ctx) +{ + return blake2b_Begin(ctx, BLAKE2B512_LENGTH, NULL, 0); +} + +SECStatus +BLAKE2B_MAC_Begin(BLAKE2BContext* ctx, const PRUint8* key, const size_t keylen) +{ + PORT_Assert(key != NULL); + if (!key) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return blake2b_Begin(ctx, BLAKE2B512_LENGTH, (const uint8_t*)key, keylen); +} + +static void +blake2b_IncrementCompress(BLAKE2BContext* ctx, size_t blockLength, + const unsigned char* input) +{ + blake2b_IncrementCounter(ctx, blockLength); + 
blake2b_Compress(ctx, input); +} + +/** + * This function updates blake2b ctx and can be used for both keyed and unkeyed + * version. + */ +SECStatus +BLAKE2B_Update(BLAKE2BContext* ctx, const unsigned char* in, + unsigned int inlen) +{ + /* Nothing to do if there's nothing. */ + if (inlen == 0) { + return SECSuccess; + } + + if (!ctx || !in) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Is this a reused context? */ + if (ctx->f) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + size_t left = ctx->buflen; + PORT_Assert(left <= BLAKE2B_BLOCK_LENGTH); + size_t fill = BLAKE2B_BLOCK_LENGTH - left; + + if (inlen > fill) { + if (ctx->buflen) { + /* There's some remaining data in ctx->buf that we have to prepend + * to in. */ + PORT_Memcpy(ctx->buf + left, in, fill); + ctx->buflen = 0; + blake2b_IncrementCompress(ctx, BLAKE2B_BLOCK_LENGTH, ctx->buf); + in += fill; + inlen -= fill; + } + while (inlen > BLAKE2B_BLOCK_LENGTH) { + blake2b_IncrementCompress(ctx, BLAKE2B_BLOCK_LENGTH, in); + in += BLAKE2B_BLOCK_LENGTH; + inlen -= BLAKE2B_BLOCK_LENGTH; + } + } + + /* Store the remaining data from in in ctx->buf to process later. + * Note that ctx->buflen can be BLAKE2B_BLOCK_LENGTH. We can't process that + * here because we have to update ctx->f before compressing the last block. + */ + PORT_Assert(inlen <= BLAKE2B_BLOCK_LENGTH); + PORT_Memcpy(ctx->buf + ctx->buflen, in, inlen); + ctx->buflen += inlen; + + return SECSuccess; +} + +/** + * This function finalizes ctx, pads final block and stores hash. + * It can be used for both keyed and unkeyed version. + */ +SECStatus +BLAKE2B_End(BLAKE2BContext* ctx, unsigned char* out, + unsigned int* digestLen, size_t maxDigestLen) +{ + size_t i; + unsigned int outlen = PR_MIN(BLAKE2B512_LENGTH, maxDigestLen); + + /* Argument checks */ + if (!ctx || !out) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Sanity check against outlen in context. 
*/ + if (ctx->outlen < outlen) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Is this a reused context? */ + if (ctx->f != 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Process the remaining data from ctx->buf (padded with 0). */ + blake2b_IncrementCounter(ctx, ctx->buflen); + /* BLAKE2B_BLOCK_LENGTH - ctx->buflen can be 0. */ + PORT_Memset(ctx->buf + ctx->buflen, 0, BLAKE2B_BLOCK_LENGTH - ctx->buflen); + ctx->f = UINT64_MAX; + blake2b_Compress(ctx, ctx->buf); + + /* Write out the blake2b context(ctx). */ + for (i = 0; i < outlen; ++i) { + out[i] = ctx->h[i / 8] >> ((i % 8) * 8); + } + + if (digestLen) { + *digestLen = outlen; + } + + return SECSuccess; +} + +SECStatus +blake2b_HashBuf(uint8_t* output, const uint8_t* input, uint8_t outlen, + size_t inlen, const uint8_t* key, size_t keylen) +{ + SECStatus rv = SECFailure; + BLAKE2BContext ctx = { { 0 } }; + + if (inlen != 0) { + PORT_Assert(input != NULL); + if (input == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto done; + } + } + + PORT_Assert(output != NULL); + if (output == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto done; + } + + if (blake2b_Begin(&ctx, outlen, key, keylen) != SECSuccess) { + goto done; + } + + if (BLAKE2B_Update(&ctx, input, inlen) != SECSuccess) { + goto done; + } + + if (BLAKE2B_End(&ctx, output, NULL, outlen) != SECSuccess) { + goto done; + } + rv = SECSuccess; + +done: + PORT_Memset(&ctx, 0, sizeof ctx); + return rv; +} + +SECStatus +BLAKE2B_Hash(unsigned char* dest, const char* src) +{ + return blake2b_HashBuf(dest, (const unsigned char*)src, BLAKE2B512_LENGTH, + PORT_Strlen(src), NULL, 0); +} + +SECStatus +BLAKE2B_HashBuf(unsigned char* output, const unsigned char* input, PRUint32 inlen) +{ + return blake2b_HashBuf(output, input, BLAKE2B512_LENGTH, inlen, NULL, 0); +} + +SECStatus +BLAKE2B_MAC_HashBuf(unsigned char* output, const unsigned char* input, + unsigned int inlen, const unsigned char* key, + 
unsigned int keylen) +{ + PORT_Assert(key != NULL); + if (!key && keylen <= BLAKE2B_KEY_SIZE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return blake2b_HashBuf(output, input, BLAKE2B512_LENGTH, inlen, key, keylen); +} + +unsigned int +BLAKE2B_FlattenSize(BLAKE2BContext* ctx) +{ + return sizeof(BLAKE2BContext); +} + +SECStatus +BLAKE2B_Flatten(BLAKE2BContext* ctx, unsigned char* space) +{ + PORT_Assert(space != NULL); + if (!space) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + PORT_Memcpy(space, ctx, sizeof(BLAKE2BContext)); + return SECSuccess; +} + +BLAKE2BContext* +BLAKE2B_Resurrect(unsigned char* space, void* arg) +{ + PORT_Assert(space != NULL); + if (!space) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + BLAKE2BContext* ctx = BLAKE2B_NewContext(); + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + + PORT_Memcpy(ctx, space, sizeof(BLAKE2BContext)); + return ctx; +} + +void +BLAKE2B_Clone(BLAKE2BContext* dest, BLAKE2BContext* src) +{ + PORT_Assert(dest != NULL); + PORT_Assert(src != NULL); + if (!dest || !src) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return; + } + PORT_Memcpy(dest, src, sizeof(BLAKE2BContext)); +} diff --git a/security/nss/lib/freebl/blake2b.h b/security/nss/lib/freebl/blake2b.h new file mode 100644 index 0000000000..d19a49f0ea --- /dev/null +++ b/security/nss/lib/freebl/blake2b.h @@ -0,0 +1,23 @@ +/* + * blake2b.h - header file for blake2b hash function + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef BLAKE_H +#define BLAKE_H + +#include +#include + +struct Blake2bContextStr { + uint64_t h[8]; /* chained state */ + uint64_t t[2]; /* total number of bytes */ + uint64_t f; /* last block flag */ + uint8_t buf[BLAKE2B_BLOCK_LENGTH]; /* input buffer */ + size_t buflen; /* size of remaining bytes in buf */ + size_t outlen; /* digest size */ +}; + +#endif /* BLAKE_H */ diff --git a/security/nss/lib/freebl/blapi.h b/security/nss/lib/freebl/blapi.h new file mode 100644 index 0000000000..94fd80222f --- /dev/null +++ b/security/nss/lib/freebl/blapi.h @@ -0,0 +1,1796 @@ +/* + * blapi.h - public prototypes for the freebl library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _BLAPI_H_ +#define _BLAPI_H_ + +#include "blapit.h" +#include "hasht.h" +#include "cmac.h" +#include "alghmac.h" + +SEC_BEGIN_PROTOS + +/* +** RSA encryption/decryption. When encrypting/decrypting the output +** buffer must be at least the size of the public key modulus. +*/ + +extern SECStatus BL_Init(void); + +/* +** Generate and return a new RSA public and private key. +** Both keys are encoded in a single RSAPrivateKey structure. +** "cx" is the random number generator context +** "keySizeInBits" is the size of the key to be generated, in bits. +** 512, 1024, etc. +** "publicExponent" when not NULL is a pointer to some data that +** represents the public exponent to use. The data is a byte +** encoded integer, in "big endian" order. +*/ +extern RSAPrivateKey *RSA_NewKey(int keySizeInBits, + SECItem *publicExponent); + +/* +** Perform a raw public-key operation +** Length of input and output buffers are equal to key's modulus len. 
+*/
+extern SECStatus RSA_PublicKeyOp(RSAPublicKey *key,
+                                 unsigned char *output,
+                                 const unsigned char *input);
+
+/*
+** Perform a raw private-key operation
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOp(RSAPrivateKey *key,
+                                  unsigned char *output,
+                                  const unsigned char *input);
+
+/*
+** Perform a raw private-key operation, and check the parameters used in
+** the operation for validity by performing a test operation first.
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+                                               unsigned char *output,
+                                               const unsigned char *input);
+
+/*
+** Perform a check of private key parameters for consistency.
+*/
+extern SECStatus RSA_PrivateKeyCheck(const RSAPrivateKey *key);
+
+/*
+** Given only minimal private key parameters, fill in the rest of the
+** parameters.
+**
+**
+** All the entries, including those supplied by the caller, will be
+** overwritten with data allocated out of the arena.
+**
+** If no arena is supplied, one will be created.
+**
+** The following fields must be supplied in order for this function
+** to succeed:
+**   one of either publicExponent or privateExponent
+**   two more of the following 5 parameters (not counting the above).
+**      modulus (n)
+**      prime1  (p)
+**      prime2  (q)
+**      publicExponent (e)
+**      privateExponent (d)
+**
+** NOTE: if only the publicExponent, privateExponent, and one prime is given,
+** then there may be more than one RSA key that matches that combination. If
+** we find 2 possible valid keys that meet these criteria, we return an error.
+** If we return the wrong key, and the original modulus is compared to the
+** new modulus, both can be factored by calculating gcd(n_old,n_new) to get
+** the common prime.
+**
+** NOTE: in some cases the publicExponent must be less than 2^23 for this
+** function to work correctly. (The case where we have only one of: modulus,
+** prime1, and prime2).
+**
+** All parameters will be replaced in the key structure with new parameters
+** allocated out of the arena. There is no attempt to free the old structures.
+** prime1 will always be greater than prime2 (even if the caller supplies the
+** smaller prime as prime1 or the larger prime as prime2). The parameters are
+** not overwritten on failure.
+**
+** While the remaining Chinese remainder theorem parameters (dp, dq, and qinv)
+** can also be used in reconstructing the private key, they are currently
+** ignored in this implementation.
+*/
+extern SECStatus RSA_PopulatePrivateKey(RSAPrivateKey *key);
+
+/********************************************************************
+** RSA algorithm
+*/
+
+/********************************************************************
+** Raw signing/encryption/decryption operations.
+**
+** No padding or formatting will be applied.
+** inputLen MUST be equivalent to the modulus size (in bytes). 
+*/ +extern SECStatus +RSA_SignRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_CheckSignRaw(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + +extern SECStatus +RSA_CheckSignRecoverRaw(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen); + +extern SECStatus +RSA_EncryptRaw(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_DecryptRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +/******************************************************************** +** RSAES-OAEP encryption/decryption, as defined in RFC 3447, Section 7.1. +** +** Note: Only MGF1 is supported as the mask generation function. It will be +** used with maskHashAlg as the inner hash function. +** +** Unless performing Known Answer Tests, "seed" should be NULL, indicating that +** freebl should generate a random value. Otherwise, it should be an octet +** string of seedLen bytes, which should be the same size as the output of +** hashAlg. 
+*/ +extern SECStatus +RSA_EncryptOAEP(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_DecryptOAEP(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +/******************************************************************** +** RSAES-PKCS1-v1_5 encryption/decryption, as defined in RFC 3447, Section 7.2. +*/ +extern SECStatus +RSA_EncryptBlock(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_DecryptBlock(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +/******************************************************************** +** RSASSA-PSS signing/verifying, as defined in RFC 3447, Section 8.1. +** +** Note: Only MGF1 is supported as the mask generation function. It will be +** used with maskHashAlg as the inner hash function. +** +** Unless performing Known Answer Tests, "salt" should be NULL, indicating that +** freebl should generate a random value. 
+*/ +extern SECStatus +RSA_SignPSS(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_CheckSignPSS(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + +/******************************************************************** +** RSASSA-PKCS1-v1_5 signing/verifying, as defined in RFC 3447, Section 8.2. +** +** These functions expect as input to be the raw value to be signed. For most +** cases using PKCS1-v1_5, this should be the value of T, the DER-encoded +** DigestInfo structure defined in Section 9.2, Step 2. +** Note: This can also be used for signatures that use PKCS1-v1_5 padding, such +** as the signatures used in SSL/TLS, which sign a raw hash. +*/ +extern SECStatus +RSA_Sign(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *data, + unsigned int dataLen); + +extern SECStatus +RSA_CheckSign(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen); + +extern SECStatus +RSA_CheckSignRecover(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen); + +/******************************************************************** +** DSA signing algorithm +*/ + +/* Generate a new random value within the interval [2, q-1]. +*/ +extern SECStatus DSA_NewRandom(PLArenaPool *arena, const SECItem *q, + SECItem *random); + +/* +** Generate and return a new DSA public and private key pair, +** both of which are encoded into a single DSAPrivateKey struct. 
+** "params" is a pointer to the PQG parameters for the domain
+** Uses a random seed.
+*/
+extern SECStatus DSA_NewKey(const PQGParams *params,
+                            DSAPrivateKey **privKey);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+*/
+extern SECStatus DSA_SignDigest(DSAPrivateKey *key,
+                                SECItem *signature,
+                                const SECItem *digest);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+*/
+extern SECStatus DSA_VerifyDigest(DSAPublicKey *key,
+                                  const SECItem *signature,
+                                  const SECItem *digest);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes long */
+extern SECStatus DSA_NewKeyFromSeed(const PQGParams *params,
+                                    const unsigned char *seed,
+                                    DSAPrivateKey **privKey);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes. */
+extern SECStatus DSA_SignDigestWithSeed(DSAPrivateKey *key,
+                                        SECItem *signature,
+                                        const SECItem *digest,
+                                        const unsigned char *seed);
+
+/******************************************************
+** Diffie-Hellman key exchange algorithm
+*/
+
+/* Generates parameters for Diffie-Hellman key generation.
+** primeLen is the length in bytes of prime P to be generated.
+*/
+extern SECStatus DH_GenParam(int primeLen, DHParams **params);
+
+/* Generates a public and private key, both of which are encoded in a single
+** DHPrivateKey struct. Params is input, privKey are output.
+** This is Phase 1 of Diffie-Hellman.
+*/
+extern SECStatus DH_NewKey(DHParams *params,
+                           DHPrivateKey **privKey);
+
+/*
+** DH_Derive does the Diffie-Hellman phase 2 calculation, using the
+** other party's publicValue, and the prime and our privateValue.
+** outBytes is the requested length of the generated secret in bytes.
+** A zero value means produce a value of any length up to the size of
+** the prime. If successful, derivedSecret->data is set
+** to the address of the newly allocated buffer containing the derived
+** secret, and derivedSecret->len is the size of the secret produced.
+** The size of the secret produced will depend on the value of outBytes.
+** If outBytes is 0, the key length will be all the significant bytes of
+** the derived secret (leading zeros are dropped). This length could be less
+** than the length of the prime. If outBytes is nonzero, the length of the
+** produced key will be outBytes long. If the key is truncated, the most
+** significant bytes are truncated. If it is expanded, zero bytes are added
+** at the beginning.
+** It is the caller's responsibility to free the allocated buffer
+** containing the derived secret.
+*/
+extern SECStatus DH_Derive(SECItem *publicValue,
+                           SECItem *prime,
+                           SECItem *privateValue,
+                           SECItem *derivedSecret,
+                           unsigned int outBytes);
+
+/*
+** KEA_Derive returns octet string with the private key for a dual
+** Diffie-Hellman key generation as specified for government key exchange.
+*/
+extern SECStatus KEA_Derive(SECItem *prime,
+                            SECItem *public1,
+                            SECItem *public2,
+                            SECItem *private1,
+                            SECItem *private2,
+                            SECItem *derivedSecret);
+
+/*
+ * verify that a KEA or DSA public key is a valid key for this prime and
+ * subprime domain.
+ */
+extern PRBool KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime);
+
+/* verify a value is prime */
+PRBool KEA_PrimeCheck(SECItem *prime);
+
+/****************************************
+ * J-PAKE key transport
+ */
+
+/* Given gx == g^x, create a Schnorr zero-knowledge proof for the value x
+ * using the specified hash algorithm and signer ID. The signature is
+ * returned in the values gv and r. testRandom must be NULL for a PRNG
+ * generated random commitment to be used in the signature. When testRandom
+ * is non-NULL, that value must contain a value in the subgroup q; that
+ * value will be used instead of a PRNG-generated commitment in order to
+ * facilitate known-answer tests.
+ *
+ * If gxIn is non-NULL then it must contain a pre-computed value of g^x that
+ * will be used by the function; in this case, the gxOut parameter must be NULL.
+ * If the gxIn parameter is NULL then gxOut must be non-NULL; in this case
+ * gxOut will contain the value g^x on output.
+ *
+ * gx (if not supplied by the caller), gv, and r will be allocated in the arena.
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ * The arena should be zeroed when it is freed.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+           const SECItem *signerID, const SECItem *x,
+           const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+           SECItem *gv, SECItem *r);
+
+/* Given gx == g^x, verify the Schnorr zero-knowledge proof (gv, r) for the
+ * value x using the specified hash algorithm and signer ID.
+ *
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg,
+             HASH_HashType hashType, const SECItem *signerID,
+             const SECItem *peerID, const SECItem *gx,
+             const SECItem *gv, const SECItem *r);
+
+/* Call before round 2 with x2, s, and x2s all non-NULL. This will calculate
+ * base = g^(x1+x3+x4) (mod p) and x2s = x2*s (mod q). The values to send in
+ * round 2 (A and the proof of knowledge of x2s) can then be calculated with
+ * JPAKE_Sign using pqg->base = base and x = x2s.
+ *
+ * Call after round 2 with x2, s, and x2s all NULL, and passing (gx1, gx2, gx3)
+ * instead of (gx1, gx3, gx4). This will calculate base = g^(x1+x2+x3). Then call
+ * JPAKE_Verify with pqg->base = base and then JPAKE_Final.
+ *
+ * base and x2s will be allocated in the arena. 
The arena is *not* optional so + * do not pass NULL for the arena parameter. The arena should be zeroed when it + * is freed. +*/ +SECStatus +JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *gx1, const SECItem *gx3, const SECItem *gx4, + SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s); + +/* K = (B/g^(x2*x4*s))^x2 (mod p) + * + * K will be allocated in the arena. The arena is *not* optional so do not pass + * NULL for the arena parameter. The arena should be zeroed when it is freed. + */ +SECStatus +JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *x2, const SECItem *gx4, const SECItem *x2s, + const SECItem *B, SECItem *K); + +/****************************************************** +** Elliptic Curve algorithms +*/ + +/* Generates a public and private key, both of which are encoded +** in a single ECPrivateKey struct. Params is input, privKey are +** output. +*/ +extern SECStatus EC_NewKey(ECParams *params, + ECPrivateKey **privKey); + +extern SECStatus EC_NewKeyFromSeed(ECParams *params, + ECPrivateKey **privKey, + const unsigned char *seed, + int seedlen); + +/* Validates an EC public key as described in Section 5.2.2 of + * X9.62. Such validation prevents against small subgroup attacks + * when the ECDH primitive is used with the cofactor. + */ +extern SECStatus EC_ValidatePublicKey(ECParams *params, + SECItem *publicValue); + +/* +** ECDH_Derive performs a scalar point multiplication of a point +** representing a (peer's) public key and a large integer representing +** a private key (its own). Both keys must use the same elliptic curve +** parameters. If the withCofactor parameter is true, the +** multiplication also uses the cofactor associated with the curve +** parameters. The output of this scheme is the x-coordinate of the +** resulting point. 
If successful, derivedSecret->data is set to the +** address of the newly allocated buffer containing the derived +** secret, and derivedSecret->len is the size of the secret +** produced. It is the caller's responsibility to free the allocated +** buffer containing the derived secret. +*/ +extern SECStatus ECDH_Derive(SECItem *publicValue, + ECParams *params, + SECItem *privateValue, + PRBool withCofactor, + SECItem *derivedSecret); + +/* On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +** On output, signature->len == size of signature in buffer. +** Uses a random seed. +*/ +extern SECStatus ECDSA_SignDigest(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest); + +/* On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +*/ +extern SECStatus ECDSA_VerifyDigest(ECPublicKey *key, + const SECItem *signature, + const SECItem *digest); + +/* Uses the provided seed. */ +extern SECStatus ECDSA_SignDigestWithSeed(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed, + const int seedlen); + +/******************************************/ +/* +** RC4 symmetric stream cypher +*/ + +/* +** Create a new RC4 context suitable for RC4 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +*/ +extern RC4Context *RC4_CreateContext(const unsigned char *key, int len); + +extern RC4Context *RC4_AllocateContext(void); +extern SECStatus RC4_InitContext(RC4Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *, + int, + unsigned int, + unsigned int); + +/* +** Destroy an RC4 encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void RC4_DestroyContext(RC4Context *cx, PRBool freeit); + +/* +** Perform RC4 encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. 
+** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC4_Encrypt(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform RC4 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC4_Decrypt(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** RC2 symmetric block cypher +*/ + +/* +** Create a new RC2 context suitable for RC2 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC) +** "mode" one of NSS_RC2 or NSS_RC2_CBC +** "effectiveKeyLen" is the effective key length (as specified in +** RFC 2268) in bytes (not bits). +** +** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block +** chaining" mode. +*/ +extern RC2Context *RC2_CreateContext(const unsigned char *key, unsigned int len, + const unsigned char *iv, int mode, + unsigned effectiveKeyLen); +extern RC2Context *RC2_AllocateContext(void); +extern SECStatus RC2_InitContext(RC2Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int effectiveKeyLen, + unsigned int); + +/* +** Destroy an RC2 encryption/decryption context. 
+** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void RC2_DestroyContext(RC2Context *cx, PRBool freeit); + +/* +** Perform RC2 encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC2_Encrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform RC2 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC2_Decrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** RC5 symmetric block cypher -- 64-bit block size +*/ + +/* +** Create a new RC5 context suitable for RC5 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC) +** "mode" one of NSS_RC5 or NSS_RC5_CBC +** +** When mode is set to NSS_RC5_CBC the RC5 cipher is run in "cipher block +** chaining" mode. 
+*/ +extern RC5Context *RC5_CreateContext(const SECItem *key, unsigned int rounds, + unsigned int wordSize, const unsigned char *iv, int mode); +extern RC5Context *RC5_AllocateContext(void); +extern SECStatus RC5_InitContext(RC5Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int rounds, + unsigned int wordSize); + +/* +** Destroy an RC5 encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void RC5_DestroyContext(RC5Context *cx, PRBool freeit); + +/* +** Perform RC5 encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC5_Encrypt(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform RC5 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ + +extern SECStatus RC5_Decrypt(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** DES symmetric block cypher +*/ + +/* +** Create a new DES context suitable for DES encryption/decryption. 
+** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_DES_CBC or +** mode is DES_EDE3_CBC) +** "mode" one of NSS_DES, NSS_DES_CBC, NSS_DES_EDE3 or NSS_DES_EDE3_CBC +** "encrypt" is PR_TRUE if the context will be used for encryption +** +** When mode is set to NSS_DES_CBC or NSS_DES_EDE3_CBC then the DES +** cipher is run in "cipher block chaining" mode. +*/ +extern DESContext *DES_CreateContext(const unsigned char *key, + const unsigned char *iv, + int mode, PRBool encrypt); +extern DESContext *DES_AllocateContext(void); +extern SECStatus DES_InitContext(DESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int); + +/* +** Destroy an DES encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void DES_DestroyContext(DESContext *cx, PRBool freeit); + +/* +** Perform DES encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +** +** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH +*/ +extern SECStatus DES_Encrypt(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform DES decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. 
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +** +** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH +*/ +extern SECStatus DES_Decrypt(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** SEED symmetric block cypher +*/ +extern SEEDContext * +SEED_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt); +extern SEEDContext *SEED_AllocateContext(void); +extern SECStatus SEED_InitContext(SEEDContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, unsigned int encrypt, + unsigned int); +extern void SEED_DestroyContext(SEEDContext *cx, PRBool freeit); +extern SECStatus +SEED_Encrypt(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); +extern SECStatus +SEED_Decrypt(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** AES symmetric block cypher (Rijndael) +*/ + +/* +** Create a new AES context suitable for AES encryption/decryption. +** "key" raw key data +** "keylen" the number of bytes of key data (16, 24, or 32) +** "blocklen" is the blocksize to use. NOTE: only 16 is supported! 
+*/ +extern AESContext * +AES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen, unsigned int blocklen); +extern AESContext *AES_AllocateContext(void); +extern SECStatus AES_InitContext(AESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int blocklen); + +/* +** Destroy a AES encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +AES_DestroyContext(AESContext *cx, PRBool freeit); + +/* +** Perform AES encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AES_Encrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform AES decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AES_Decrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); +/* +** Perform AES AEAD operation (either encrypt or decrypt), controlled by +** the context. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". 
Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +** "params" pointer to an AEAD specific param PKCS #11 param structure +** "paramsLen" length of the param structure pointed to by params +** "aad" additional authenticated data +** "aadLen" the amount of additional authenticated data. +*/ +extern SECStatus +AES_AEAD(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + void *params, unsigned int paramsLen, + const unsigned char *aad, unsigned int aadLen); + +/******************************************/ +/* +** AES key wrap algorithm, RFC 3394 +*/ + +/* +** Create a new AES key wrap context suitable for key wrapping/unwrapping. +** "key" raw key data +** "iv" The 8 byte "initial value" +** "encrypt", a boolean, true for key wrapping, false for unwrapping. +** "keylen" the number of bytes of key data (16, 24, or 32) +*/ +extern AESKeyWrapContext * +AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv, + int encrypt, unsigned int keylen); +extern AESKeyWrapContext *AESKeyWrap_AllocateContext(void); +extern SECStatus +AESKeyWrap_InitContext(AESKeyWrapContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int, + unsigned int encrypt, + unsigned int); + +/* +** Destroy an AES KeyWrap context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit); + +/* +** Perform AES key wrap. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform AES key unwrap. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform AES padded key wrap. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_EncryptKWP(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform AES padded key unwrap. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. 
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_DecryptKWP(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** Camellia symmetric block cypher +*/ + +/* +** Create a new Camellia context suitable for Camellia encryption/decryption. +** "key" raw key data +** "keylen" the number of bytes of key data (16, 24, or 32) +*/ +extern CamelliaContext * +Camellia_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, unsigned int keylen); + +extern CamelliaContext *Camellia_AllocateContext(void); +extern SECStatus Camellia_InitContext(CamelliaContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int unused); +/* +** Destroy a Camellia encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit); + +/* +** Perform Camellia encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +Camellia_Encrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform Camellia decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. 
+** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +Camellia_Decrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** ChaCha20 block cipher +*/ + +extern SECStatus ChaCha20_InitContext(ChaCha20Context *ctx, + const unsigned char *key, + unsigned int keyLen, + const unsigned char *nonce, + unsigned int nonceLen, + PRUint32 ctr); + +extern ChaCha20Context *ChaCha20_CreateContext(const unsigned char *key, + unsigned int keyLen, + const unsigned char *nonce, + unsigned int nonceLen, + PRUint32 ctr); + +extern void ChaCha20_DestroyContext(ChaCha20Context *ctx, PRBool freeit); + +/******************************************/ +/* +** ChaCha20+Poly1305 AEAD +*/ + +extern SECStatus ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx, + const unsigned char *key, + unsigned int keyLen, + unsigned int tagLen); + +extern ChaCha20Poly1305Context *ChaCha20Poly1305_CreateContext( + const unsigned char *key, unsigned int keyLen, unsigned int tagLen); + +extern void ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, + PRBool freeit); + +extern SECStatus ChaCha20Poly1305_Seal( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + +extern SECStatus ChaCha20Poly1305_Open( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, 
unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + +extern SECStatus ChaCha20Poly1305_Encrypt( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen, unsigned char *tagOut); + +extern SECStatus ChaCha20Poly1305_Decrypt( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen, unsigned char *tagIn); + +extern SECStatus ChaCha20_Xor( + unsigned char *output, const unsigned char *block, unsigned int len, + const unsigned char *k, const unsigned char *nonce, PRUint32 ctr); + +/******************************************/ +/* +** MD5 secure hash function +*/ + +/* +** Hash a null terminated string "src" into "dest" using MD5 +*/ +extern SECStatus MD5_Hash(unsigned char *dest, const char *src); + +/* +** Hash a non-null terminated string "src" into "dest" using MD5 +*/ +extern SECStatus MD5_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + +/* +** Create a new MD5 context +*/ +extern MD5Context *MD5_NewContext(void); + +/* +** Destroy an MD5 secure hash context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void MD5_DestroyContext(MD5Context *cx, PRBool freeit); + +/* +** Reset an MD5 context, preparing it for a fresh round of hashing +*/ +extern void MD5_Begin(MD5Context *cx); + +/* +** Update the MD5 hash function with more data. 
+** "cx" the context +** "input" the data to hash +** "inputLen" the amount of data to hash +*/ +extern void MD5_Update(MD5Context *cx, + const unsigned char *input, unsigned int inputLen); + +/* +** Finish the MD5 hash function. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 16 bytes of digest data are stored +** "digestLen" where the digest length (16) is stored +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void MD5_End(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* +** Export the current state of the MD5 hash without appending the standard +** padding and length bytes. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 16 bytes of digest data are stored +** "digestLen" where the digest length (16) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void MD5_EndRaw(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* + * Return the size of a buffer needed to flatten the MD5 Context into + * "cx" the context + * returns size; + */ +extern unsigned int MD5_FlattenSize(MD5Context *cx); + +/* + * Flatten the MD5 Context into a buffer: + * "cx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus MD5_Flatten(MD5Context *cx, unsigned char *space); + +/* + * Resurrect a flattened context into a MD5 Context + * "space" the buffer holding the flattened context + * "arg" ptr to void used by cryptographic resurrect + * returns resurrected context; + */ +extern MD5Context *MD5_Resurrect(unsigned char *space, void *arg); +extern void MD5_Clone(MD5Context *dest, MD5Context *src); + +/* +** trace the intermediate state info of the MD5 hash.
+*/ +extern void MD5_TraceState(MD5Context *cx); + +/******************************************/ +/* +** MD2 secure hash function +*/ + +/* +** Hash a null terminated string "src" into "dest" using MD2 +*/ +extern SECStatus MD2_Hash(unsigned char *dest, const char *src); + +/* +** Create a new MD2 context +*/ +extern MD2Context *MD2_NewContext(void); + +/* +** Destroy an MD2 secure hash context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void MD2_DestroyContext(MD2Context *cx, PRBool freeit); + +/* +** Reset an MD2 context, preparing it for a fresh round of hashing +*/ +extern void MD2_Begin(MD2Context *cx); + +/* +** Update the MD2 hash function with more data. +** "cx" the context +** "input" the data to hash +** "inputLen" the amount of data to hash +*/ +extern void MD2_Update(MD2Context *cx, + const unsigned char *input, unsigned int inputLen); + +/* +** Finish the MD2 hash function. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 16 bytes of digest data are stored +** "digestLen" where the digest length (16) is stored +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void MD2_End(MD2Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* + * Return the size of a buffer needed to flatten the MD2 Context into + * "cx" the context + * returns size; + */ +extern unsigned int MD2_FlattenSize(MD2Context *cx); + +/* + * Flatten the MD2 Context into a buffer: + * "cx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus MD2_Flatten(MD2Context *cx, unsigned char *space); + +/* + * Resurrect a flattened context into a MD2 Context + * "space" the buffer holding the flattened context + * "arg" ptr to void used by cryptographic resurrect + * returns resurrected context; + */ +extern MD2Context *MD2_Resurrect(unsigned char *space, void *arg); +extern void
MD2_Clone(MD2Context *dest, MD2Context *src); + +/******************************************/ +/* +** SHA-1 secure hash function +*/ + +/* +** Hash a null terminated string "src" into "dest" using SHA-1 +*/ +extern SECStatus SHA1_Hash(unsigned char *dest, const char *src); + +/* +** Hash a non-null terminated string "src" into "dest" using SHA-1 +*/ +extern SECStatus SHA1_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + +/* +** Create a new SHA-1 context +*/ +extern SHA1Context *SHA1_NewContext(void); + +/* +** Destroy a SHA-1 secure hash context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void SHA1_DestroyContext(SHA1Context *cx, PRBool freeit); + +/* +** Reset a SHA-1 context, preparing it for a fresh round of hashing +*/ +extern void SHA1_Begin(SHA1Context *cx); + +/* +** Update the SHA-1 hash function with more data. +** "cx" the context +** "input" the data to hash +** "inputLen" the amount of data to hash +*/ +extern void SHA1_Update(SHA1Context *cx, const unsigned char *input, + unsigned int inputLen); + +/* +** Finish the SHA-1 hash function. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 20 bytes of digest data are stored +** "digestLen" where the digest length (20) is stored +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA1_End(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* +** Export the current state of the SHA-1 hash without appending the standard +** padding and length bytes.
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 20 bytes of digest data are stored +** "digestLen" where the digest length (20) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA1_EndRaw(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* +** trace the intermediate state info of the SHA1 hash. +*/ +extern void SHA1_TraceState(SHA1Context *cx); + +/* + * Return the size of a buffer needed to flatten the SHA-1 Context into + * "cx" the context + * returns size; + */ +extern unsigned int SHA1_FlattenSize(SHA1Context *cx); + +/* + * Flatten the SHA-1 Context into a buffer: + * "cx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus SHA1_Flatten(SHA1Context *cx, unsigned char *space); + +/* + * Resurrect a flattened context into a SHA-1 Context + * "space" the buffer holding the flattened context + * "arg" ptr to void used by cryptographic resurrect + * returns resurrected context; + */ +extern SHA1Context *SHA1_Resurrect(unsigned char *space, void *arg); +extern void SHA1_Clone(SHA1Context *dest, SHA1Context *src); + +/******************************************/ + +extern SHA224Context *SHA224_NewContext(void); +extern void SHA224_DestroyContext(SHA224Context *cx, PRBool freeit); +extern void SHA224_Begin(SHA224Context *cx); +extern void SHA224_Update(SHA224Context *cx, const unsigned char *input, + unsigned int inputLen); +extern void SHA224_End(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +/* +** Export the current state of the SHA-224 hash without appending the standard +** padding and length bytes.
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 28 bytes of digest data are stored +** "digestLen" where the digest length (28) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA224_EndRaw(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA224_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA224_Hash(unsigned char *dest, const char *src); +extern void SHA224_TraceState(SHA224Context *cx); +extern unsigned int SHA224_FlattenSize(SHA224Context *cx); +extern SECStatus SHA224_Flatten(SHA224Context *cx, unsigned char *space); +extern SHA224Context *SHA224_Resurrect(unsigned char *space, void *arg); +extern void SHA224_Clone(SHA224Context *dest, SHA224Context *src); + +/******************************************/ + +extern SHA256Context *SHA256_NewContext(void); +extern void SHA256_DestroyContext(SHA256Context *cx, PRBool freeit); +extern void SHA256_Begin(SHA256Context *cx); +extern void SHA256_Update(SHA256Context *cx, const unsigned char *input, + unsigned int inputLen); +extern void SHA256_End(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +/* +** Export the current state of the SHA-256 hash without appending the standard +** padding and length bytes. 
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 32 bytes of digest data are stored +** "digestLen" where the digest length (32) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA256_EndRaw(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA256_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA256_Hash(unsigned char *dest, const char *src); +extern void SHA256_TraceState(SHA256Context *cx); +extern unsigned int SHA256_FlattenSize(SHA256Context *cx); +extern SECStatus SHA256_Flatten(SHA256Context *cx, unsigned char *space); +extern SHA256Context *SHA256_Resurrect(unsigned char *space, void *arg); +extern void SHA256_Clone(SHA256Context *dest, SHA256Context *src); + +/******************************************/ + +extern SHA512Context *SHA512_NewContext(void); +extern void SHA512_DestroyContext(SHA512Context *cx, PRBool freeit); +extern void SHA512_Begin(SHA512Context *cx); +extern void SHA512_Update(SHA512Context *cx, const unsigned char *input, + unsigned int inputLen); +/* +** Export the current state of the SHA-512 hash without appending the standard +** padding and length bytes. 
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 64 bytes of digest data are stored +** "digestLen" where the digest length (64) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA512_EndRaw(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern void SHA512_End(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA512_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA512_Hash(unsigned char *dest, const char *src); +extern void SHA512_TraceState(SHA512Context *cx); +extern unsigned int SHA512_FlattenSize(SHA512Context *cx); +extern SECStatus SHA512_Flatten(SHA512Context *cx, unsigned char *space); +extern SHA512Context *SHA512_Resurrect(unsigned char *space, void *arg); +extern void SHA512_Clone(SHA512Context *dest, SHA512Context *src); + +/******************************************/ + +extern SHA384Context *SHA384_NewContext(void); +extern void SHA384_DestroyContext(SHA384Context *cx, PRBool freeit); +extern void SHA384_Begin(SHA384Context *cx); +extern void SHA384_Update(SHA384Context *cx, const unsigned char *input, + unsigned int inputLen); +extern void SHA384_End(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +/* +** Export the current state of the SHA-384 hash without appending the standard +** padding and length bytes. 
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 48 bytes of digest data are stored +** "digestLen" where the digest length (48) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA384_EndRaw(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA384_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA384_Hash(unsigned char *dest, const char *src); +extern void SHA384_TraceState(SHA384Context *cx); +extern unsigned int SHA384_FlattenSize(SHA384Context *cx); +extern SECStatus SHA384_Flatten(SHA384Context *cx, unsigned char *space); +extern SHA384Context *SHA384_Resurrect(unsigned char *space, void *arg); +extern void SHA384_Clone(SHA384Context *dest, SHA384Context *src); + +/**************************************** + * implement TLS 1.0 Pseudo Random Function (PRF) and TLS P_hash function + */ + +extern SECStatus +TLS_PRF(const SECItem *secret, const char *label, SECItem *seed, + SECItem *result, PRBool isFIPS); + +extern SECStatus +TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS); + +/******************************************/ +/* +** Implements the Blake2b hash function. 
+*/ + +/* +** Hash a null terminated string "src" into "dest" using Blake2b +*/ +extern SECStatus BLAKE2B_Hash(unsigned char *dest, const char *src); + +/* +** Hash a non-null terminated string "src" into "dest" using Blake2b +*/ +extern SECStatus BLAKE2B_HashBuf(unsigned char *output, + const unsigned char *input, PRUint32 inlen); + +extern SECStatus BLAKE2B_MAC_HashBuf(unsigned char *output, + const unsigned char *input, + unsigned int inlen, + const unsigned char *key, + unsigned int keylen); + +/* +** Create a new Blake2b context +*/ +extern BLAKE2BContext *BLAKE2B_NewContext(void); + +/* +** Destroy a Blake2b secure hash context. +** "ctx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit); + +/* +** Reset a Blake2b context, preparing it for a fresh round of hashing +*/ +extern SECStatus BLAKE2B_Begin(BLAKE2BContext *ctx); + +extern SECStatus BLAKE2B_MAC_Begin(BLAKE2BContext *ctx, const PRUint8 *key, + const size_t keylen); + +/* +** Update the Blake hash function with more data. +*/ +extern SECStatus BLAKE2B_Update(BLAKE2BContext *ctx, const unsigned char *in, + unsigned int inlen); + +/* +** Finish the Blake hash function. 
Produce the digested results in "digest" +*/ +extern SECStatus BLAKE2B_End(BLAKE2BContext *ctx, unsigned char *out, + unsigned int *digestLen, size_t maxDigestLen); + +/* + * Return the size of a buffer needed to flatten the Blake2b Context into + * "ctx" the context + * returns size; + */ +extern unsigned int BLAKE2B_FlattenSize(BLAKE2BContext *ctx); + +/* + * Flatten the Blake2b Context into a buffer: + * "ctx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus BLAKE2B_Flatten(BLAKE2BContext *ctx, unsigned char *space); + +/* + * Resurrect a flattened context into a Blake2b Context + * "space" the buffer holding the flattened context + * "arg" ptr to void used by cryptographic resurrect + * returns resurrected context + */ +extern BLAKE2BContext *BLAKE2B_Resurrect(unsigned char *space, void *arg); +extern void BLAKE2B_Clone(BLAKE2BContext *dest, BLAKE2BContext *src); + +/******************************************/ +/* +** Pseudo Random Number Generation. FIPS compliance desirable. +*/ + +/* +** Initialize the global RNG context and give it some seed input taken +** from the system. This function is thread-safe and will only allow +** the global context to be initialized once. The seed input is likely +** small, so it is imperative that RNG_RandomUpdate() be called with +** additional seed data before the generator is used. A good way to +** provide the generator with additional entropy is to call +** RNG_SystemInfoForRNG(). Note that NSS_Init() does exactly that. +*/ +extern SECStatus RNG_RNGInit(void); + +/* +** Update the global random number generator with more seeding +** material +*/ +extern SECStatus RNG_RandomUpdate(const void *data, size_t bytes); + +/* +** Generate some random bytes, using the global random number generator +** object. +*/ +extern SECStatus RNG_GenerateGlobalRandomBytes(void *dest, size_t len); + +/* Destroy the global RNG context.
After a call to RNG_RNGShutdown() +** a call to RNG_RNGInit() is required in order to use the generator again, +** along with seed data (see the comment above RNG_RNGInit()). +*/ +extern void RNG_RNGShutdown(void); + +extern void RNG_SystemInfoForRNG(void); + +/* + * FIPS 186-2 Change Notice 1 RNG Algorithm 1, used both to + * generate the DSA X parameter and as a generic purpose RNG. + * + * The following two FIPS186Change functions are needed for + * NIST RNG Validation System. + */ + +/* + * FIPS186Change_GenerateX is now deprecated. It will return SECFailure with + * the error set to PR_NOT_IMPLEMENTED_ERROR. + */ +extern SECStatus +FIPS186Change_GenerateX(unsigned char *XKEY, + const unsigned char *XSEEDj, + unsigned char *x_j); + +/* + * When generating the DSA X parameter, we generate 2*GSIZE bytes + * of random output and reduce it mod q. + * + * Input: w, 2*GSIZE bytes + * q, DSA_SUBPRIME_LEN bytes + * Output: xj, DSA_SUBPRIME_LEN bytes + */ +extern SECStatus +FIPS186Change_ReduceModQForDSA(const unsigned char *w, + const unsigned char *q, + unsigned char *xj); + +/* To allow NIST KAT tests */ +extern SECStatus +PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len); + +/* + * The following functions are for FIPS poweron self test and FIPS algorithm + * testing. 
+ */ +extern SECStatus +PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len); + +extern SECStatus +PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional, unsigned int additional_len); + +extern SECStatus +PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len, + const PRUint8 *additional, unsigned int additional_len); + +extern SECStatus +PRNGTEST_Uninstantiate(void); + +extern SECStatus +PRNGTEST_RunHealthTests(void); + +/* Generate PQGParams and PQGVerify structs. + * Length of seed and length of h both equal length of P. + * All lengths are specified by "j", according to the table above. + * + * The verify parameters will conform to FIPS186-1. + */ +extern SECStatus +PQG_ParamGen(unsigned int j, /* input : determines length of P. */ + PQGParams **pParams, /* output: P Q and G returned here */ + PQGVerify **pVfy); /* output: counter and seed. */ + +/* Generate PQGParams and PQGVerify structs. + * Length of P specified by j. Length of h will match length of P. + * Length of SEED in bytes specified in seedBytes. + * seedBytes must be in the range [20..255] or an error will result. + * + * The verify parameters will conform to FIPS186-1. + */ +extern SECStatus +PQG_ParamGenSeedLen( + unsigned int j, /* input : determines length of P. */ + unsigned int seedBytes, /* input : length of seed in bytes.*/ + PQGParams **pParams, /* output: P Q and G returned here */ + PQGVerify **pVfy); /* output: counter and seed. */ + +/* Generate PQGParams and PQGVerify structs. + * Length of P specified by L in bits. + * Length of Q specified by N in bits. + * Length of SEED in bytes specified in seedBytes. + * seedBytes must be in the range [N..L*2] or an error will result. + * + * Note that j uses the above table, whereas L is the exact length.
L and N must + * match the table below or an error will result: + * + * L N + * 1024 160 + * 2048 224 + * 2048 256 + * 3072 256 + * + * If N or seedBytes are set to zero, then PQG_ParamGenV2 will + * pick a default value (typically the smallest secure value for these + * variables). + * + * The verify parameters will conform to FIPS186-3 using the smallest + * permissible hash for the key strength. + */ +extern SECStatus +PQG_ParamGenV2( + unsigned int L, /* input : determines length of P. */ + unsigned int N, /* input : determines length of Q. */ + unsigned int seedBytes, /* input : length of seed in bytes.*/ + PQGParams **pParams, /* output: P Q and G returned here */ + PQGVerify **pVfy); /* output: counter and seed. */ + +/* Test PQGParams for validity as DSS PQG values. + * If vfy is non-NULL, test PQGParams to make sure they were generated + * using the specified seed, counter, and h values. + * + * Return value indicates whether Verification operation ran successfully + * to completion, but does not indicate if PQGParams are valid or not. + * If return value is SECSuccess, then *result has these meanings: + * SECSuccess: PQGParams are valid. + * SECFailure: PQGParams are invalid. + * + * Verify the PQG against the counter, SEED and h. + * These tests are specified in FIPS 186-3 Appendix A.1.1.1, A.1.1.3, and A.2.2 + * PQG_VerifyParams will automatically choose the appropriate test. + */ + +extern SECStatus PQG_VerifyParams(const PQGParams *params, + const PQGVerify *vfy, SECStatus *result); + +extern void PQG_DestroyParams(PQGParams *params); + +extern void PQG_DestroyVerify(PQGVerify *vfy); + +/* + * clean-up any global tables freebl may have allocated after it starts up. + * This function is not thread safe and should be called only after the + * library has been quiesced.
+ */ +extern void BL_Cleanup(void); + +/* unload freebl shared library from memory */ +extern void BL_Unload(void); + +/************************************************************************** + * Verify a given Shared library signature * + **************************************************************************/ +PRBool BLAPI_SHVerify(const char *name, PRFuncPtr addr); + +/************************************************************************** + * Verify a given filename's signature * + **************************************************************************/ +PRBool BLAPI_SHVerifyFile(const char *shName); + +/************************************************************************** + * Verify Our Own Shared library signature * + **************************************************************************/ +PRBool BLAPI_VerifySelf(const char *name); + +/*********************************************************************/ +extern const SECHashObject *HASH_GetRawHashObject(HASH_HashType hashType); + +extern void BL_SetForkState(PRBool forked); + +/* +** prepare an ECParams structure from DEREncoded params + */ +extern SECStatus EC_FillParams(PLArenaPool *arena, + const SECItem *encodedParams, ECParams *params); +extern SECStatus EC_DecodeParams(const SECItem *encodedParams, + ECParams **ecparams); +extern SECStatus EC_CopyParams(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams); + +/* + * use the internal table to get the size in bytes of a single EC point + */ +extern int EC_GetPointSize(const ECParams *params); + +SEC_END_PROTOS + +#endif /* _BLAPI_H_ */ diff --git a/security/nss/lib/freebl/blapii.h b/security/nss/lib/freebl/blapii.h new file mode 100644 index 0000000000..a373b84d3b --- /dev/null +++ b/security/nss/lib/freebl/blapii.h @@ -0,0 +1,111 @@ +/* + * blapii.h - private data structures and prototypes for the freebl library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _BLAPII_H_ +#define _BLAPII_H_ + +#include "blapit.h" +#include "mpi.h" + +/* max block size of supported block ciphers */ +#define MAX_BLOCK_SIZE 16 + +typedef SECStatus (*freeblCipherFunc)(void *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + unsigned int blocksize); +typedef SECStatus (*freeblAeadFunc)(void *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + void *params, unsigned int paramsLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); +typedef void (*freeblDestroyFunc)(void *cx, PRBool freeit); + +SEC_BEGIN_PROTOS + +#ifndef NSS_FIPS_DISABLED +SECStatus BL_FIPSEntryOK(PRBool freeblOnly, PRBool rerun); +PRBool BL_POSTRan(PRBool freeblOnly); +#endif + +#if defined(XP_UNIX) && !defined(NO_FORK_CHECK) + +extern PRBool bl_parentForkedAfterC_Initialize; + +#define SKIP_AFTER_FORK(x) \ + if (!bl_parentForkedAfterC_Initialize) \ + x + +#else + +#define SKIP_AFTER_FORK(x) x + +#endif + +SEC_END_PROTOS + +#if defined(NSS_X86_OR_X64) +#define HAVE_UNALIGNED_ACCESS 1 +#endif + +#if defined(__clang__) +#define HAVE_NO_SANITIZE_ATTR __has_attribute(no_sanitize) +#else +#define HAVE_NO_SANITIZE_ATTR 0 +#endif + +/* Alignment helpers. 
*/ +#if defined(_MSC_VER) +#define pre_align __declspec(align(16)) +#define post_align +#elif defined(__GNUC__) +#define pre_align +#define post_align __attribute__((aligned(16))) +#else +#define pre_align +#define post_align +#endif + +#if defined(HAVE_UNALIGNED_ACCESS) && HAVE_NO_SANITIZE_ATTR +#define NO_SANITIZE_ALIGNMENT __attribute__((no_sanitize("alignment"))) +#else +#define NO_SANITIZE_ALIGNMENT +#endif + +#undef HAVE_NO_SANITIZE_ATTR + +SECStatus RSA_Init(); +SECStatus generate_prime(mp_int *prime, int primeLen); + +/* Freebl state. */ +PRBool aesni_support(); +PRBool clmul_support(); +PRBool sha_support(); +PRBool avx_support(); +PRBool avx2_support(); +PRBool ssse3_support(); +PRBool sse4_1_support(); +PRBool sse4_2_support(); +PRBool arm_neon_support(); +PRBool arm_aes_support(); +PRBool arm_pmull_support(); +PRBool arm_sha1_support(); +PRBool arm_sha2_support(); +PRBool ppc_crypto_support(); + +#ifdef NSS_FIPS_DISABLED +#define BLAPI_CLEAR_STACK(stack_size) +#else +#define BLAPI_CLEAR_STACK(stack_size) \ + { \ + volatile char _stkclr[stack_size]; \ + PORT_Memset((void *)&_stkclr[0], 0, stack_size); \ + } +#endif + +#endif /* _BLAPII_H_ */ diff --git a/security/nss/lib/freebl/blapit.h b/security/nss/lib/freebl/blapit.h new file mode 100644 index 0000000000..ff0d666397 --- /dev/null +++ b/security/nss/lib/freebl/blapit.h @@ -0,0 +1,455 @@ +/* + * blapit.h - public data structures for the freebl library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _BLAPIT_H_ +#define _BLAPIT_H_ + +#include "seccomon.h" +#include "prlink.h" +#include "plarena.h" +#include "ecl-exp.h" + +/* RC2 operation modes */ +#define NSS_RC2 0 +#define NSS_RC2_CBC 1 + +/* RC5 operation modes */ +#define NSS_RC5 0 +#define NSS_RC5_CBC 1 + +/* DES operation modes */ +#define NSS_DES 0 +#define NSS_DES_CBC 1 +#define NSS_DES_EDE3 2 +#define NSS_DES_EDE3_CBC 3 + +#define DES_KEY_LENGTH 8 /* Bytes */ + +/* AES operation modes */ +#define NSS_AES 0 +#define NSS_AES_CBC 1 +#define NSS_AES_CTS 2 +#define NSS_AES_CTR 3 +#define NSS_AES_GCM 4 + +/* Camellia operation modes */ +#define NSS_CAMELLIA 0 +#define NSS_CAMELLIA_CBC 1 + +/* SEED operation modes */ +#define NSS_SEED 0 +#define NSS_SEED_CBC 1 + +#define DSA1_SUBPRIME_LEN 20 /* Bytes */ +#define DSA1_SIGNATURE_LEN (DSA1_SUBPRIME_LEN * 2) /* Bytes */ +#define DSA_MAX_SUBPRIME_LEN 32 /* Bytes */ +#define DSA_MAX_SIGNATURE_LEN (DSA_MAX_SUBPRIME_LEN * 2) /* Bytes */ + +/* + * Mark the old defines as deprecated. This will warn code that expected + * DSA1 only that they need to change if they are to support DSA2. + */ +#if defined(__GNUC__) && (__GNUC__ > 3) +/* make GCC warn when we use these #defines */ +typedef int __BLAPI_DEPRECATED __attribute__((deprecated)); +#define DSA_SUBPRIME_LEN ((__BLAPI_DEPRECATED)DSA1_SUBPRIME_LEN) +#define DSA_SIGNATURE_LEN ((__BLAPI_DEPRECATED)DSA1_SIGNATURE_LEN) +#define DSA_Q_BITS ((__BLAPI_DEPRECATED)(DSA1_SUBPRIME_LEN * 8)) +#else +#ifdef _WIN32 +/* This magic gets the windows compiler to give us a deprecation + * warning; the names listed must match the three macros defined below */ +#pragma deprecated(DSA_SUBPRIME_LEN, DSA_SIGNATURE_LEN, DSA_Q_BITS) +#endif +#define DSA_SUBPRIME_LEN DSA1_SUBPRIME_LEN +#define DSA_SIGNATURE_LEN DSA1_SIGNATURE_LEN +#define DSA_Q_BITS (DSA1_SUBPRIME_LEN * 8) +#endif + +/* XXX We shouldn't have to hard code this limit. For + * now, this is the quickest way to support ECDSA signature + * processing (ECDSA signature lengths depend on curve + * size).
This limit is sufficient for curves upto + * 576 bits. + */ +#define MAX_ECKEY_LEN 72 /* Bytes */ + +#define EC_MAX_KEY_BITS 521 /* in bits */ +#define EC_MIN_KEY_BITS 256 /* in bits */ + +/* EC point compression format */ +#define EC_POINT_FORM_COMPRESSED_Y0 0x02 +#define EC_POINT_FORM_COMPRESSED_Y1 0x03 +#define EC_POINT_FORM_UNCOMPRESSED 0x04 +#define EC_POINT_FORM_HYBRID_Y0 0x06 +#define EC_POINT_FORM_HYBRID_Y1 0x07 + +/* + * Number of bytes each hash algorithm produces + */ +#define MD2_LENGTH 16 /* Bytes */ +#define MD5_LENGTH 16 /* Bytes */ +#define SHA1_LENGTH 20 /* Bytes */ +#define SHA256_LENGTH 32 /* bytes */ +#define SHA384_LENGTH 48 /* bytes */ +#define SHA512_LENGTH 64 /* bytes */ +#define BLAKE2B512_LENGTH 64 /* Bytes */ +#define HASH_LENGTH_MAX SHA512_LENGTH + +/* + * Input block size for each hash algorithm. + */ + +#define MD2_BLOCK_LENGTH 64 /* bytes */ +#define MD5_BLOCK_LENGTH 64 /* bytes */ +#define SHA1_BLOCK_LENGTH 64 /* bytes */ +#define SHA224_BLOCK_LENGTH 64 /* bytes */ +#define SHA256_BLOCK_LENGTH 64 /* bytes */ +#define SHA384_BLOCK_LENGTH 128 /* bytes */ +#define SHA512_BLOCK_LENGTH 128 /* bytes */ +#define BLAKE2B_BLOCK_LENGTH 128 /* Bytes */ +#define HASH_BLOCK_LENGTH_MAX SHA512_BLOCK_LENGTH + +#define AES_BLOCK_SIZE 16 /* bytes */ +#define AES_KEY_WRAP_BLOCK_SIZE (AES_BLOCK_SIZE / 2) +#define AES_KEY_WRAP_IV_BYTES AES_KEY_WRAP_BLOCK_SIZE + +#define AES_128_KEY_LENGTH 16 /* bytes */ +#define AES_192_KEY_LENGTH 24 /* bytes */ +#define AES_256_KEY_LENGTH 32 /* bytes */ + +#define CAMELLIA_BLOCK_SIZE 16 /* bytes */ + +#define SEED_BLOCK_SIZE 16 /* bytes */ +#define SEED_KEY_LENGTH 16 /* bytes */ + +#define NSS_FREEBL_DEFAULT_CHUNKSIZE 2048 + +#define BLAKE2B_KEY_SIZE 64 + +/* + * These values come from the initial key size limits from the PKCS #11 + * module. They may be arbitrarily adjusted to any value freebl supports. 
+ */ +#define RSA_MIN_MODULUS_BITS 128 +#define RSA_MAX_MODULUS_BITS 16384 +#define RSA_MAX_EXPONENT_BITS 64 +#define DH_MIN_P_BITS 128 +#define DH_MAX_P_BITS 16384 + +/* + * The FIPS 186-1 algorithm for generating primes P and Q allows only 9 + * distinct values for the length of P, and only one value for the + * length of Q. + * The algorithm uses a variable j to indicate which of the 9 lengths + * of P is to be used. + * The following table relates j to the lengths of P and Q in bits. + * + * j bits in P bits in Q + * _ _________ _________ + * 0 512 160 + * 1 576 160 + * 2 640 160 + * 3 704 160 + * 4 768 160 + * 5 832 160 + * 6 896 160 + * 7 960 160 + * 8 1024 160 + * + * The FIPS-186-1 compliant PQG generator takes j as an input parameter. + * + * FIPS 186-3 algorithm specifies 4 distinct P and Q sizes: + * + * bits in P bits in Q + * _________ _________ + * 1024 160 + * 2048 224 + * 2048 256 + * 3072 256 + * + * The FIPS-186-3 compliant PQG generator (PQG V2) takes arbitrary p and q + * lengths as input and returns an error if they aren't in this list. + */ + +#define DSA1_Q_BITS 160 +#define DSA_MAX_P_BITS 3072 +#define DSA_MIN_P_BITS 512 +#define DSA_MAX_Q_BITS 256 +#define DSA_MIN_Q_BITS 160 + +#if DSA_MAX_Q_BITS != DSA_MAX_SUBPRIME_LEN * 8 +#error "Inconsistent declaration of DSA SUBPRIME/Q parameters in blapit.h" +#endif + +/* + * function takes desired number of bits in P, + * returns index (0..8) or -1 if number of bits is invalid. + */ +#define PQG_PBITS_TO_INDEX(bits) \ + (((bits) < 512 || (bits) > 1024 || (bits) % 64) ? -1 : (int)((bits)-512) / 64) + +/* + * function takes index (0-8) + * returns number of bits in P for that index, or -1 if index is invalid. + */ +#define PQG_INDEX_TO_PBITS(j) (((unsigned)(j) > 8) ? -1 : (512 + 64 * (j))) + +/* When we are generating a gcm iv from a random number, we need to calculate + * an acceptable iteration count to avoid birthday attacks. (randomly + * generating the same IV twice).
+ * + * We use the approximation n = sqrt(2*m*p) to find an acceptable n given m + * and p. + * where n is the number of iterations. + * m is the number of possible random values. + * p is the probability of collision (0-1). + * + * We want to calculate the constant number GCMIV_RANDOM_BIRTHDAY_BITS, which + * is the number of bits we subtract off of the length of the iv (in bits) to + * get a safe count value (log2). + * + * Since we do the calculation in bits, we need to take log2 of the whole + * equation: + * log2 n = (1+(log2 m)+(log2 p))/2 + * Since p < 1, log2 p is negative. Also note that the length of the iv in + * bits is log2 m, so if we set GCMIV_RANDOM_BIRTHDAY_BITS = -(log2 p) - 1, + * then we can calculate a safe counter value with: + * n = 2^((ivLenBits - GCMIV_RANDOM_BIRTHDAY_BITS)/2) + * + * If we arbitrarily set p = 10^-18 (1 chance in a billion billion operations) + * we get GCMIV_RANDOM_BIRTHDAY_BITS = -(-18)/.301 -1 = 59 (.301 = log10 2) + * GCMIV_RANDOM_BIRTHDAY_BITS should be at least 59, call it a round 64. NOTE: + * the variable IV size for TLS is 64 bits, which explains why it's not safe + * to use a random value for the nonce in TLS.
*/ +#define GCMIV_RANDOM_BIRTHDAY_BITS 64 + +/* flag to tell BLAPI_Verify* to rerun the post and integrity tests */ +#define BLAPI_FIPS_RERUN_FLAG '\377' /* 0xff, 255 invalid code for UTF8/ASCII */ +#define BLAPI_FIPS_RERUN_FLAG_STRING "\377" /* The above as a C string */ + +/*************************************************************************** +** Opaque objects +*/ + +struct DESContextStr; +struct RC2ContextStr; +struct RC4ContextStr; +struct RC5ContextStr; +struct AESContextStr; +struct CamelliaContextStr; +struct MD2ContextStr; +struct MD5ContextStr; +struct SHA1ContextStr; +struct SHA256ContextStr; +struct SHA512ContextStr; +struct AESKeyWrapContextStr; +struct SEEDContextStr; +struct ChaCha20ContextStr; +struct ChaCha20Poly1305ContextStr; +struct Blake2bContextStr; + +typedef struct DESContextStr DESContext; +typedef struct RC2ContextStr RC2Context; +typedef struct RC4ContextStr RC4Context; +typedef struct RC5ContextStr RC5Context; +typedef struct AESContextStr AESContext; +typedef struct CamelliaContextStr CamelliaContext; +typedef struct MD2ContextStr MD2Context; +typedef struct MD5ContextStr MD5Context; +typedef struct SHA1ContextStr SHA1Context; +typedef struct SHA256ContextStr SHA256Context; +/* SHA224Context is really a SHA256ContextStr. This is not a mistake. */ +typedef struct SHA256ContextStr SHA224Context; +typedef struct SHA512ContextStr SHA512Context; +/* SHA384Context is really a SHA512ContextStr. This is not a mistake.
*/ +typedef struct SHA512ContextStr SHA384Context; +typedef struct AESKeyWrapContextStr AESKeyWrapContext; +typedef struct SEEDContextStr SEEDContext; +typedef struct ChaCha20ContextStr ChaCha20Context; +typedef struct ChaCha20Poly1305ContextStr ChaCha20Poly1305Context; +typedef struct Blake2bContextStr BLAKE2BContext; + +/*************************************************************************** +** RSA Public and Private Key structures +*/ + +/* member names from PKCS#1, section 7.1 */ +struct RSAPublicKeyStr { + PLArenaPool *arena; + SECItem modulus; + SECItem publicExponent; +}; +typedef struct RSAPublicKeyStr RSAPublicKey; + +/* member names from PKCS#1, section 7.2 */ +struct RSAPrivateKeyStr { + PLArenaPool *arena; + SECItem version; + SECItem modulus; + SECItem publicExponent; + SECItem privateExponent; + SECItem prime1; + SECItem prime2; + SECItem exponent1; + SECItem exponent2; + SECItem coefficient; +}; +typedef struct RSAPrivateKeyStr RSAPrivateKey; + +/*************************************************************************** +** DSA Public and Private Key and related structures +*/ + +struct PQGParamsStr { + PLArenaPool *arena; + SECItem prime; /* p */ + SECItem subPrime; /* q */ + SECItem base; /* g */ + /* XXX chrisk: this needs to be expanded to hold j and validationParms (RFC2459 7.3.2) */ +}; +typedef struct PQGParamsStr PQGParams; + +struct PQGVerifyStr { + PLArenaPool *arena; /* includes this struct, seed, & h. 
*/ + unsigned int counter; + SECItem seed; + SECItem h; +}; +typedef struct PQGVerifyStr PQGVerify; + +struct DSAPublicKeyStr { + PQGParams params; + SECItem publicValue; +}; +typedef struct DSAPublicKeyStr DSAPublicKey; + +struct DSAPrivateKeyStr { + PQGParams params; + SECItem publicValue; + SECItem privateValue; +}; +typedef struct DSAPrivateKeyStr DSAPrivateKey; + +/*************************************************************************** +** Diffie-Hellman Public and Private Key and related structures +** Structure member names suggested by PKCS#3. +*/ + +struct DHParamsStr { + PLArenaPool *arena; + SECItem prime; /* p */ + SECItem base; /* g */ +}; +typedef struct DHParamsStr DHParams; + +struct DHPublicKeyStr { + PLArenaPool *arena; + SECItem prime; + SECItem base; + SECItem publicValue; +}; +typedef struct DHPublicKeyStr DHPublicKey; + +struct DHPrivateKeyStr { + PLArenaPool *arena; + SECItem prime; + SECItem base; + SECItem publicValue; + SECItem privateValue; +}; +typedef struct DHPrivateKeyStr DHPrivateKey; + +/*************************************************************************** +** Data structures used for elliptic curve parameters and +** public and private keys. +*/ + +/* +** The ECParams data structures can encode elliptic curve +** parameters for both GFp and GF2m curves. 
+*/ + +typedef enum { ec_params_explicit, + ec_params_named +} ECParamsType; + +typedef enum { ec_field_GFp = 1, + ec_field_GF2m, + ec_field_plain +} ECFieldType; + +struct ECFieldIDStr { + int size; /* field size in bits */ + ECFieldType type; + union { + SECItem prime; /* prime p for (GFp) */ + SECItem poly; /* irreducible binary polynomial for (GF2m) */ + } u; + int k1; /* first coefficient of pentanomial or + * the only coefficient of trinomial + */ + int k2; /* two remaining coefficients of pentanomial */ + int k3; +}; +typedef struct ECFieldIDStr ECFieldID; + +struct ECCurveStr { + SECItem a; /* contains octet stream encoding of + * field element (X9.62 section 4.3.3) + */ + SECItem b; + SECItem seed; +}; +typedef struct ECCurveStr ECCurve; + +struct ECParamsStr { + PLArenaPool *arena; + ECParamsType type; + ECFieldID fieldID; + ECCurve curve; + SECItem base; + SECItem order; + int cofactor; + SECItem DEREncoding; + ECCurveName name; + SECItem curveOID; +}; +typedef struct ECParamsStr ECParams; + +struct ECPublicKeyStr { + ECParams ecParams; + SECItem publicValue; /* elliptic curve point encoded as + * octet stream. 
+ */ +}; +typedef struct ECPublicKeyStr ECPublicKey; + +struct ECPrivateKeyStr { + ECParams ecParams; + SECItem publicValue; /* encoded ec point */ + SECItem privateValue; /* private big integer */ + SECItem version; /* As per SEC 1, Appendix C, Section C.4 */ +}; +typedef struct ECPrivateKeyStr ECPrivateKey; + +typedef void *(*BLapiAllocateFunc)(void); +typedef void (*BLapiDestroyContextFunc)(void *cx, PRBool freeit); +typedef SECStatus (*BLapiInitContextFunc)(void *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *, + int, + unsigned int, + unsigned int); +typedef SECStatus (*BLapiEncrypt)(void *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +#endif /* _BLAPIT_H_ */ diff --git a/security/nss/lib/freebl/blinit.c b/security/nss/lib/freebl/blinit.c new file mode 100644 index 0000000000..b8773b063b --- /dev/null +++ b/security/nss/lib/freebl/blinit.c @@ -0,0 +1,573 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapii.h" +#include "mpi.h" +#include "secerr.h" +#include "prtypes.h" +#include "prinit.h" +#include "prenv.h" + +#if defined(_MSC_VER) && !defined(_M_IX86) +#include /* for _xgetbv() */ +#endif + +#if defined(_WIN64) && defined(__aarch64__) +#include +#endif + +#if defined(DARWIN) +#include +#endif + +static PRCallOnceType coFreeblInit; + +/* State variables. 
*/ +static PRBool aesni_support_ = PR_FALSE; +static PRBool clmul_support_ = PR_FALSE; +static PRBool sha_support_ = PR_FALSE; +static PRBool avx_support_ = PR_FALSE; +static PRBool avx2_support_ = PR_FALSE; +static PRBool ssse3_support_ = PR_FALSE; +static PRBool sse4_1_support_ = PR_FALSE; +static PRBool sse4_2_support_ = PR_FALSE; +static PRBool arm_neon_support_ = PR_FALSE; +static PRBool arm_aes_support_ = PR_FALSE; +static PRBool arm_sha1_support_ = PR_FALSE; +static PRBool arm_sha2_support_ = PR_FALSE; +static PRBool arm_pmull_support_ = PR_FALSE; +static PRBool ppc_crypto_support_ = PR_FALSE; + +#ifdef NSS_X86_OR_X64 +/* + * Adapted from the example code in "How to detect New Instruction support in + * the 4th generation Intel Core processor family" by Max Locktyukhin. + * https://www.intel.com/content/dam/develop/external/us/en/documents/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf + * + * XGETBV: + * Reads an extended control register (XCR) specified by ECX into EDX:EAX. + */ +static PRBool +check_xcr0_ymm() +{ + PRUint32 xcr0; +#if defined(_MSC_VER) +#if defined(_M_IX86) + __asm { + mov ecx, 0 + xgetbv + mov xcr0, eax + } +#else + xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */ +#endif /* _M_IX86 */ +#else /* _MSC_VER */ + /* Old OSX compilers don't support xgetbv. Use byte form. */ + __asm__(".byte 0x0F, 0x01, 0xd0" + : "=a"(xcr0) + : "c"(0) + : "%edx"); +#endif /* _MSC_VER */ + /* Check if xmm and ymm state are enabled in XCR0. 
*/ + return (xcr0 & 6) == 6; +} + +#define ECX_AESNI (1 << 25) +#define ECX_CLMUL (1 << 1) +#define ECX_XSAVE (1 << 26) +#define ECX_OSXSAVE (1 << 27) +#define ECX_AVX (1 << 28) +#define EBX_AVX2 (1 << 5) +#define EBX_BMI1 (1 << 3) +#define EBX_BMI2 (1 << 8) +#define EBX_SHA (1 << 29) +#define ECX_FMA (1 << 12) +#define ECX_MOVBE (1 << 22) +#define ECX_SSSE3 (1 << 9) +#define ECX_SSE4_1 (1 << 19) +#define ECX_SSE4_2 (1 << 20) +#define AVX_BITS (ECX_XSAVE | ECX_OSXSAVE | ECX_AVX) +#define AVX2_EBX_BITS (EBX_AVX2 | EBX_BMI1 | EBX_BMI2) +#define AVX2_ECX_BITS (ECX_FMA | ECX_MOVBE) + +void +CheckX86CPUSupport() +{ + unsigned long eax, ebx, ecx, edx; + unsigned long eax7, ebx7, ecx7, edx7; + char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); + char *disable_pclmul = PR_GetEnvSecure("NSS_DISABLE_PCLMUL"); + char *disable_hw_sha = PR_GetEnvSecure("NSS_DISABLE_HW_SHA"); + char *disable_avx = PR_GetEnvSecure("NSS_DISABLE_AVX"); + char *disable_avx2 = PR_GetEnvSecure("NSS_DISABLE_AVX2"); + char *disable_ssse3 = PR_GetEnvSecure("NSS_DISABLE_SSSE3"); + char *disable_sse4_1 = PR_GetEnvSecure("NSS_DISABLE_SSE4_1"); + char *disable_sse4_2 = PR_GetEnvSecure("NSS_DISABLE_SSE4_2"); + freebl_cpuid(1, &eax, &ebx, &ecx, &edx); + freebl_cpuid(7, &eax7, &ebx7, &ecx7, &edx7); + aesni_support_ = (PRBool)((ecx & ECX_AESNI) != 0 && disable_hw_aes == NULL); + clmul_support_ = (PRBool)((ecx & ECX_CLMUL) != 0 && disable_pclmul == NULL); + sha_support_ = (PRBool)((ebx7 & EBX_SHA) != 0 && disable_hw_sha == NULL); + /* For AVX we ensure that: + * - The AVX, OSXSAVE, and XSAVE bits of ECX from CPUID(EAX=1) are set, and + * - the SSE and AVX state bits of XCR0 are set (check_xcr0_ymm). + */ + avx_support_ = (PRBool)((ecx & AVX_BITS) == AVX_BITS) && check_xcr0_ymm() && + disable_avx == NULL; + /* For AVX2 we ensure that: + * - AVX is supported, + * - the AVX2, BMI1, and BMI2 bits of EBX from CPUID(EAX=7) are set, and + * - the FMA, and MOVBE bits of ECX from CPUID(EAX=1) are set. 
+ * We do not check for LZCNT support. + */ + avx2_support_ = (PRBool)(avx_support_ == PR_TRUE && + (ebx7 & AVX2_EBX_BITS) == AVX2_EBX_BITS && + (ecx & AVX2_ECX_BITS) == AVX2_ECX_BITS && + disable_avx2 == NULL); + ssse3_support_ = (PRBool)((ecx & ECX_SSSE3) != 0 && + disable_ssse3 == NULL); + sse4_1_support_ = (PRBool)((ecx & ECX_SSE4_1) != 0 && + disable_sse4_1 == NULL); + sse4_2_support_ = (PRBool)((ecx & ECX_SSE4_2) != 0 && + disable_sse4_2 == NULL); +} +#endif /* NSS_X86_OR_X64 */ + +/* clang-format off */ +#if (defined(__aarch64__) || defined(__arm__)) && !defined(TARGET_OS_IPHONE) +#ifndef __has_include +#define __has_include(x) 0 +#endif +#if (__has_include() || defined(__linux__)) && \ + defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__) +/* This might be conflict with host compiler */ +#if !defined(__ANDROID__) +#include +#endif +extern unsigned long getauxval(unsigned long type) __attribute__((weak)); +#elif defined(__arm__) || (!defined(__OpenBSD__) && !defined(_WIN64)) +static unsigned long (*getauxval)(unsigned long) = NULL; +#endif /* defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__)*/ + +#if defined(__FreeBSD__) && !defined(__aarch64__) && __has_include() +/* Avoid conflict with static declaration above */ +#define getauxval freebl_getauxval +static unsigned long getauxval(unsigned long type) +{ + /* Only AT_HWCAP* return unsigned long */ + if (type != AT_HWCAP && type != AT_HWCAP2) { + return 0; + } + + unsigned long ret = 0; + elf_aux_info(type, &ret, sizeof(ret)); + return ret; +} +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif +#ifndef AT_HWCAP +#define AT_HWCAP 16 +#endif + +#endif /* defined(__aarch64__) || defined(__arm__) */ +/* clang-format on */ + +#if defined(__aarch64__) + +#if defined(__linux__) +// Defines from hwcap.h in Linux kernel - ARM64 +#ifndef HWCAP_AES +#define HWCAP_AES (1 << 3) +#endif +#ifndef HWCAP_PMULL +#define HWCAP_PMULL (1 << 4) +#endif +#ifndef HWCAP_SHA1 +#define HWCAP_SHA1 (1 << 5) +#endif 
+#ifndef HWCAP_SHA2 +#define HWCAP_SHA2 (1 << 6) +#endif +#endif /* defined(__linux__) */ + +#if defined(__FreeBSD__) +#include +#include +// Support for older version of armreg.h +#ifndef ID_AA64ISAR0_AES_VAL +#define ID_AA64ISAR0_AES_VAL ID_AA64ISAR0_AES +#endif +#ifndef ID_AA64ISAR0_SHA1_VAL +#define ID_AA64ISAR0_SHA1_VAL ID_AA64ISAR0_SHA1 +#endif +#ifndef ID_AA64ISAR0_SHA2_VAL +#define ID_AA64ISAR0_SHA2_VAL ID_AA64ISAR0_SHA2 +#endif +#endif /* defined(__FreeBSD__) */ + +#if defined(__OpenBSD__) +#include +#include +#include +#endif /* defined(__OpenBSD__) */ + +void +CheckARMSupport() +{ +#if defined(_WIN64) + BOOL arm_crypto_support = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); + arm_aes_support_ = arm_crypto_support; + arm_pmull_support_ = arm_crypto_support; + arm_sha1_support_ = arm_crypto_support; + arm_sha2_support_ = arm_crypto_support; +#elif defined(__linux__) + if (getauxval) { + long hwcaps = getauxval(AT_HWCAP); + arm_aes_support_ = (hwcaps & HWCAP_AES) == HWCAP_AES; + arm_pmull_support_ = (hwcaps & HWCAP_PMULL) == HWCAP_PMULL; + arm_sha1_support_ = (hwcaps & HWCAP_SHA1) == HWCAP_SHA1; + arm_sha2_support_ = (hwcaps & HWCAP_SHA2) == HWCAP_SHA2; + } +#elif defined(__FreeBSD__) + /* qemu-user does not support register access from userspace */ + if (PR_GetEnvSecure("QEMU_EMULATING") == NULL) { + uint64_t isar0 = READ_SPECIALREG(id_aa64isar0_el1); + arm_aes_support_ = ID_AA64ISAR0_AES_VAL(isar0) >= ID_AA64ISAR0_AES_BASE; + arm_pmull_support_ = ID_AA64ISAR0_AES_VAL(isar0) >= ID_AA64ISAR0_AES_PMULL; + arm_sha1_support_ = ID_AA64ISAR0_SHA1_VAL(isar0) >= ID_AA64ISAR0_SHA1_BASE; + arm_sha2_support_ = ID_AA64ISAR0_SHA2_VAL(isar0) >= ID_AA64ISAR0_SHA2_BASE; + } +#elif defined(__OpenBSD__) + const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 }; + uint64_t isar0; + size_t len = sizeof(isar0); + if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) < 0) + return; + arm_aes_support_ = ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_BASE; + 
arm_pmull_support_ = ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL; + arm_sha1_support_ = ID_AA64ISAR0_SHA1(isar0) >= ID_AA64ISAR0_SHA1_BASE; + arm_sha2_support_ = ID_AA64ISAR0_SHA2(isar0) >= ID_AA64ISAR0_SHA2_BASE; +#elif defined(__ARM_FEATURE_CRYPTO) + /* + * Although no feature detection, default compiler option allows ARM + * Crypto Extension. + */ + arm_aes_support_ = PR_TRUE; + arm_pmull_support_ = PR_TRUE; + arm_sha1_support_ = PR_TRUE; + arm_sha2_support_ = PR_TRUE; +#endif + /* aarch64 must support NEON. */ + arm_neon_support_ = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON") == NULL; + arm_aes_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_AES") == NULL; + arm_pmull_support_ &= PR_GetEnvSecure("NSS_DISABLE_PMULL") == NULL; + arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL; + arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL; +} +#endif /* defined(__aarch64__) */ + +#if defined(__arm__) +// Defines from hwcap.h in Linux kernel - ARM +/* + * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + */ +#ifndef HWCAP_NEON +#define HWCAP_NEON (1 << 12) +#endif + +/* + * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2 + */ +#ifndef HWCAP2_AES +#define HWCAP2_AES (1 << 0) +#endif +#ifndef HWCAP2_PMULL +#define HWCAP2_PMULL (1 << 1) +#endif +#ifndef HWCAP2_SHA1 +#define HWCAP2_SHA1 (1 << 2) +#endif +#ifndef HWCAP2_SHA2 +#define HWCAP2_SHA2 (1 << 3) +#endif + +PRBool +GetNeonSupport() +{ + char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON"); + if (disable_arm_neon) { + return PR_FALSE; + } +#if defined(__ARM_NEON) || defined(__ARM_NEON__) + // Compiler generates NEON instruction as default option. + // If no getauxval, compiler generate NEON instruction by default, + // we should allow NEON support.
+ return PR_TRUE; +#elif !defined(__ANDROID__) + // Android's cpu-features.c detects features by the following logic + // + // - Call getauxval(AT_HWCAP) + // - Parse /proc/self/auxv if getauxval is nothing or returns 0 + // - Parse /proc/cpuinfo if both cannot detect features + // + // But we don't use it for Android since Android document + // (https://developer.android.com/ndk/guides/cpu-features) says + // one problem with AT_HWCAP sometimes devices (Nexus 4 and emulator) + // are mistaken for IDIV. + if (getauxval) { + return (getauxval(AT_HWCAP) & HWCAP_NEON); + } +#endif /* defined(__ARM_NEON) || defined(__ARM_NEON__) */ + return PR_FALSE; +} + +#ifdef __linux__ +static long +ReadCPUInfoForHWCAP2() +{ + FILE *cpuinfo; + char buf[512]; + char *p; + long hwcap2 = 0; + + cpuinfo = fopen("/proc/cpuinfo", "r"); + if (!cpuinfo) { + return 0; + } + while (fgets(buf, 511, cpuinfo)) { + if (!memcmp(buf, "Features", 8)) { + p = strstr(buf, " aes"); + if (p && (p[4] == ' ' || p[4] == '\n')) { + hwcap2 |= HWCAP2_AES; + } + p = strstr(buf, " sha1"); + if (p && (p[5] == ' ' || p[5] == '\n')) { + hwcap2 |= HWCAP2_SHA1; + } + p = strstr(buf, " sha2"); + if (p && (p[5] == ' ' || p[5] == '\n')) { + hwcap2 |= HWCAP2_SHA2; + } + p = strstr(buf, " pmull"); + if (p && (p[6] == ' ' || p[6] == '\n')) { + hwcap2 |= HWCAP2_PMULL; + } + break; + } + } + + fclose(cpuinfo); + return hwcap2; +} +#endif /* __linux__ */ + +void +CheckARMSupport() +{ + char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); + if (getauxval) { + // Android's cpu-features.c uses AT_HWCAP2 for newer features. + // AT_HWCAP2 is implemented on newer devices / kernel, so we can trust + // it since cpu-features.c doesn't have workaround / fallback. + // Also, AT_HWCAP2 is supported by glibc 2.18+ on Linux/arm, If + // AT_HWCAP2 isn't supported by glibc or Linux kernel, getauxval will + // returns 0. 
+ long hwcaps = getauxval(AT_HWCAP2); +#ifdef __linux__ + if (!hwcaps) { + // Some ARMv8 devices may not implement AT_HWCAP2. So we also + // read /proc/cpuinfo if AT_HWCAP2 is 0. + hwcaps = ReadCPUInfoForHWCAP2(); + } +#endif + arm_aes_support_ = hwcaps & HWCAP2_AES && disable_hw_aes == NULL; + arm_pmull_support_ = (hwcaps & HWCAP2_PMULL) != 0 && PR_GetEnvSecure("NSS_DISABLE_PMULL") == NULL; + arm_sha1_support_ = (hwcaps & HWCAP2_SHA1) != 0; /* store 0/1: the raw bit (1 << 2) would be zeroed by the &= with a 0/1 value below */ + arm_sha2_support_ = (hwcaps & HWCAP2_SHA2) != 0; /* store 0/1: the raw bit (1 << 3) would be zeroed by the &= with a 0/1 value below */ + } + arm_neon_support_ = GetNeonSupport(); + arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL; + arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL; +} +#endif /* defined(__arm__) */ + +// Enable when Firefox can use it for Android API 16 and 17. +// #if defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__)) +// #include +// void +// CheckARMSupport() +// { +// char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON"); +// char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); +// AndroidCpuFamily family = android_getCpuFamily(); +// uint64_t features = android_getCpuFeatures(); +// if (family == ANDROID_CPU_FAMILY_ARM64) { +// arm_aes_support_ = features & ANDROID_CPU_ARM64_FEATURE_AES && +// disable_hw_aes == NULL; +// arm_pmull_support_ = features & ANDROID_CPU_ARM64_FEATURE_PMULL; +// arm_sha1_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA1; +// arm_sha2_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA2; +// arm_neon_support_ = disable_arm_neon == NULL; +// } +// if (family == ANDROID_CPU_FAMILY_ARM) { +// arm_aes_support_ = features & ANDROID_CPU_ARM_FEATURE_AES && +// disable_hw_aes == NULL; +// arm_pmull_support_ = features & ANDROID_CPU_ARM_FEATURE_PMULL; +// arm_sha1_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA1; +// arm_sha2_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA2; +// arm_neon_support_ = hwcaps & ANDROID_CPU_ARM_FEATURE_NEON && +// disable_arm_neon == NULL; +// } +// } +// #endif /* defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__))
*/
+/* Accessors for the CPU capability flags: each one returns the corresponding *_support_ flag as is. */
+PRBool
+aesni_support()
+{
+    return aesni_support_;
+}
+PRBool
+clmul_support()
+{
+    return clmul_support_;
+}
+PRBool
+sha_support()
+{
+    return sha_support_;
+}
+PRBool
+avx_support()
+{
+    return avx_support_;
+}
+PRBool
+avx2_support()
+{
+    return avx2_support_;
+}
+PRBool
+ssse3_support()
+{
+    return ssse3_support_;
+}
+PRBool
+sse4_1_support()
+{
+    return sse4_1_support_;
+}
+PRBool
+sse4_2_support()
+{
+    return sse4_2_support_;
+}
+PRBool
+arm_neon_support()
+{
+    return arm_neon_support_;
+}
+PRBool
+arm_aes_support()
+{
+    return arm_aes_support_;
+}
+PRBool
+arm_pmull_support()
+{
+    return arm_pmull_support_;
+}
+PRBool
+arm_sha1_support()
+{
+    return arm_sha1_support_;
+}
+PRBool
+arm_sha2_support()
+{
+    return arm_sha2_support_;
+}
+PRBool
+ppc_crypto_support()
+{
+    return ppc_crypto_support_;
+}
+
+#if defined(__powerpc__)
+/* Compilers without __has_include get a stub that evaluates to 0, so the <sys/auxv.h> paths below are skipped and hwcaps stays 0. */
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
+/* clang-format off */
+#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#endif
+#elif (defined(__FreeBSD__) && __FreeBSD__ < 12)
+#include <sys/sysctl.h>
+#endif
+
+// Defines from cputable.h in Linux kernel - PPC, letting us build on older kernels
+#ifndef PPC_FEATURE2_VEC_CRYPTO
+#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+#endif
+/* Sets ppc_crypto_support_ when the HWCAP2 word has PPC_FEATURE2_VEC_CRYPTO and NSS_DISABLE_PPC_GHASH is not set in the environment. */
+static void
+CheckPPCSupport()
+{
+    char *disable_hw_crypto = PR_GetEnvSecure("NSS_DISABLE_PPC_GHASH");
+
+    unsigned long hwcaps = 0; /* stays 0 (feature off) when no query below is compiled in or the query fails */
+#if defined(__linux__)
+#if __has_include(<sys/auxv.h>)
+    hwcaps = getauxval(AT_HWCAP2);
+#endif
+#elif defined(__FreeBSD__)
+#if __FreeBSD__ >= 12
+#if __has_include(<sys/auxv.h>)
+    elf_aux_info(AT_HWCAP2, &hwcaps, sizeof(hwcaps));
+#endif
+#else
+    size_t len = sizeof(hwcaps);
+    sysctlbyname("hw.cpu_features2", &hwcaps, &len, NULL, 0);
+#endif
+#endif
+
+    ppc_crypto_support_ = hwcaps & PPC_FEATURE2_VEC_CRYPTO && disable_hw_crypto == NULL;
+}
+/* clang-format on */
+
+#endif /* __powerpc__ */
+/* Initializer handed to PR_CallOnce by BL_Init: runs the CPU feature probe selected at compile time and always returns PR_SUCCESS. */
+static PRStatus
+FreeblInit(void)
+{
+#ifdef NSS_X86_OR_X64
+    CheckX86CPUSupport();
+#elif (defined(__aarch64__) || defined(__arm__))
+    CheckARMSupport();
+#elif (defined(__powerpc__))
+    CheckPPCSupport();
+#endif
+    return PR_SUCCESS;
+}
+/* Runs FreeblInit through PR_CallOnce, then RSA_Init(); on PR_CallOnce failure sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure. */
+SECStatus
+BL_Init()
+{
+    if (PR_CallOnce(&coFreeblInit, FreeblInit) != PR_SUCCESS) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    RSA_Init();
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/blname.c b/security/nss/lib/freebl/blname.c
new file mode 100644
index 0000000000..4bad74ada0
--- /dev/null
+++ b/security/nss/lib/freebl/blname.c
@@ -0,0 +1,100 @@
+/*
+ * blname.c - determine the freebl library name.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if defined(FREEBL_LOWHASH)
+static const char* default_name =
+    SHLIB_PREFIX "freeblpriv" SHLIB_VERSION "." SHLIB_SUFFIX;
+#else
+static const char* default_name =
+    SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+/* getLibName() returns the name of the library to load. */
+
+#if defined(SOLARIS) && defined(__sparc)
+#include <stddef.h>
+#include <string.h>
+#include <sys/systeminfo.h>
+
+#if defined(NSS_USE_64)
+
+const static char fpu_hybrid_shared_lib[] = "libfreebl_64fpu_3.so";
+const static char int_hybrid_shared_lib[] = "libfreebl_64int_3.so";
+const static char non_hybrid_shared_lib[] = "libfreebl_64fpu_3.so"; /* same file as the fpu variant */
+
+const static char int_hybrid_isa[] = "sparcv9";
+const static char fpu_hybrid_isa[] = "sparcv9+vis";
+
+#else
+
+const static char fpu_hybrid_shared_lib[] = "libfreebl_32fpu_3.so";
+const static char int_hybrid_shared_lib[] = "libfreebl_32int64_3.so";
+/* This was for SPARC V8, now obsolete.
*/
+const static char* const non_hybrid_shared_lib = NULL;
+
+const static char int_hybrid_isa[] = "sparcv8plus";
+const static char fpu_hybrid_isa[] = "sparcv8plus+vis";
+
+#endif
+/* SPARC: picks the library from the sysinfo(SI_ISALIST) ISA list; returns NULL if sysinfo fails, else the fpu, int or non-hybrid name. */
+static const char*
+getLibName(void)
+{
+    char* found_int_hybrid;
+    char* found_fpu_hybrid;
+    long buflen;
+    char buf[256];
+
+    buflen = sysinfo(SI_ISALIST, buf, sizeof buf);
+    if (buflen <= 0)
+        return NULL;
+    /* sysinfo output is always supposed to be NUL terminated, but ... */
+    if (buflen < sizeof buf)
+        buf[buflen] = '\0';
+    else
+        buf[(sizeof buf) - 1] = '\0';
+    /* The ISA list is a space separated string of names of ISAs and
+     * ISA extensions, in order of decreasing performance.
+     * There are two different ISAs with which NSS's crypto code can be
+     * accelerated. If both are in the list, we take the first one.
+     * If one is in the list, we use it, and if neither then we use
+     * the base unaccelerated code.
+     */
+    found_int_hybrid = strstr(buf, int_hybrid_isa);
+    found_fpu_hybrid = strstr(buf, fpu_hybrid_isa);
+    if (found_fpu_hybrid &&
+        (!found_int_hybrid ||
+         (found_int_hybrid - found_fpu_hybrid) >= 0)) { /* == 0 when the int name, a prefix of the fpu name, matched inside the fpu entry */
+        return fpu_hybrid_shared_lib;
+    }
+    if (found_int_hybrid) {
+        return int_hybrid_shared_lib;
+    }
+    return non_hybrid_shared_lib;
+}
+
+#elif defined(HPUX) && !defined(NSS_USE_64) && !defined(__ia64)
+#include <unistd.h>
+
+/* Chooses the library by CPU: sysconf(_SC_CPU_VERSION) == CPU_PA_RISC2_0 selects
+** the "fpu" build (libfreebl_32fpu_3.sl), any other value the "int" build.
+*/
+static const char*
+getLibName(void)
+{
+    long cpu = sysconf(_SC_CPU_VERSION);
+    return (cpu == CPU_PA_RISC2_0)
+               ? "libfreebl_32fpu_3.sl"
+               : "libfreebl_32int_3.sl";
+}
+#else
+/* default case, for platforms/ABIs that have only one freebl shared lib.
*/
+static const char*
+getLibName(void)
+{
+    return default_name;
+}
+#endif
diff --git a/security/nss/lib/freebl/camellia.c b/security/nss/lib/freebl/camellia.c
new file mode 100644
index 0000000000..80a8472a79
--- /dev/null
+++ b/security/nss/lib/freebl/camellia.c
@@ -0,0 +1,1896 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prinit.h"
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "camellia.h"
+#include "sha_fast.h" /* for SHA_HTONL and related configuration macros */
+
+/* Key-schedule constants SIGMA1..SIGMA6, each split into a 32-bit left (L) and right (R) half. */
+
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/*
+ * GETU32 / PUTU32: 32-bit load / store on a byte pointer. NOTE(review): PUTU32 expands to a bare { } block, not do { } while (0), so it cannot be the unbraced body of an if that has an else.
+ */
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+
+/* Word-at-a-time variant: casts the byte pointer to PRUint32 *, so it requires a CPU that allows unaligned access; SHA_HTONL is presumably the byte-order swap - see sha_fast.h. */
+
+#if defined(SHA_NEED_TMP_VARIABLE)
+#define CAMELLIA_NEED_TMP_VARIABLE 1
+#endif
+
+#define GETU32(p) SHA_HTONL(*((PRUint32 *)(p)))
+#define PUTU32(ct, st)                       \
+    {                                        \
+        *((PRUint32 *)(ct)) = SHA_HTONL(st); \
+    }
+
+#else /* no unaligned access: build and split the word one byte at a time, most significant byte first */
+
+#define GETU32(pt) \
+    (((PRUint32)(pt)[0] << 24) ^ ((PRUint32)(pt)[1] << 16) ^ ((PRUint32)(pt)[2] << 8) ^ ((PRUint32)(pt)[3]))
+
+#define PUTU32(ct, st)                   \
+    {                                    \
+        (ct)[0] = (PRUint8)((st) >> 24); \
+        (ct)[1] = (PRUint8)((st) >> 16); \
+        (ct)[2] = (PRUint8)((st) >> 8);  \
+        (ct)[3] = (PRUint8)(st);         \
+    }
+
+#endif
+/* Element 2*INDEX of an array named subkey that must be in scope at the point of use. */
+#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2])
+#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1])
+
+/* rotate a 32-bit unsigned word right by 8 bits (1 byte) */
+#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24))
+/* rotate a 32-bit unsigned word left by 1 bit */
+#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31))
+/* rotate a 32-bit unsigned word left by 8 bits (1 byte) */
+#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24))
+/* Rotate the 128-bit value ll||lr||rl||rr left by bits, 0 < bits < 32 (other values shift by >= 32). w0 is scratch, w1 is unused. */
+#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits)     \
+    do {                                                 \
+        w0 = ll;                                         \
+        ll = ((ll) << (bits)) + ((lr) >> (32 - (bits))); \
+        lr = ((lr) << (bits)) + ((rl) >> (32 - (bits))); \
+        rl = ((rl) << (bits)) + ((rr) >> (32 - (bits))); \
+        rr = ((rr) << (bits)) + ((w0) >> (32 - (bits))); \
+    } while (0)
+/* Same 128-bit left rotation for 32 < bits < 64; w0 and w1 are both scratch. */
+#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits)         \
+    do {                                                        \
+        w0 = ll;                                                \
+        w1 = lr;                                                \
+        ll = ((lr) << ((bits) - 32)) + ((rl) >> (64 - (bits))); \
+        lr = ((rl) << ((bits) - 32)) + ((rr) >> (64 - (bits))); \
+        rl = ((rr) << ((bits) - 32)) + ((w0) >> (64 - (bits))); \
+        rr = ((w0) << ((bits) - 32)) + ((w1) >> (64 - (bits))); \
+    } while (0)
+/* Lookups into the four 256-entry tables; every use in this file masks INDEX with 0xff first. */
+#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)])
+#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)])
+#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)])
+#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)])
+/* XORs xl||xr with kl||kr, sends the eight bytes through the SP tables and leaves the mixed result in yl||yr; il, ir, t0, t1 are scratch. */
+#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+    do {                                                   \
+        il = (xl) ^ (kl);                                  \
+        ir = (xr) ^ (kr);                                  \
+        t0 = (il) >> 16;                                   \
+        t1 = (ir) >> 16;                                   \
+        yl = CAMELLIA_SP1110((ir) & 0xff) ^                \
+             CAMELLIA_SP0222(((t1) >> 8) & 0xff) ^         \
+             CAMELLIA_SP3033((t1) & 0xff) ^                \
+             CAMELLIA_SP4404(((ir) >> 8) & 0xff);          \
+        yr = CAMELLIA_SP1110(((t0) >> 8) & 0xff) ^         \
+             CAMELLIA_SP0222((t0) & 0xff) ^                \
+             CAMELLIA_SP3033(((il) >> 8) & 0xff) ^         \
+             CAMELLIA_SP4404((il) & 0xff);                 \
+        yl ^= yr;                                          \
+        yr = CAMELLIA_RR8(yr);                             \
+        yr ^= yl;                                          \
+    } while (0)
+
+/*
+ * FL on ll||lr with key kll||klr and FL^-1 on rl||rr with key krl||krr
+ * (RFC 3713 naming), merged into one macro for speed up; t0..t3 are scratch.
+ */
+#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \
+    do {                                                                 \
+        t0 = kll;                                                        \
+        t0 &= ll;                                                        \
+        lr ^= CAMELLIA_RL1(t0);                                          \
+        t1 = klr;                                                        \
+        t1 |= lr;                                                        \
+        ll ^= t1;                                                        \
+                                                                         \
+        t2 = krr;                                                        \
+        t2 |= rr;                                                        \
+        rl ^= t2;                                                        \
+        t3 = krl;                                                        \
+        t3 &= rl;                                                        \
+        rr ^= CAMELLIA_RL1(t3);                                          \
+    } while (0)
+
+#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \ + do { \ + ir = CAMELLIA_SP1110(xr & 0xff) ^ \ + CAMELLIA_SP0222((xr >> 24) & 0xff) ^ \ + CAMELLIA_SP3033((xr >> 16) & 0xff) ^ \ + CAMELLIA_SP4404((xr >> 8) & 0xff); \ + il = CAMELLIA_SP1110((xl >> 24) & 0xff) ^ \ + CAMELLIA_SP0222((xl >> 16) & 0xff) ^ \ + CAMELLIA_SP3033((xl >> 8) & 0xff) ^ \ + CAMELLIA_SP4404(xl & 0xff); \ + il ^= kl; \ + ir ^= kr; \ + ir ^= il; \ + il = CAMELLIA_RR8(il); \ + il ^= ir; \ + yl ^= ir; \ + yr ^= il; \ + } while (0) + +static const PRUint32 camellia_sp1110[256] = { + 0x70707000, 0x82828200, 0x2c2c2c00, 0xececec00, + 0xb3b3b300, 0x27272700, 0xc0c0c000, 0xe5e5e500, + 0xe4e4e400, 0x85858500, 0x57575700, 0x35353500, + 0xeaeaea00, 0x0c0c0c00, 0xaeaeae00, 0x41414100, + 0x23232300, 0xefefef00, 0x6b6b6b00, 0x93939300, + 0x45454500, 0x19191900, 0xa5a5a500, 0x21212100, + 0xededed00, 0x0e0e0e00, 0x4f4f4f00, 0x4e4e4e00, + 0x1d1d1d00, 0x65656500, 0x92929200, 0xbdbdbd00, + 0x86868600, 0xb8b8b800, 0xafafaf00, 0x8f8f8f00, + 0x7c7c7c00, 0xebebeb00, 0x1f1f1f00, 0xcecece00, + 0x3e3e3e00, 0x30303000, 0xdcdcdc00, 0x5f5f5f00, + 0x5e5e5e00, 0xc5c5c500, 0x0b0b0b00, 0x1a1a1a00, + 0xa6a6a600, 0xe1e1e100, 0x39393900, 0xcacaca00, + 0xd5d5d500, 0x47474700, 0x5d5d5d00, 0x3d3d3d00, + 0xd9d9d900, 0x01010100, 0x5a5a5a00, 0xd6d6d600, + 0x51515100, 0x56565600, 0x6c6c6c00, 0x4d4d4d00, + 0x8b8b8b00, 0x0d0d0d00, 0x9a9a9a00, 0x66666600, + 0xfbfbfb00, 0xcccccc00, 0xb0b0b000, 0x2d2d2d00, + 0x74747400, 0x12121200, 0x2b2b2b00, 0x20202000, + 0xf0f0f000, 0xb1b1b100, 0x84848400, 0x99999900, + 0xdfdfdf00, 0x4c4c4c00, 0xcbcbcb00, 0xc2c2c200, + 0x34343400, 0x7e7e7e00, 0x76767600, 0x05050500, + 0x6d6d6d00, 0xb7b7b700, 0xa9a9a900, 0x31313100, + 0xd1d1d100, 0x17171700, 0x04040400, 0xd7d7d700, + 0x14141400, 0x58585800, 0x3a3a3a00, 0x61616100, + 0xdedede00, 0x1b1b1b00, 0x11111100, 0x1c1c1c00, + 0x32323200, 0x0f0f0f00, 0x9c9c9c00, 0x16161600, + 0x53535300, 0x18181800, 0xf2f2f200, 0x22222200, + 0xfefefe00, 0x44444400, 
0xcfcfcf00, 0xb2b2b200, + 0xc3c3c300, 0xb5b5b500, 0x7a7a7a00, 0x91919100, + 0x24242400, 0x08080800, 0xe8e8e800, 0xa8a8a800, + 0x60606000, 0xfcfcfc00, 0x69696900, 0x50505000, + 0xaaaaaa00, 0xd0d0d000, 0xa0a0a000, 0x7d7d7d00, + 0xa1a1a100, 0x89898900, 0x62626200, 0x97979700, + 0x54545400, 0x5b5b5b00, 0x1e1e1e00, 0x95959500, + 0xe0e0e000, 0xffffff00, 0x64646400, 0xd2d2d200, + 0x10101000, 0xc4c4c400, 0x00000000, 0x48484800, + 0xa3a3a300, 0xf7f7f700, 0x75757500, 0xdbdbdb00, + 0x8a8a8a00, 0x03030300, 0xe6e6e600, 0xdadada00, + 0x09090900, 0x3f3f3f00, 0xdddddd00, 0x94949400, + 0x87878700, 0x5c5c5c00, 0x83838300, 0x02020200, + 0xcdcdcd00, 0x4a4a4a00, 0x90909000, 0x33333300, + 0x73737300, 0x67676700, 0xf6f6f600, 0xf3f3f300, + 0x9d9d9d00, 0x7f7f7f00, 0xbfbfbf00, 0xe2e2e200, + 0x52525200, 0x9b9b9b00, 0xd8d8d800, 0x26262600, + 0xc8c8c800, 0x37373700, 0xc6c6c600, 0x3b3b3b00, + 0x81818100, 0x96969600, 0x6f6f6f00, 0x4b4b4b00, + 0x13131300, 0xbebebe00, 0x63636300, 0x2e2e2e00, + 0xe9e9e900, 0x79797900, 0xa7a7a700, 0x8c8c8c00, + 0x9f9f9f00, 0x6e6e6e00, 0xbcbcbc00, 0x8e8e8e00, + 0x29292900, 0xf5f5f500, 0xf9f9f900, 0xb6b6b600, + 0x2f2f2f00, 0xfdfdfd00, 0xb4b4b400, 0x59595900, + 0x78787800, 0x98989800, 0x06060600, 0x6a6a6a00, + 0xe7e7e700, 0x46464600, 0x71717100, 0xbababa00, + 0xd4d4d400, 0x25252500, 0xababab00, 0x42424200, + 0x88888800, 0xa2a2a200, 0x8d8d8d00, 0xfafafa00, + 0x72727200, 0x07070700, 0xb9b9b900, 0x55555500, + 0xf8f8f800, 0xeeeeee00, 0xacacac00, 0x0a0a0a00, + 0x36363600, 0x49494900, 0x2a2a2a00, 0x68686800, + 0x3c3c3c00, 0x38383800, 0xf1f1f100, 0xa4a4a400, + 0x40404000, 0x28282800, 0xd3d3d300, 0x7b7b7b00, + 0xbbbbbb00, 0xc9c9c900, 0x43434300, 0xc1c1c100, + 0x15151500, 0xe3e3e300, 0xadadad00, 0xf4f4f400, + 0x77777700, 0xc7c7c700, 0x80808000, 0x9e9e9e00 +}; + +static const PRUint32 camellia_sp0222[256] = { + 0x00e0e0e0, 0x00050505, 0x00585858, 0x00d9d9d9, + 0x00676767, 0x004e4e4e, 0x00818181, 0x00cbcbcb, + 0x00c9c9c9, 0x000b0b0b, 0x00aeaeae, 0x006a6a6a, + 0x00d5d5d5, 
0x00181818, 0x005d5d5d, 0x00828282, + 0x00464646, 0x00dfdfdf, 0x00d6d6d6, 0x00272727, + 0x008a8a8a, 0x00323232, 0x004b4b4b, 0x00424242, + 0x00dbdbdb, 0x001c1c1c, 0x009e9e9e, 0x009c9c9c, + 0x003a3a3a, 0x00cacaca, 0x00252525, 0x007b7b7b, + 0x000d0d0d, 0x00717171, 0x005f5f5f, 0x001f1f1f, + 0x00f8f8f8, 0x00d7d7d7, 0x003e3e3e, 0x009d9d9d, + 0x007c7c7c, 0x00606060, 0x00b9b9b9, 0x00bebebe, + 0x00bcbcbc, 0x008b8b8b, 0x00161616, 0x00343434, + 0x004d4d4d, 0x00c3c3c3, 0x00727272, 0x00959595, + 0x00ababab, 0x008e8e8e, 0x00bababa, 0x007a7a7a, + 0x00b3b3b3, 0x00020202, 0x00b4b4b4, 0x00adadad, + 0x00a2a2a2, 0x00acacac, 0x00d8d8d8, 0x009a9a9a, + 0x00171717, 0x001a1a1a, 0x00353535, 0x00cccccc, + 0x00f7f7f7, 0x00999999, 0x00616161, 0x005a5a5a, + 0x00e8e8e8, 0x00242424, 0x00565656, 0x00404040, + 0x00e1e1e1, 0x00636363, 0x00090909, 0x00333333, + 0x00bfbfbf, 0x00989898, 0x00979797, 0x00858585, + 0x00686868, 0x00fcfcfc, 0x00ececec, 0x000a0a0a, + 0x00dadada, 0x006f6f6f, 0x00535353, 0x00626262, + 0x00a3a3a3, 0x002e2e2e, 0x00080808, 0x00afafaf, + 0x00282828, 0x00b0b0b0, 0x00747474, 0x00c2c2c2, + 0x00bdbdbd, 0x00363636, 0x00222222, 0x00383838, + 0x00646464, 0x001e1e1e, 0x00393939, 0x002c2c2c, + 0x00a6a6a6, 0x00303030, 0x00e5e5e5, 0x00444444, + 0x00fdfdfd, 0x00888888, 0x009f9f9f, 0x00656565, + 0x00878787, 0x006b6b6b, 0x00f4f4f4, 0x00232323, + 0x00484848, 0x00101010, 0x00d1d1d1, 0x00515151, + 0x00c0c0c0, 0x00f9f9f9, 0x00d2d2d2, 0x00a0a0a0, + 0x00555555, 0x00a1a1a1, 0x00414141, 0x00fafafa, + 0x00434343, 0x00131313, 0x00c4c4c4, 0x002f2f2f, + 0x00a8a8a8, 0x00b6b6b6, 0x003c3c3c, 0x002b2b2b, + 0x00c1c1c1, 0x00ffffff, 0x00c8c8c8, 0x00a5a5a5, + 0x00202020, 0x00898989, 0x00000000, 0x00909090, + 0x00474747, 0x00efefef, 0x00eaeaea, 0x00b7b7b7, + 0x00151515, 0x00060606, 0x00cdcdcd, 0x00b5b5b5, + 0x00121212, 0x007e7e7e, 0x00bbbbbb, 0x00292929, + 0x000f0f0f, 0x00b8b8b8, 0x00070707, 0x00040404, + 0x009b9b9b, 0x00949494, 0x00212121, 0x00666666, + 0x00e6e6e6, 0x00cecece, 0x00ededed, 0x00e7e7e7, + 0x003b3b3b, 
0x00fefefe, 0x007f7f7f, 0x00c5c5c5, + 0x00a4a4a4, 0x00373737, 0x00b1b1b1, 0x004c4c4c, + 0x00919191, 0x006e6e6e, 0x008d8d8d, 0x00767676, + 0x00030303, 0x002d2d2d, 0x00dedede, 0x00969696, + 0x00262626, 0x007d7d7d, 0x00c6c6c6, 0x005c5c5c, + 0x00d3d3d3, 0x00f2f2f2, 0x004f4f4f, 0x00191919, + 0x003f3f3f, 0x00dcdcdc, 0x00797979, 0x001d1d1d, + 0x00525252, 0x00ebebeb, 0x00f3f3f3, 0x006d6d6d, + 0x005e5e5e, 0x00fbfbfb, 0x00696969, 0x00b2b2b2, + 0x00f0f0f0, 0x00313131, 0x000c0c0c, 0x00d4d4d4, + 0x00cfcfcf, 0x008c8c8c, 0x00e2e2e2, 0x00757575, + 0x00a9a9a9, 0x004a4a4a, 0x00575757, 0x00848484, + 0x00111111, 0x00454545, 0x001b1b1b, 0x00f5f5f5, + 0x00e4e4e4, 0x000e0e0e, 0x00737373, 0x00aaaaaa, + 0x00f1f1f1, 0x00dddddd, 0x00595959, 0x00141414, + 0x006c6c6c, 0x00929292, 0x00545454, 0x00d0d0d0, + 0x00787878, 0x00707070, 0x00e3e3e3, 0x00494949, + 0x00808080, 0x00505050, 0x00a7a7a7, 0x00f6f6f6, + 0x00777777, 0x00939393, 0x00868686, 0x00838383, + 0x002a2a2a, 0x00c7c7c7, 0x005b5b5b, 0x00e9e9e9, + 0x00eeeeee, 0x008f8f8f, 0x00010101, 0x003d3d3d +}; + +static const PRUint32 camellia_sp3033[256] = { + 0x38003838, 0x41004141, 0x16001616, 0x76007676, + 0xd900d9d9, 0x93009393, 0x60006060, 0xf200f2f2, + 0x72007272, 0xc200c2c2, 0xab00abab, 0x9a009a9a, + 0x75007575, 0x06000606, 0x57005757, 0xa000a0a0, + 0x91009191, 0xf700f7f7, 0xb500b5b5, 0xc900c9c9, + 0xa200a2a2, 0x8c008c8c, 0xd200d2d2, 0x90009090, + 0xf600f6f6, 0x07000707, 0xa700a7a7, 0x27002727, + 0x8e008e8e, 0xb200b2b2, 0x49004949, 0xde00dede, + 0x43004343, 0x5c005c5c, 0xd700d7d7, 0xc700c7c7, + 0x3e003e3e, 0xf500f5f5, 0x8f008f8f, 0x67006767, + 0x1f001f1f, 0x18001818, 0x6e006e6e, 0xaf00afaf, + 0x2f002f2f, 0xe200e2e2, 0x85008585, 0x0d000d0d, + 0x53005353, 0xf000f0f0, 0x9c009c9c, 0x65006565, + 0xea00eaea, 0xa300a3a3, 0xae00aeae, 0x9e009e9e, + 0xec00ecec, 0x80008080, 0x2d002d2d, 0x6b006b6b, + 0xa800a8a8, 0x2b002b2b, 0x36003636, 0xa600a6a6, + 0xc500c5c5, 0x86008686, 0x4d004d4d, 0x33003333, + 0xfd00fdfd, 0x66006666, 0x58005858, 0x96009696, + 
0x3a003a3a, 0x09000909, 0x95009595, 0x10001010, + 0x78007878, 0xd800d8d8, 0x42004242, 0xcc00cccc, + 0xef00efef, 0x26002626, 0xe500e5e5, 0x61006161, + 0x1a001a1a, 0x3f003f3f, 0x3b003b3b, 0x82008282, + 0xb600b6b6, 0xdb00dbdb, 0xd400d4d4, 0x98009898, + 0xe800e8e8, 0x8b008b8b, 0x02000202, 0xeb00ebeb, + 0x0a000a0a, 0x2c002c2c, 0x1d001d1d, 0xb000b0b0, + 0x6f006f6f, 0x8d008d8d, 0x88008888, 0x0e000e0e, + 0x19001919, 0x87008787, 0x4e004e4e, 0x0b000b0b, + 0xa900a9a9, 0x0c000c0c, 0x79007979, 0x11001111, + 0x7f007f7f, 0x22002222, 0xe700e7e7, 0x59005959, + 0xe100e1e1, 0xda00dada, 0x3d003d3d, 0xc800c8c8, + 0x12001212, 0x04000404, 0x74007474, 0x54005454, + 0x30003030, 0x7e007e7e, 0xb400b4b4, 0x28002828, + 0x55005555, 0x68006868, 0x50005050, 0xbe00bebe, + 0xd000d0d0, 0xc400c4c4, 0x31003131, 0xcb00cbcb, + 0x2a002a2a, 0xad00adad, 0x0f000f0f, 0xca00caca, + 0x70007070, 0xff00ffff, 0x32003232, 0x69006969, + 0x08000808, 0x62006262, 0x00000000, 0x24002424, + 0xd100d1d1, 0xfb00fbfb, 0xba00baba, 0xed00eded, + 0x45004545, 0x81008181, 0x73007373, 0x6d006d6d, + 0x84008484, 0x9f009f9f, 0xee00eeee, 0x4a004a4a, + 0xc300c3c3, 0x2e002e2e, 0xc100c1c1, 0x01000101, + 0xe600e6e6, 0x25002525, 0x48004848, 0x99009999, + 0xb900b9b9, 0xb300b3b3, 0x7b007b7b, 0xf900f9f9, + 0xce00cece, 0xbf00bfbf, 0xdf00dfdf, 0x71007171, + 0x29002929, 0xcd00cdcd, 0x6c006c6c, 0x13001313, + 0x64006464, 0x9b009b9b, 0x63006363, 0x9d009d9d, + 0xc000c0c0, 0x4b004b4b, 0xb700b7b7, 0xa500a5a5, + 0x89008989, 0x5f005f5f, 0xb100b1b1, 0x17001717, + 0xf400f4f4, 0xbc00bcbc, 0xd300d3d3, 0x46004646, + 0xcf00cfcf, 0x37003737, 0x5e005e5e, 0x47004747, + 0x94009494, 0xfa00fafa, 0xfc00fcfc, 0x5b005b5b, + 0x97009797, 0xfe00fefe, 0x5a005a5a, 0xac00acac, + 0x3c003c3c, 0x4c004c4c, 0x03000303, 0x35003535, + 0xf300f3f3, 0x23002323, 0xb800b8b8, 0x5d005d5d, + 0x6a006a6a, 0x92009292, 0xd500d5d5, 0x21002121, + 0x44004444, 0x51005151, 0xc600c6c6, 0x7d007d7d, + 0x39003939, 0x83008383, 0xdc00dcdc, 0xaa00aaaa, + 0x7c007c7c, 0x77007777, 0x56005656, 0x05000505, + 
0x1b001b1b, 0xa400a4a4, 0x15001515, 0x34003434, + 0x1e001e1e, 0x1c001c1c, 0xf800f8f8, 0x52005252, + 0x20002020, 0x14001414, 0xe900e9e9, 0xbd00bdbd, + 0xdd00dddd, 0xe400e4e4, 0xa100a1a1, 0xe000e0e0, + 0x8a008a8a, 0xf100f1f1, 0xd600d6d6, 0x7a007a7a, + 0xbb00bbbb, 0xe300e3e3, 0x40004040, 0x4f004f4f +}; + +static const PRUint32 camellia_sp4404[256] = { + 0x70700070, 0x2c2c002c, 0xb3b300b3, 0xc0c000c0, + 0xe4e400e4, 0x57570057, 0xeaea00ea, 0xaeae00ae, + 0x23230023, 0x6b6b006b, 0x45450045, 0xa5a500a5, + 0xeded00ed, 0x4f4f004f, 0x1d1d001d, 0x92920092, + 0x86860086, 0xafaf00af, 0x7c7c007c, 0x1f1f001f, + 0x3e3e003e, 0xdcdc00dc, 0x5e5e005e, 0x0b0b000b, + 0xa6a600a6, 0x39390039, 0xd5d500d5, 0x5d5d005d, + 0xd9d900d9, 0x5a5a005a, 0x51510051, 0x6c6c006c, + 0x8b8b008b, 0x9a9a009a, 0xfbfb00fb, 0xb0b000b0, + 0x74740074, 0x2b2b002b, 0xf0f000f0, 0x84840084, + 0xdfdf00df, 0xcbcb00cb, 0x34340034, 0x76760076, + 0x6d6d006d, 0xa9a900a9, 0xd1d100d1, 0x04040004, + 0x14140014, 0x3a3a003a, 0xdede00de, 0x11110011, + 0x32320032, 0x9c9c009c, 0x53530053, 0xf2f200f2, + 0xfefe00fe, 0xcfcf00cf, 0xc3c300c3, 0x7a7a007a, + 0x24240024, 0xe8e800e8, 0x60600060, 0x69690069, + 0xaaaa00aa, 0xa0a000a0, 0xa1a100a1, 0x62620062, + 0x54540054, 0x1e1e001e, 0xe0e000e0, 0x64640064, + 0x10100010, 0x00000000, 0xa3a300a3, 0x75750075, + 0x8a8a008a, 0xe6e600e6, 0x09090009, 0xdddd00dd, + 0x87870087, 0x83830083, 0xcdcd00cd, 0x90900090, + 0x73730073, 0xf6f600f6, 0x9d9d009d, 0xbfbf00bf, + 0x52520052, 0xd8d800d8, 0xc8c800c8, 0xc6c600c6, + 0x81810081, 0x6f6f006f, 0x13130013, 0x63630063, + 0xe9e900e9, 0xa7a700a7, 0x9f9f009f, 0xbcbc00bc, + 0x29290029, 0xf9f900f9, 0x2f2f002f, 0xb4b400b4, + 0x78780078, 0x06060006, 0xe7e700e7, 0x71710071, + 0xd4d400d4, 0xabab00ab, 0x88880088, 0x8d8d008d, + 0x72720072, 0xb9b900b9, 0xf8f800f8, 0xacac00ac, + 0x36360036, 0x2a2a002a, 0x3c3c003c, 0xf1f100f1, + 0x40400040, 0xd3d300d3, 0xbbbb00bb, 0x43430043, + 0x15150015, 0xadad00ad, 0x77770077, 0x80800080, + 0x82820082, 0xecec00ec, 0x27270027, 
0xe5e500e5, + 0x85850085, 0x35350035, 0x0c0c000c, 0x41410041, + 0xefef00ef, 0x93930093, 0x19190019, 0x21210021, + 0x0e0e000e, 0x4e4e004e, 0x65650065, 0xbdbd00bd, + 0xb8b800b8, 0x8f8f008f, 0xebeb00eb, 0xcece00ce, + 0x30300030, 0x5f5f005f, 0xc5c500c5, 0x1a1a001a, + 0xe1e100e1, 0xcaca00ca, 0x47470047, 0x3d3d003d, + 0x01010001, 0xd6d600d6, 0x56560056, 0x4d4d004d, + 0x0d0d000d, 0x66660066, 0xcccc00cc, 0x2d2d002d, + 0x12120012, 0x20200020, 0xb1b100b1, 0x99990099, + 0x4c4c004c, 0xc2c200c2, 0x7e7e007e, 0x05050005, + 0xb7b700b7, 0x31310031, 0x17170017, 0xd7d700d7, + 0x58580058, 0x61610061, 0x1b1b001b, 0x1c1c001c, + 0x0f0f000f, 0x16160016, 0x18180018, 0x22220022, + 0x44440044, 0xb2b200b2, 0xb5b500b5, 0x91910091, + 0x08080008, 0xa8a800a8, 0xfcfc00fc, 0x50500050, + 0xd0d000d0, 0x7d7d007d, 0x89890089, 0x97970097, + 0x5b5b005b, 0x95950095, 0xffff00ff, 0xd2d200d2, + 0xc4c400c4, 0x48480048, 0xf7f700f7, 0xdbdb00db, + 0x03030003, 0xdada00da, 0x3f3f003f, 0x94940094, + 0x5c5c005c, 0x02020002, 0x4a4a004a, 0x33330033, + 0x67670067, 0xf3f300f3, 0x7f7f007f, 0xe2e200e2, + 0x9b9b009b, 0x26260026, 0x37370037, 0x3b3b003b, + 0x96960096, 0x4b4b004b, 0xbebe00be, 0x2e2e002e, + 0x79790079, 0x8c8c008c, 0x6e6e006e, 0x8e8e008e, + 0xf5f500f5, 0xb6b600b6, 0xfdfd00fd, 0x59590059, + 0x98980098, 0x6a6a006a, 0x46460046, 0xbaba00ba, + 0x25250025, 0x42420042, 0xa2a200a2, 0xfafa00fa, + 0x07070007, 0x55550055, 0xeeee00ee, 0x0a0a000a, + 0x49490049, 0x68680068, 0x38380038, 0xa4a400a4, + 0x28280028, 0x7b7b007b, 0xc9c900c9, 0xc1c100c1, + 0xe3e300e3, 0xf4f400f4, 0xc7c700c7, 0x9e9e009e +}; + +/** + * Stuff related to the Camellia key schedule + */ +#define subl(x) subL[(x)] +#define subr(x) subR[(x)] + +void +camellia_setup128(const unsigned char *key, PRUint32 *subkey) +{ + PRUint32 kll, klr, krl, krr; + PRUint32 il, ir, t0, t1, w0, w1; + PRUint32 kw4l, kw4r, dw, tl, tr; + PRUint32 subL[26]; + PRUint32 subR[26]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + /** + * k == kll || klr || krl || 
krr (|| is concatination) + */ + kll = GETU32(key); + klr = GETU32(key + 4); + krl = GETU32(key + 8); + krr = GETU32(key + 12); + /** + * generate KL dependent subkeys + */ + subl(0) = kll; + subr(0) = klr; + subl(1) = krl; + subr(1) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(4) = kll; + subr(4) = klr; + subl(5) = krl; + subr(5) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); + subl(10) = kll; + subr(10) = klr; + subl(11) = krl; + subr(11) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(13) = krl; + subr(13) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(16) = kll; + subr(16) = klr; + subl(17) = krl; + subr(17) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(18) = kll; + subr(18) = klr; + subl(19) = krl; + subr(19) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(22) = kll; + subr(22) = klr; + subl(23) = krl; + subr(23) = krr; + + /* generate KA */ + kll = subl(0); + klr = subr(0); + krl = subl(1); + krr = subr(1); + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, + w0, w1, il, ir, t0, t1); + krl ^= w0; + krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, + kll, klr, il, ir, t0, t1); + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, + krl, krr, il, ir, t0, t1); + krl ^= w0; + krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, + w0, w1, il, ir, t0, t1); + kll ^= w0; + klr ^= w1; + + /* generate KA dependent subkeys */ + subl(2) = kll; + subr(2) = klr; + subl(3) = krl; + subr(3) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; + subr(6) = klr; + subl(7) = krl; + subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(8) = kll; + subr(8) = klr; + subl(9) = krl; + subr(9) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(12) = kll; + subr(12) = klr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(14) = kll; + subr(14) = klr; + subl(15) = krl; + 
subr(15) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(20) = kll; + subr(20) = klr; + subl(21) = krl; + subr(21) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(24) = kll; + subr(24) = klr; + subl(25) = krl; + subr(25) = krr; + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); + subr(3) ^= subr(1); + subl(5) ^= subl(1); + subr(5) ^= subr(1); + subl(7) ^= subl(1); + subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); + subr(11) ^= subr(1); + subl(13) ^= subl(1); + subr(13) ^= subr(1); + subl(15) ^= subl(1); + subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); + subr(19) ^= subr(1); + subl(21) ^= subl(1); + subr(21) ^= subr(1); + subl(23) ^= subl(1); + subr(23) ^= subr(1); + subl(24) ^= subl(1); + subr(24) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(25); + kw4r = subr(25); + subl(22) ^= kw4l; + subr(22) ^= kw4r; + subl(20) ^= kw4l; + subr(20) ^= kw4r; + subl(18) ^= kw4l; + subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; + subr(14) ^= kw4r; + subl(12) ^= kw4l; + subr(12) ^= kw4r; + subl(10) ^= kw4l; + subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; + subr(6) ^= kw4r; + subl(4) ^= kw4l; + subr(4) ^= kw4r; + subl(2) ^= kw4l; + subr(2) ^= kw4r; + subl(0) ^= kw4l; + subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + 
CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + CamelliaSubkeyR(16) = subr(16); + CamelliaSubkeyL(17) = subl(17); + CamelliaSubkeyR(17) = subr(17); + tl = subl(15) ^ (subr(15) & ~subr(17)); + dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(18) = tl ^ subl(19); + CamelliaSubkeyR(18) = tr ^ subr(19); + CamelliaSubkeyL(19) = subl(18) ^ subl(20); + CamelliaSubkeyR(19) = subr(18) ^ subr(20); + CamelliaSubkeyL(20) = subl(19) ^ subl(21); + CamelliaSubkeyR(20) = subr(19) ^ subr(21); + CamelliaSubkeyL(21) = subl(20) ^ subl(22); + CamelliaSubkeyR(21) = subr(20) ^ subr(22); + CamelliaSubkeyL(22) = subl(21) ^ subl(23); + CamelliaSubkeyR(22) = subr(21) ^ subr(23); + CamelliaSubkeyL(23) = subl(22); + CamelliaSubkeyR(23) = subr(22); + CamelliaSubkeyL(24) = subl(24) ^ subl(23); + CamelliaSubkeyR(24) = subr(24) ^ subr(23); + + /* apply the inverse of the last half of P-function */ + dw = 
CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw; + dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw; + dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw; + dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw; + dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw; + dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw; + dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw; + dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw; + dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw; + dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw; + dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw; + dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw; + dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw; + dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(19) = 
CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw; + dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw; + dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw; + dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw; + dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw; + + return; +} + +void +camellia_setup256(const unsigned char *key, PRUint32 *subkey) +{ + PRUint32 kll, klr, krl, krr; /* left half of key */ + PRUint32 krll, krlr, krrl, krrr; /* right half of key */ + PRUint32 il, ir, t0, t1, w0, w1; /* temporary variables */ + PRUint32 kw4l, kw4r, dw, tl, tr; + PRUint32 subL[34]; + PRUint32 subR[34]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + /** + * key = (kll || klr || krl || krr || krll || krlr || krrl || krrr) + * (|| is concatination) + */ + + kll = GETU32(key); + klr = GETU32(key + 4); + krl = GETU32(key + 8); + krr = GETU32(key + 12); + krll = GETU32(key + 16); + krlr = GETU32(key + 20); + krrl = GETU32(key + 24); + krrr = GETU32(key + 28); + + /* generate KL dependent subkeys */ + subl(0) = kll; + subr(0) = klr; + subl(1) = krl; + subr(1) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45); + subl(12) = kll; + subr(12) = klr; + subl(13) = krl; + subr(13) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(16) = kll; + subr(16) = klr; + subl(17) = krl; + subr(17) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(22) = kll; + subr(22) = klr; + subl(23) = krl; + subr(23) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(30) = kll; + subr(30) = klr; + subl(31) = krl; + subr(31) = krr; + + /* generate KR dependent 
subkeys */ + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); + subl(4) = krll; + subr(4) = krlr; + subl(5) = krrl; + subr(5) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); + subl(8) = krll; + subr(8) = krlr; + subl(9) = krrl; + subr(9) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(18) = krll; + subr(18) = krlr; + subl(19) = krrl; + subr(19) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); + subl(26) = krll; + subr(26) = krlr; + subl(27) = krrl; + subr(27) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); + + /* generate KA */ + kll = subl(0) ^ krll; + klr = subr(0) ^ krlr; + krl = subl(1) ^ krrl; + krr = subr(1) ^ krrr; + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, + w0, w1, il, ir, t0, t1); + krl ^= w0; + krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, + kll, klr, il, ir, t0, t1); + kll ^= krll; + klr ^= krlr; + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, + krl, krr, il, ir, t0, t1); + krl ^= w0 ^ krrl; + krr ^= w1 ^ krrr; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, + w0, w1, il, ir, t0, t1); + kll ^= w0; + klr ^= w1; + + /* generate KB */ + krll ^= kll; + krlr ^= klr; + krrl ^= krl; + krrr ^= krr; + CAMELLIA_F(krll, krlr, + CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, + w0, w1, il, ir, t0, t1); + krrl ^= w0; + krrr ^= w1; + CAMELLIA_F(krrl, krrr, + CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, + w0, w1, il, ir, t0, t1); + krll ^= w0; + krlr ^= w1; + + /* generate KA dependent subkeys */ + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; + subr(6) = klr; + subl(7) = krl; + subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); + subl(14) = kll; + subr(14) = klr; + subl(15) = krl; + subr(15) = krr; + subl(24) = klr; + subr(24) = krl; + subl(25) = krr; + subr(25) = kll; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49); + subl(28) = kll; + subr(28) = klr; + subl(29) = krl; + subr(29) = krr; + + /* generate KB 
dependent subkeys */ + subl(2) = krll; + subr(2) = krlr; + subl(3) = krrl; + subr(3) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(10) = krll; + subr(10) = krlr; + subl(11) = krrl; + subr(11) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(20) = krll; + subr(20) = krlr; + subl(21) = krrl; + subr(21) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51); + subl(32) = krll; + subr(32) = krlr; + subl(33) = krrl; + subr(33) = krrr; + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); + subr(3) ^= subr(1); + subl(5) ^= subl(1); + subr(5) ^= subr(1); + subl(7) ^= subl(1); + subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); + subr(11) ^= subr(1); + subl(13) ^= subl(1); + subr(13) ^= subr(1); + subl(15) ^= subl(1); + subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); + subr(19) ^= subr(1); + subl(21) ^= subl(1); + subr(21) ^= subr(1); + subl(23) ^= subl(1); + subr(23) ^= subr(1); + subl(1) ^= subr(1) & ~subr(25); + dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); + subl(27) ^= subl(1); + subr(27) ^= subr(1); + subl(29) ^= subl(1); + subr(29) ^= subr(1); + subl(31) ^= subl(1); + subr(31) ^= subr(1); + subl(32) ^= subl(1); + subr(32) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(33); + kw4r = subr(33); + subl(30) ^= kw4l; + subr(30) ^= kw4r; + subl(28) ^= kw4l; + subr(28) ^= kw4r; + subl(26) ^= kw4l; + subr(26) ^= kw4r; + kw4l ^= kw4r & ~subr(24); + dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw); + subl(22) ^= kw4l; + subr(22) ^= kw4r; + subl(20) ^= kw4l; + subr(20) ^= kw4r; + subl(18) ^= kw4l; + subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; + subr(14) ^= kw4r; + subl(12) ^= kw4l; + subr(12) ^= kw4r; + subl(10) ^= kw4l; + subr(10) ^= kw4r; + kw4l ^= kw4r & 
~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; + subr(6) ^= kw4r; + subl(4) ^= kw4l; + subr(4) ^= kw4r; + subl(2) ^= kw4l; + subr(2) ^= kw4r; + subl(0) ^= kw4l; + subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + CamelliaSubkeyR(16) = subr(16); + CamelliaSubkeyL(17) = subl(17); + CamelliaSubkeyR(17) = subr(17); + tl = subl(15) ^ (subr(15) & ~subr(17)); + dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(18) = tl 
^ subl(19); + CamelliaSubkeyR(18) = tr ^ subr(19); + CamelliaSubkeyL(19) = subl(18) ^ subl(20); + CamelliaSubkeyR(19) = subr(18) ^ subr(20); + CamelliaSubkeyL(20) = subl(19) ^ subl(21); + CamelliaSubkeyR(20) = subr(19) ^ subr(21); + CamelliaSubkeyL(21) = subl(20) ^ subl(22); + CamelliaSubkeyR(21) = subr(20) ^ subr(22); + CamelliaSubkeyL(22) = subl(21) ^ subl(23); + CamelliaSubkeyR(22) = subr(21) ^ subr(23); + tl = subl(26) ^ (subr(26) & ~subr(24)); + dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(23) = subl(22) ^ tl; + CamelliaSubkeyR(23) = subr(22) ^ tr; + CamelliaSubkeyL(24) = subl(24); + CamelliaSubkeyR(24) = subr(24); + CamelliaSubkeyL(25) = subl(25); + CamelliaSubkeyR(25) = subr(25); + tl = subl(23) ^ (subr(23) & ~subr(25)); + dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(26) = tl ^ subl(27); + CamelliaSubkeyR(26) = tr ^ subr(27); + CamelliaSubkeyL(27) = subl(26) ^ subl(28); + CamelliaSubkeyR(27) = subr(26) ^ subr(28); + CamelliaSubkeyL(28) = subl(27) ^ subl(29); + CamelliaSubkeyR(28) = subr(27) ^ subr(29); + CamelliaSubkeyL(29) = subl(28) ^ subl(30); + CamelliaSubkeyR(29) = subr(28) ^ subr(30); + CamelliaSubkeyL(30) = subl(29) ^ subl(31); + CamelliaSubkeyR(30) = subr(29) ^ subr(31); + CamelliaSubkeyL(31) = subl(30); + CamelliaSubkeyR(31) = subr(30); + CamelliaSubkeyL(32) = subl(32) ^ subl(31); + CamelliaSubkeyR(32) = subr(32) ^ subr(31); + + /* apply the inverse of the last half of P-function */ + dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw; + dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw; + dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw; + dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(5) 
= CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw; + dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw; + dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw; + dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw; + dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw; + dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw; + dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw; + dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw; + dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw; + dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw; + dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw; + dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw; + dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw; + dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw; + dw = 
CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw; + dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw; + dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw; + dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw; + dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw; + dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw; + dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw, CamelliaSubkeyL(31) = dw; + + return; +} + +void +camellia_setup192(const unsigned char *key, PRUint32 *subkey) +{ + unsigned char kk[32]; + PRUint32 krll, krlr, krrl, krrr; + + memcpy(kk, key, 24); + memcpy((unsigned char *)&krll, key + 16, 4); + memcpy((unsigned char *)&krlr, key + 20, 4); + krrl = ~krll; + krrr = ~krlr; + memcpy(kk + 24, (unsigned char *)&krrl, 4); + memcpy(kk + 28, (unsigned char *)&krrr, 4); + camellia_setup256(kk, subkey); + return; +} + +/** + * Stuff related to camellia encryption/decryption + * + */ +SECStatus NO_SANITIZE_ALIGNMENT +camellia_encrypt128(const PRUint32 *subkey, + unsigned char *output, + const unsigned char *input) +{ + PRUint32 il, ir, t0, t1; + PRUint32 io[4]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + io[0] = GETU32(input); + io[1] = GETU32(input + 4); + io[2] = GETU32(input + 8); + io[3] = GETU32(input + 12); + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(0); + io[1] ^= 
CamelliaSubkeyR(0); + /* main iteration */ + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(2), CamelliaSubkeyR(2), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(3), CamelliaSubkeyR(3), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(4), CamelliaSubkeyR(4), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(5), CamelliaSubkeyR(5), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(6), CamelliaSubkeyR(6), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(7), CamelliaSubkeyR(7), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(8), CamelliaSubkeyR(8), + CamelliaSubkeyL(9), CamelliaSubkeyR(9), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(10), CamelliaSubkeyR(10), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(11), CamelliaSubkeyR(11), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(12), CamelliaSubkeyR(12), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(13), CamelliaSubkeyR(13), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(14), CamelliaSubkeyR(14), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(15), CamelliaSubkeyR(15), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(16), CamelliaSubkeyR(16), + CamelliaSubkeyL(17), CamelliaSubkeyR(17), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(18), CamelliaSubkeyR(18), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(19), CamelliaSubkeyR(19), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(20), CamelliaSubkeyR(20), + io[2], io[3], il, ir, t0, t1); + 
/*
 * camellia_decrypt128
 *
 * Decrypt one 16-byte block with a subkey table laid out for the 128-bit
 * key schedule.
 *
 * subkey - expanded key; entries 0 and 2..24 are read here
 * output - receives the 16 decrypted bytes
 * input  - the 16 bytes to decrypt
 *
 * The subkeys are applied in exactly the reverse order of
 * camellia_encrypt128: whitening with entry 24, rounds 23..18, an FLS step
 * with entries 17/16, rounds 15..10, an FLS step with entries 9/8,
 * rounds 7..2, and a final whitening with entry 0.
 *
 * Always returns SECSuccess.
 */
SECStatus NO_SANITIZE_ALIGNMENT
camellia_decrypt128(const PRUint32 *subkey,
                    unsigned char *output,
                    const unsigned char *input)
{
    PRUint32 il, ir, t0, t1; /* temporary variables */
    PRUint32 io[4];
#if defined(CAMELLIA_NEED_TMP_VARIABLE)
    PRUint32 tmp;
#endif

    /* read the block as four 32-bit words */
    io[0] = GETU32(input);
    io[1] = GETU32(input + 4);
    io[2] = GETU32(input + 8);
    io[3] = GETU32(input + 12);

    /* pre-whitening: entry 24 is the pair camellia_encrypt128 applies last */
    io[0] ^= CamelliaSubkeyL(24);
    io[1] ^= CamelliaSubkeyR(24);

    /* main iteration: six rounds, subkeys 23 down to 18 */
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(23), CamelliaSubkeyR(23),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(22), CamelliaSubkeyR(22),
                     io[0], io[1], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(21), CamelliaSubkeyR(21),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(20), CamelliaSubkeyR(20),
                     io[0], io[1], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(19), CamelliaSubkeyR(19),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(18), CamelliaSubkeyR(18),
                     io[0], io[1], il, ir, t0, t1);

    /* FLS step; entries 17 and 16 are passed in the opposite order to
     * the encrypt path */
    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
                 CamelliaSubkeyL(17), CamelliaSubkeyR(17),
                 CamelliaSubkeyL(16), CamelliaSubkeyR(16),
                 t0, t1, il, ir);

    /* six rounds, subkeys 15 down to 10 */
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(15), CamelliaSubkeyR(15),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(14), CamelliaSubkeyR(14),
                     io[0], io[1], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(13), CamelliaSubkeyR(13),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(12), CamelliaSubkeyR(12),
                     io[0], io[1], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(11), CamelliaSubkeyR(11),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(10), CamelliaSubkeyR(10),
                     io[0], io[1], il, ir, t0, t1);

    /* FLS step with entries 9 and 8, again swapped relative to encrypt */
    CAMELLIA_FLS(io[0], io[1], io[2], io[3],
                 CamelliaSubkeyL(9), CamelliaSubkeyR(9),
                 CamelliaSubkeyL(8), CamelliaSubkeyR(8),
                 t0, t1, il, ir);

    /* six rounds, subkeys 7 down to 2 */
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(7), CamelliaSubkeyR(7),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(6), CamelliaSubkeyR(6),
                     io[0], io[1], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(5), CamelliaSubkeyR(5),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(4), CamelliaSubkeyR(4),
                     io[0], io[1], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[0], io[1],
                     CamelliaSubkeyL(3), CamelliaSubkeyR(3),
                     io[2], io[3], il, ir, t0, t1);
    CAMELLIA_ROUNDSM(io[2], io[3],
                     CamelliaSubkeyL(2), CamelliaSubkeyR(2),
                     io[0], io[1], il, ir, t0, t1);

    /* post-whitening: entry 0 is the pair camellia_encrypt128 applies first */
    io[2] ^= CamelliaSubkeyL(0);
    io[3] ^= CamelliaSubkeyR(0);

    /* swap the two 64-bit halves before writing the block out */
    t0 = io[0];
    t1 = io[1];
    io[0] = io[2];
    io[1] = io[3];
    io[2] = t0;
    io[3] = t1;

    PUTU32(output, io[0]);
    PUTU32(output + 4, io[1]);
    PUTU32(output + 8, io[2]);
    PUTU32(output + 12, io[3]);

    return SECSuccess;
}
ir, t0, t1; /* temporary valiables */ + PRUint32 io[4]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + io[0] = GETU32(input); + io[1] = GETU32(input + 4); + io[2] = GETU32(input + 8); + io[3] = GETU32(input + 12); + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(0); + io[1] ^= CamelliaSubkeyR(0); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(2), CamelliaSubkeyR(2), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(3), CamelliaSubkeyR(3), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(4), CamelliaSubkeyR(4), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(5), CamelliaSubkeyR(5), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(6), CamelliaSubkeyR(6), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(7), CamelliaSubkeyR(7), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(8), CamelliaSubkeyR(8), + CamelliaSubkeyL(9), CamelliaSubkeyR(9), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(10), CamelliaSubkeyR(10), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(11), CamelliaSubkeyR(11), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(12), CamelliaSubkeyR(12), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(13), CamelliaSubkeyR(13), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(14), CamelliaSubkeyR(14), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(15), CamelliaSubkeyR(15), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(16), CamelliaSubkeyR(16), + CamelliaSubkeyL(17), CamelliaSubkeyR(17), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], 
io[1], + CamelliaSubkeyL(18), CamelliaSubkeyR(18), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(19), CamelliaSubkeyR(19), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(20), CamelliaSubkeyR(20), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(21), CamelliaSubkeyR(21), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(22), CamelliaSubkeyR(22), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(23), CamelliaSubkeyR(23), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(24), CamelliaSubkeyR(24), + CamelliaSubkeyL(25), CamelliaSubkeyR(25), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(26), CamelliaSubkeyR(26), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(27), CamelliaSubkeyR(27), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(28), CamelliaSubkeyR(28), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(29), CamelliaSubkeyR(29), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(30), CamelliaSubkeyR(30), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(31), CamelliaSubkeyR(31), + io[0], io[1], il, ir, t0, t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(32); + io[3] ^= CamelliaSubkeyR(32); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + PUTU32(output, io[0]); + PUTU32(output + 4, io[1]); + PUTU32(output + 8, io[2]); + PUTU32(output + 12, io[3]); + + return SECSuccess; +} + +SECStatus NO_SANITIZE_ALIGNMENT +camellia_decrypt256(const PRUint32 *subkey, + unsigned char *output, + const unsigned char *input) +{ + PRUint32 il, ir, t0, t1; /* temporary valiables */ + PRUint32 io[4]; +#if 
defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + io[0] = GETU32(input); + io[1] = GETU32(input + 4); + io[2] = GETU32(input + 8); + io[3] = GETU32(input + 12); + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(32); + io[1] ^= CamelliaSubkeyR(32); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(31), CamelliaSubkeyR(31), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(30), CamelliaSubkeyR(30), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(29), CamelliaSubkeyR(29), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(28), CamelliaSubkeyR(28), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(27), CamelliaSubkeyR(27), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(26), CamelliaSubkeyR(26), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(25), CamelliaSubkeyR(25), + CamelliaSubkeyL(24), CamelliaSubkeyR(24), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(23), CamelliaSubkeyR(23), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(22), CamelliaSubkeyR(22), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(21), CamelliaSubkeyR(21), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(20), CamelliaSubkeyR(20), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(19), CamelliaSubkeyR(19), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(18), CamelliaSubkeyR(18), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(17), CamelliaSubkeyR(17), + CamelliaSubkeyL(16), CamelliaSubkeyR(16), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(15), 
CamelliaSubkeyR(15), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(14), CamelliaSubkeyR(14), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(13), CamelliaSubkeyR(13), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(12), CamelliaSubkeyR(12), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(11), CamelliaSubkeyR(11), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(10), CamelliaSubkeyR(10), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(9), CamelliaSubkeyR(9), + CamelliaSubkeyL(8), CamelliaSubkeyR(8), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(7), CamelliaSubkeyR(7), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(6), CamelliaSubkeyR(6), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(5), CamelliaSubkeyR(5), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(4), CamelliaSubkeyR(4), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(3), CamelliaSubkeyR(3), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(2), CamelliaSubkeyR(2), + io[0], io[1], il, ir, t0, t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(0); + io[3] ^= CamelliaSubkeyR(0); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + PUTU32(output, io[0]); + PUTU32(output + 4, io[1]); + PUTU32(output + 8, io[2]); + PUTU32(output + 12, io[3]); + + return SECSuccess; +} + +/************************************************************************** + * + * Stuff related to the Camellia key schedule + * + *************************************************************************/ + +SECStatus +camellia_key_expansion(CamelliaContext *cx, 
+ const unsigned char *key, + const unsigned int keysize) +{ + cx->keysize = keysize; + + switch (keysize) { + case 16: + camellia_setup128(key, cx->expandedKey); + break; + case 24: + camellia_setup192(key, cx->expandedKey); + break; + case 32: + camellia_setup256(key, cx->expandedKey); + break; + default: + break; + } + return SECSuccess; +} + +/************************************************************************** + * + * Camellia modes of operation (ECB and CBC) + * + *************************************************************************/ + +SECStatus +camellia_encryptECB(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + CamelliaBlockFunc *encryptor; + + encryptor = (cx->keysize == 16) + ? &camellia_encrypt128 + : &camellia_encrypt256; + + while (inputLen > 0) { + (*encryptor)(cx->expandedKey, output, input); + + output += CAMELLIA_BLOCK_SIZE; + input += CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + return SECSuccess; +} + +SECStatus +camellia_encryptCBC(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + unsigned int j; + unsigned char *lastblock; + unsigned char inblock[CAMELLIA_BLOCK_SIZE]; + CamelliaBlockFunc *encryptor; + + if (!inputLen) + return SECSuccess; + lastblock = cx->iv; + + encryptor = (cx->keysize == 16) + ? 
&camellia_encrypt128 + : &camellia_encrypt256; + + while (inputLen > 0) { + /* XOR with the last block (IV if first block) */ + for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j) + inblock[j] = input[j] ^ lastblock[j]; + /* encrypt */ + (*encryptor)(cx->expandedKey, output, inblock); + + /* move to the next block */ + lastblock = output; + output += CAMELLIA_BLOCK_SIZE; + input += CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + memcpy(cx->iv, lastblock, CAMELLIA_BLOCK_SIZE); + return SECSuccess; +} + +SECStatus +camellia_decryptECB(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + CamelliaBlockFunc *decryptor; + + decryptor = (cx->keysize == 16) + ? &camellia_decrypt128 + : &camellia_decrypt256; + + while (inputLen > 0) { + + (*decryptor)(cx->expandedKey, output, input); + + output += CAMELLIA_BLOCK_SIZE; + input += CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + return SECSuccess; +} + +SECStatus +camellia_decryptCBC(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + const unsigned char *in; + unsigned char *out; + unsigned int j; + unsigned char newIV[CAMELLIA_BLOCK_SIZE]; + CamelliaBlockFunc *decryptor; + + if (!inputLen) + return SECSuccess; + + PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); + + in = input + (inputLen - CAMELLIA_BLOCK_SIZE); + memcpy(newIV, in, CAMELLIA_BLOCK_SIZE); + out = output + (inputLen - CAMELLIA_BLOCK_SIZE); + + decryptor = (cx->keysize == 16) + ? 
&camellia_decrypt128 + : &camellia_decrypt256; + + while (inputLen > CAMELLIA_BLOCK_SIZE) { + (*decryptor)(cx->expandedKey, out, in); + + for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j) + out[j] ^= in[(int)(j - CAMELLIA_BLOCK_SIZE)]; + + out -= CAMELLIA_BLOCK_SIZE; + in -= CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + if (in == input) { + (*decryptor)(cx->expandedKey, out, in); + + for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j) + out[j] ^= cx->iv[j]; + } + memcpy(cx->iv, newIV, CAMELLIA_BLOCK_SIZE); + return SECSuccess; +} + +/************************************************************************** + * + * BLAPI Interface functions + * + *************************************************************************/ + +CamelliaContext * +Camellia_AllocateContext(void) +{ + return PORT_ZNew(CamelliaContext); +} + +SECStatus +Camellia_InitContext(CamelliaContext *cx, const unsigned char *key, + unsigned int keysize, + const unsigned char *iv, int mode, unsigned int encrypt, + unsigned int unused) +{ + if (key == NULL || + (keysize != 16 && keysize != 24 && keysize != 32)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode != NSS_CAMELLIA && mode != NSS_CAMELLIA_CBC) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_CAMELLIA_CBC && iv == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_CAMELLIA_CBC) { + memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE); + cx->worker = (encrypt) ? &camellia_encryptCBC : &camellia_decryptCBC; + } else { + cx->worker = (encrypt) ? 
&camellia_encryptECB : &camellia_decryptECB; + } + + /* Generate expanded key */ + if (camellia_key_expansion(cx, key, keysize) != SECSuccess) + goto cleanup; + + return SECSuccess; +cleanup: + return SECFailure; +} + +/* + * Camellia_CreateContext + * create a new context for Camellia operations + */ + +CamelliaContext * +Camellia_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keysize) +{ + CamelliaContext *cx; + + if (key == NULL || + (keysize != 16 && keysize != 24 && keysize != 32)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + if (mode != NSS_CAMELLIA && mode != NSS_CAMELLIA_CBC) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + if (mode == NSS_CAMELLIA_CBC && iv == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + cx = PORT_ZNew(CamelliaContext); + if (!cx) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return NULL; + } + + /* copy in the iv, if neccessary */ + if (mode == NSS_CAMELLIA_CBC) { + memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE); + cx->worker = (encrypt) ? &camellia_encryptCBC : &camellia_decryptCBC; + } else { + cx->worker = (encrypt) ? &camellia_encryptECB : &camellia_decryptECB; + } + /* copy keysize */ + cx->keysize = keysize; + + /* Generate expanded key */ + if (camellia_key_expansion(cx, key, keysize) != SECSuccess) + goto cleanup; + + return cx; +cleanup: + PORT_ZFree(cx, sizeof *cx); + return NULL; +} + +/* + * Camellia_DestroyContext + * + * Zero an Camellia cipher context. If freeit is true, also free the pointer + * to the context. + */ +void +Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit) +{ + if (cx) + memset(cx, 0, sizeof *cx); + if (freeit) + PORT_Free(cx); +} + +/* + * Camellia_Encrypt + * + * Encrypt an arbitrary-length buffer. The output buffer must already be + * allocated to at least inputLen. 
+ */ +SECStatus +Camellia_Encrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + + /* Check args */ + if (cx == NULL || output == NULL || input == NULL || + outputLen == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (inputLen % CAMELLIA_BLOCK_SIZE != 0) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outputLen = inputLen; + + return (*cx->worker)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +/* + * Camellia_Decrypt + * + * Decrypt and arbitrary-length buffer. The output buffer must already be + * allocated to at least inputLen. + */ +SECStatus +Camellia_Decrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + + /* Check args */ + if (cx == NULL || output == NULL || input == NULL || outputLen == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (inputLen % CAMELLIA_BLOCK_SIZE != 0) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outputLen = inputLen; + + return (*cx->worker)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} diff --git a/security/nss/lib/freebl/camellia.h b/security/nss/lib/freebl/camellia.h new file mode 100644 index 0000000000..15114db9a6 --- /dev/null +++ b/security/nss/lib/freebl/camellia.h @@ -0,0 +1,42 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _CAMELLIA_H_ +#define _CAMELLIA_H_ 1 + +#define CAMELLIA_BLOCK_SIZE 16 /* bytes */ +#define CAMELLIA_MIN_KEYSIZE 16 /* bytes */ +#define CAMELLIA_MAX_KEYSIZE 32 /* bytes */ + +#define CAMELLIA_MAX_EXPANDEDKEY (34 * 2) /* 32bit unit */ + +typedef PRUint32 KEY_TABLE_TYPE[CAMELLIA_MAX_EXPANDEDKEY]; + +typedef SECStatus CamelliaFunc(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +typedef SECStatus CamelliaBlockFunc(const PRUint32 *subkey, + unsigned char *output, + const unsigned char *input); + +/* CamelliaContextStr + * + * Values which maintain the state for Camellia encryption/decryption. + * + * keysize - the key size in bytes + * worker - the encryption/decryption function to use with this context + * iv - initialization vector for CBC mode + * expandedKey - the round keys in 4-byte words + */ +struct CamelliaContextStr { + PRUint32 keysize; /* bytes */ + CamelliaFunc *worker; + PRUint32 expandedKey[CAMELLIA_MAX_EXPANDEDKEY]; + PRUint8 iv[CAMELLIA_BLOCK_SIZE]; +}; + +#endif /* _CAMELLIA_H_ */ diff --git a/security/nss/lib/freebl/chacha20-ppc64le.S b/security/nss/lib/freebl/chacha20-ppc64le.S new file mode 100644 index 0000000000..487ff830a5 --- /dev/null +++ b/security/nss/lib/freebl/chacha20-ppc64le.S @@ -0,0 +1,668 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + +# vs0 - vs15 : buffer for xor +# vs32 - vs47 (v0 - v15) : 4 "converted" states +# vs48 - vs51 (v16 - v19) : original state +# vs52 - vs55 (v20 - v23) : "converted" constants +# vs56 (v24) : "converted" counter +# vs57 (v25) : increment for "converted" counter +# vs60 - vs63 (v28 - v31) : constants for rotate left or vpermxor + +#define r0 0 +#define sp 1 +#define r2 2 +#define rSIZE 3 +#define rDST 4 +#define rSRC 5 +#define rKEY 6 +#define rNONCE 7 +#define rCNTR 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 +#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 + +#define vs0 0 +#define vs1 1 +#define vs2 2 +#define vs3 3 +#define vs4 4 +#define vs5 5 +#define vs6 6 +#define vs7 7 +#define vs8 8 +#define vs9 9 +#define vs10 10 +#define vs11 11 +#define vs12 12 +#define vs13 13 +#define vs14 14 +#define vs15 15 +#define vs16 16 +#define vs17 17 +#define vs18 18 +#define vs19 19 +#define vs20 20 +#define vs21 21 +#define vs22 22 +#define vs23 23 +#define vs24 24 +#define vs25 25 +#define vs26 26 +#define vs27 27 +#define vs28 28 +#define vs29 29 +#define vs30 30 +#define vs31 31 +#define vs32 32 +#define vs33 33 +#define vs34 34 +#define vs35 35 +#define vs36 36 +#define vs37 37 +#define vs38 38 +#define vs39 39 
+#define vs40 40 +#define vs41 41 +#define vs42 42 +#define vs43 43 +#define vs44 44 +#define vs45 45 +#define vs46 46 +#define vs47 47 +#define vs48 48 +#define vs49 49 +#define vs50 50 +#define vs51 51 +#define vs52 52 +#define vs53 53 +#define vs54 54 +#define vs55 55 +#define vs56 56 +#define vs57 57 +#define vs58 58 +#define vs59 59 +#define vs60 60 +#define vs61 61 +#define vs62 62 +#define vs63 63 + +.abiversion 2 +.section ".data" +.align 5 +lblock: .skip 256 +cnts0: .long 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 +cnts1: .long 0x61707865, 0x61707865, 0x61707865, 0x61707865 +cnts2: .long 0x3320646e, 0x3320646e, 0x3320646e, 0x3320646e +cnts3: .long 0x79622d32, 0x79622d32, 0x79622d32, 0x79622d32 +cnts4: .long 0x6b206574, 0x6b206574, 0x6b206574, 0x6b206574 +st4: .long 0, 0, 0, 0 +cntr: .long 0, 0, 0, 0 +incr: .long 4, 4, 4, 4 +rotl1: .long 0x22330011, 0x66774455, 0xAABB8899, 0xEEFFCCDD +rotl2: .long 12, 12, 12, 12 +rotl3: .long 0x11223300, 0x55667744, 0x99AABB88, 0xDDEEFFCC +rotl4: .long 7, 7, 7, 7 + +.section ".text" +.align 5 +.globl chacha20vsx +.type chacha20vsx, @function +chacha20vsx: + # prologue + addis 2, r12, .TOC.-chacha20vsx@ha + addi 2, 2, .TOC.-chacha20vsx@l + .localentry chacha20vsx, .-chacha20vsx + std r14, -8(sp) + std r15, -16(sp) + std r16, -24(sp) + std r17, -32(sp) + std r18, -40(sp) + std r19, -48(sp) + std r20, -56(sp) + std r21, -64(sp) + std r22, -72(sp) + std r23, -80(sp) + std r24, -88(sp) + std r25, -96(sp) + std r26, -104(sp) + std r27, -112(sp) + std r28, -120(sp) + std r29, -128(sp) + std r30, -136(sp) + std r31, -144(sp) + + addi r14, sp, -160 + + li r16, -16 + li r17, -32 + li r18, -48 + li r19, -64 + li r20, -80 + li r21, -96 + li r22, -112 + li r23, -128 + li r24, -144 + li r25, -160 + li r26, -176 + li r27, -192 + li r28, -208 + + # save f14, f15 + stxvw4x vs14, 0, r14 + stxvw4x vs15, r16, r14 + + # save v20 - v31 + stxvw4x vs52, r17, r14 + stxvw4x vs53, r18, r14 + stxvw4x vs54, r19, r14 + stxvw4x vs55, r20, r14 + 
stxvw4x vs56, r21, r14 + stxvw4x vs57, r22, r14 + stxvw4x vs58, r23, r14 + stxvw4x vs59, r24, r14 + stxvw4x vs60, r25, r14 + stxvw4x vs61, r26, r14 + stxvw4x vs62, r27, r14 + stxvw4x vs63, r28, r14 + + # offset in src/dst + li r17, 16 + li r18, 32 + li r19, 48 + li r20, 64 + li r21, 80 + li r22, 96 + li r23, 112 + li r24, 128 + li r25, 144 + li r26, 160 + li r27, 176 + li r28, 192 + li r29, 208 + li r30, 224 + li r31, 240 + + # load const's address + addis r14, 2, cnts0@toc@ha + addi r14, r14, cnts0@toc@l + + # save nonce to st4 words 1-3 (NOTE(review): st4 and cntr are writable .data, so concurrent calls would share them - confirm callers serialize) + lwz r15, 0(rNONCE) + stw r15, 84(r14) + lwz r15, 4(rNONCE) + stw r15, 88(r14) + lwz r15, 8(rNONCE) + stw r15, 92(r14) + + # load state to vectors + lxvw4x vs48, 0, r14 + lxvw4x vs49, 0, rKEY + lxvw4x vs50, r17, rKEY + lxvw4x vs51, r21, r14 + + # load consts for x4 rounds + lxvw4x vs52, r17, r14 + lxvw4x vs53, r18, r14 + lxvw4x vs54, r19, r14 + lxvw4x vs55, r20, r14 + + # counter: store ctr, ctr+1, ctr+2, ctr+3 to cntr and load them as one vector + stw rCNTR, 96(r14) + addi rCNTR, rCNTR, 1 + stw rCNTR, 100(r14) + addi rCNTR, rCNTR, 1 + stw rCNTR, 104(r14) + addi rCNTR, rCNTR, 1 + stw rCNTR, 108(r14) + lxvw4x vs56, r22, r14 + + # load increment + lxvw4x vs57, r23, r14 + + # load rotl to vectors + lxvw4x vs60, r24, r14 + lxvw4x vs61, r25, r14 + lxvw4x vs62, r26, r14 + lxvw4x vs63, r27, r14 + + # counter for loop = size/256 + li r15, 256 + divdu.
r16, rSIZE, r15 + beq lastblock + mtctr r16 + +mainloop: + # init 16 vectors (4 states x4) + vor v0, v20, v20 + vor v1, v21, v21 + vor v2, v22, v22 + vor v3, v23, v23 + vspltw v4, v17, v0 + vspltw v5, v17, v1 + vspltw v6, v17, v2 + vspltw v7, v17, v3 + vspltw v8, v18, v0 + vspltw v9, v18, v1 + vspltw v10, v18, v2 + vspltw v11, v18, v3 + vor v12, v24, v24 + vspltw v13, v19, v1 + vspltw v14, v19, v2 + vspltw v15, v19, v3 + +.macro _plus a b_y b_x + vadduwm \a, \a, \b_y*4+(\b_x)%4 + vadduwm \a+1, \a+1, \b_y*4+(\b_x+1)%4 + vadduwm \a+2, \a+2, \b_y*4+(\b_x+2)%4 + vadduwm \a+3, \a+3, \b_y*4+(\b_x+3)%4 +.endm + +.macro _xor a b_y b_x + vxor \a, \a, \b_y*4+(\b_x)%4 + vxor \a+1, \a+1, \b_y*4+(\b_x+1)%4 + vxor \a+2, \a+2, \b_y*4+(\b_x+2)%4 + vxor \a+3, \a+3, \b_y*4+(\b_x+3)%4 +.endm + +.macro _rotl a b + vrlw \a, \a, \b + vrlw \a+1, \a+1, \b + vrlw \a+2, \a+2, \b + vrlw \a+3, \a+3, \b +.endm + +.macro _pxor a b_y b_x c + vpermxor \a, \a, \b_y*4+(\b_x)%4, \c + vpermxor \a+1, \a+1, \b_y*4+(\b_x+1)%4, \c + vpermxor \a+2, \a+2, \b_y*4+(\b_x+2)%4, \c + vpermxor \a+3, \a+3, \b_y*4+(\b_x+3)%4, \c +.endm + +# 00 01 02 03 +# 04 05 06 07 +# 08 09 10 11 +# 12 13 14 15 +.macro doubleround + # column round + _plus v0, v1, v0 # a+=b + _pxor v12, v0, v0, v28 # d^=a; d<<<=16 + _plus v8, v3, v0 # c+=d + _xor v4, v2, v0 # b^=c + _rotl v4, v29 # b<<<=12 + _plus v0, v1, v0 # a+=b + _pxor v12, v0, v0, v30 # d^=a; d<<<=8 + _plus v8, v3, v0 # c+=d + _xor v4, v2, v0 # b^=c + _rotl v4, v31 # b<<<=7 + + # diagonal round + _plus v0, v1, v1 # a+=b + _pxor v12, v0, v1, v28 # d^=a; d<<<=16 + _plus v8, v3, v1 # c+=d + _xor v4, v2, v1 # b^=c + _rotl v4, v29 # b<<<=12 + _plus v0, v1, v1 # a+=b + _pxor v12, v0, v1, v30 # d^=a; d<<<=8 + _plus v8, v3, v1 # c+=d + _xor v4, v2, v1 # b^=c + _rotl v4, v31 # b<<<=7 +.endm + + doubleround # 1 + doubleround # 2 + doubleround # 3 + doubleround # 4 + doubleround # 5 + doubleround # 6 + doubleround # 7 + doubleround # 8 + doubleround # 9 + doubleround # 10 + + # counter 
+= original counter + vadduwm v12, v12, v24 + +.macro convert a + vmrgew 26, 0+\a, 1+\a + vmrgew 27, 2+\a, 3+\a + vmrgow 0+\a, 0+\a, 1+\a + vmrgow 2+\a, 2+\a, 3+\a + xxmrghd 33+\a, 32+\a, 34+\a + xxmrgld 35+\a, 32+\a, 34+\a + xxmrghd 32+\a, 58, 59 + xxmrgld 34+\a, 58, 59 +.endm + + convert 0 + convert 4 + convert 8 + convert 12 + +.macro addition a + vadduwm 0+\a, 0+\a, 16 + vadduwm 4+\a, 4+\a, 17 + vadduwm 8+\a, 8+\a, 18 + vadduwm 12+\a, 12+\a, 19 +.endm + + addition 0 + addition 1 + addition 2 + addition 3 + + # load text/cipher + lxvw4x vs0, 0, rSRC + lxvw4x vs1, r17, rSRC + lxvw4x vs2, r18, rSRC + lxvw4x vs3, r19, rSRC + lxvw4x vs4, r20, rSRC + lxvw4x vs5, r21, rSRC + lxvw4x vs6, r22, rSRC + lxvw4x vs7, r23, rSRC + lxvw4x vs8, r24, rSRC + lxvw4x vs9, r25, rSRC + lxvw4x vs10, r26, rSRC + lxvw4x vs11, r27, rSRC + lxvw4x vs12, r28, rSRC + lxvw4x vs13, r29, rSRC + lxvw4x vs14, r30, rSRC + lxvw4x vs15, r31, rSRC + # xor (encrypt/decrypt) + xxlxor vs0, vs0, vs32 + xxlxor vs1, vs1, vs36 + xxlxor vs2, vs2, vs40 + xxlxor vs3, vs3, vs44 + xxlxor vs4, vs4, vs33 + xxlxor vs5, vs5, vs37 + xxlxor vs6, vs6, vs41 + xxlxor vs7, vs7, vs45 + xxlxor vs8, vs8, vs34 + xxlxor vs9, vs9, vs38 + xxlxor vs10, vs10, vs42 + xxlxor vs11, vs11, vs46 + xxlxor vs12, vs12, vs35 + xxlxor vs13, vs13, vs39 + xxlxor vs14, vs14, vs43 + xxlxor vs15, vs15, vs47 + # store cipher/text + stxvw4x vs0, 0, rDST + stxvw4x vs1, r17, rDST + stxvw4x vs2, r18, rDST + stxvw4x vs3, r19, rDST + stxvw4x vs4, r20, rDST + stxvw4x vs5, r21, rDST + stxvw4x vs6, r22, rDST + stxvw4x vs7, r23, rDST + stxvw4x vs8, r24, rDST + stxvw4x vs9, r25, rDST + stxvw4x vs10, r26, rDST + stxvw4x vs11, r27, rDST + stxvw4x vs12, r28, rDST + stxvw4x vs13, r29, rDST + stxvw4x vs14, r30, rDST + stxvw4x vs15, r31, rDST + + # src/dst increment + addi rSRC, rSRC, 256 + addi rDST, rDST, 256 + + # counter increment + vadduwm v24, v24, v25 + + bdnz mainloop + +lastblock: + # remainder of size after the full 256-byte blocks + mulld r16, r16, r15 + subf.
r16, r16, rSIZE + + # check remainder + beq exitsub + + addi r14, r14, -256 + # last block x4 + # init 16 vectors (4 states x4) + vor v0, v20, v20 + vor v1, v21, v21 + vor v2, v22, v22 + vor v3, v23, v23 + vspltw v4, v17, v0 + vspltw v5, v17, v1 + vspltw v6, v17, v2 + vspltw v7, v17, v3 + vspltw v8, v18, v0 + vspltw v9, v18, v1 + vspltw v10, v18, v2 + vspltw v11, v18, v3 + vor v12, v24, v24 + vspltw v13, v19, v1 + vspltw v14, v19, v2 + vspltw v15, v19, v3 + + doubleround # 1 + doubleround # 2 + doubleround # 3 + doubleround # 4 + doubleround # 5 + doubleround # 6 + doubleround # 7 + doubleround # 8 + doubleround # 9 + doubleround # 10 + + vadduwm v12, v12, v24 + + convert 0 + convert 4 + convert 8 + convert 12 + + addition 0 + addition 1 + addition 2 + addition 3 + + # store keystream vectors to the lblock scratch buffer + stxvw4x vs32, 0, r14 + stxvw4x vs36, r17, r14 + stxvw4x vs40, r18, r14 + stxvw4x vs44, r19, r14 + stxvw4x vs33, r20, r14 + stxvw4x vs37, r21, r14 + stxvw4x vs41, r22, r14 + stxvw4x vs45, r23, r14 + stxvw4x vs34, r24, r14 + stxvw4x vs38, r25, r14 + stxvw4x vs42, r26, r14 + stxvw4x vs46, r27, r14 + stxvw4x vs35, r28, r14 + stxvw4x vs39, r29, r14 + stxvw4x vs43, r30, r14 + stxvw4x vs47, r31, r14 + + mtctr r16 + addi rSIZE, r14, -1 + addi rSRC, rSRC, -1 + addi rDST, rDST, -1 +xorlast: + lbzu r15, 1(rSIZE) + lbzu r16, 1(rSRC) + xor r15, r15, r16 + stbu r15, 1(rDST) + bdnz xorlast + + # zeroing last block + xxlxor vs0, vs0, vs0 + stxvw4x vs0, 0, r14 + stxvw4x vs0, r17, r14 + stxvw4x vs0, r18, r14 + stxvw4x vs0, r19, r14 + stxvw4x vs0, r20, r14 + stxvw4x vs0, r21, r14 + stxvw4x vs0, r22, r14 + stxvw4x vs0, r23, r14 + stxvw4x vs0, r24, r14 + stxvw4x vs0, r25, r14 + stxvw4x vs0, r26, r14 + stxvw4x vs0, r27, r14 + stxvw4x vs0, r28, r14 + stxvw4x vs0, r29, r14 + stxvw4x vs0, r30, r14 + stxvw4x vs0, r31, r14 + +exitsub: + # zeroing volatile registers + xxlxor vs0, vs0, vs0 + xxlxor vs1, vs1, vs1 + xxlxor vs2, vs2, vs2 + xxlxor vs3, vs3, vs3 + xxlxor vs4, vs4, vs4 + xxlxor vs5, vs5, vs5 + xxlxor
vs6, vs6, vs6 + xxlxor vs7, vs7, vs7 + xxlxor vs8, vs8, vs8 + xxlxor vs9, vs9, vs9 + xxlxor vs10, vs10, vs10 + xxlxor vs11, vs11, vs11 + xxlxor vs12, vs12, vs12 + xxlxor vs13, vs13, vs13 + + xxlxor vs32, vs32, vs32 + xxlxor vs33, vs33, vs33 + xxlxor vs34, vs34, vs34 + xxlxor vs35, vs35, vs35 + xxlxor vs36, vs36, vs36 + xxlxor vs37, vs37, vs37 + xxlxor vs38, vs38, vs38 + xxlxor vs39, vs39, vs39 + xxlxor vs40, vs40, vs40 + xxlxor vs41, vs41, vs41 + xxlxor vs42, vs42, vs42 + xxlxor vs43, vs43, vs43 + xxlxor vs44, vs44, vs44 + xxlxor vs45, vs45, vs45 + xxlxor vs46, vs46, vs46 + xxlxor vs47, vs47, vs47 + xxlxor vs48, vs48, vs48 + xxlxor vs49, vs49, vs49 + xxlxor vs50, vs50, vs50 + xxlxor vs51, vs51, vs51 + + li rSIZE, 0 + li rDST, 0 + li rSRC, 0 + li rKEY, 0 + li rNONCE, 0 + li rCNTR, 0 + + # epilogue + addi r14, sp, -160 + + li r16, -16 + li r17, -32 + li r18, -48 + li r19, -64 + li r20, -80 + li r21, -96 + li r22, -112 + li r23, -128 + li r24, -144 + li r25, -160 + li r26, -176 + li r27, -192 + li r28, -208 + + # load f14, f15 + lxvw4x vs14, 0, r14 + lxvw4x vs15, r16, r14 + + # load v20 - v31 + lxvw4x vs52, r17, r14 + lxvw4x vs53, r18, r14 + lxvw4x vs54, r19, r14 + lxvw4x vs55, r20, r14 + lxvw4x vs56, r21, r14 + lxvw4x vs57, r22, r14 + lxvw4x vs58, r23, r14 + lxvw4x vs59, r24, r14 + lxvw4x vs60, r25, r14 + lxvw4x vs61, r26, r14 + lxvw4x vs62, r27, r14 + lxvw4x vs63, r28, r14 + + ld r14, -8(sp) + ld r15, -16(sp) + ld r16, -24(sp) + ld r17, -32(sp) + ld r18, -40(sp) + ld r19, -48(sp) + ld r20, -56(sp) + ld r21, -64(sp) + ld r22, -72(sp) + ld r23, -80(sp) + ld r24, -88(sp) + ld r25, -96(sp) + ld r26, -104(sp) + ld r27, -112(sp) + ld r28, -120(sp) + ld r29, -128(sp) + ld r30, -136(sp) + ld r31, -144(sp) + + blr diff --git a/security/nss/lib/freebl/chacha20poly1305-ppc.c b/security/nss/lib/freebl/chacha20poly1305-ppc.c new file mode 100644 index 0000000000..c9766cd403 --- /dev/null +++ b/security/nss/lib/freebl/chacha20poly1305-ppc.c @@ -0,0 +1,588 @@ +/* MIT License + * + 
* Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "Hacl_Chacha20Poly1305_32.h" + +/* Forward declaration from chacha20-ppc64le.S */ +void chacha20vsx(uint32_t len, uint8_t *output, uint8_t *block, uint8_t *k, + uint8_t *nonce, uint32_t ctr); + +static inline void +poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem = text + n * (uint32_t)16U; + uint64_t *pre0 = ctx + (uint32_t)5U; + uint64_t *acc0 = ctx; + uint32_t nb = n * (uint32_t)16U / (uint32_t)16U; + uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = blocks + i * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + 
uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = 
o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last = blocks + nb * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem1 * sizeof(last[0U])); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + uint64_t mask = b; + uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = 
a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem, r * sizeof(rem[0U])); + if (r > (uint32_t)0U) { + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & 
(uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = 
a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +static inline void +poly1305_do_32( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + uint64_t ctx[25U] = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_32_poly1305_init(ctx, k); + poly1305_padded_32(ctx, aadlen, aad); + poly1305_padded_32(ctx, mlen, m); + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = 
f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & 
mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_32_poly1305_finish(out, k, ctx); +} + +void +Chacha20Poly1305_vsx_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + chacha20vsx(mlen, cipher, m, k, n, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + chacha20vsx((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_32(key, aadlen, aad, mlen, cipher, mac); +} + +uint32_t +Chacha20Poly1305_vsx_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + chacha20vsx((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac); + uint8_t res = (uint8_t)255U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res; + } + uint8_t z = res; + if (z == (uint8_t)255U) { + chacha20vsx(mlen, m, cipher, k, n, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git 
a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c new file mode 100644 index 0000000000..29bbc9d1c1 --- /dev/null +++ b/security/nss/lib/freebl/chacha20poly1305.c @@ -0,0 +1,549 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include +#include + +#include "seccomon.h" +#include "secerr.h" +#include "blapit.h" +#include "blapii.h" +#include "chacha20poly1305.h" + +// There are three implementations of ChaCha20Poly1305: +// 1) 128-bit with AVX hardware acceleration used on x64 +// 2) 256-bit with AVX2 hardware acceleration used on x64 +// 3) 32-bit used on all other platforms + +// On x64 when AVX2 and other necessary registers are available, +// the 256bit-vectorized version will be used. When AVX2 features +// are unavailable or disabled but AVX registers are available, the +// 128bit-vectorized version will be used. In all other cases the +// scalar version of the HACL* code will be used. + +// Instead of including the headers (they bring other things we don't want), +// we declare the functions here. +// Usage is guarded by runtime checks of required hardware features. + +// Forward declaration from Hacl_Chacha20_Vec128.h and Hacl_Chacha20Poly1305_128.h.
+extern void Hacl_Chacha20_Vec128_chacha20_encrypt_128(uint32_t len, uint8_t *out, + uint8_t *text, uint8_t *key, + uint8_t *n1, uint32_t ctr); +extern void +Hacl_Chacha20Poly1305_128_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); +extern uint32_t +Hacl_Chacha20Poly1305_128_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); + +// Forward declaration from Hacl_Chacha20_Vec256.h and Hacl_Chacha20Poly1305_256.h. +extern void Hacl_Chacha20_Vec256_chacha20_encrypt_256(uint32_t len, uint8_t *out, + uint8_t *text, uint8_t *key, + uint8_t *n1, uint32_t ctr); +extern void +Hacl_Chacha20Poly1305_256_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); +extern uint32_t +Hacl_Chacha20Poly1305_256_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); + +// Forward declaration from Hacl_Chacha20.h and Hacl_Chacha20Poly1305_32.h. 
+extern void Hacl_Chacha20_chacha20_encrypt(uint32_t len, uint8_t *out, + uint8_t *text, uint8_t *key, + uint8_t *n1, uint32_t ctr); +extern void +Hacl_Chacha20Poly1305_32_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); +extern uint32_t +Hacl_Chacha20Poly1305_32_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); + +// Forward declaration from chacha20-ppc64le.S +void chacha20vsx(uint32_t len, uint8_t *output, uint8_t *block, uint8_t *k, + uint8_t *nonce, uint32_t ctr); + +// Forward declaration from chacha20poly1305-ppc.c +extern void +Chacha20Poly1305_vsx_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); +extern uint32_t +Chacha20Poly1305_vsx_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, + uint8_t *aad, uint32_t mlen, uint8_t *m, + uint8_t *cipher, uint8_t *mac); + +SECStatus +ChaCha20_InitContext(ChaCha20Context *ctx, const unsigned char *key, + unsigned int keyLen, const unsigned char *nonce, + unsigned int nonceLen, PRUint32 ctr) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + if (keyLen != 32) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + if (nonceLen != 12) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + ctx->counter = ctr; + PORT_Memcpy(ctx->key, key, sizeof(ctx->key)); + PORT_Memcpy(ctx->nonce, nonce, sizeof(ctx->nonce)); + + return SECSuccess; +#endif +} + +ChaCha20Context * +ChaCha20_CreateContext(const unsigned char *key, unsigned int keyLen, + const unsigned char *nonce, unsigned int nonceLen, + PRUint32 ctr) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return NULL; +#else + ChaCha20Context *ctx; + + ctx = PORT_New(ChaCha20Context); + if (ctx == NULL) { + return NULL; + } + + if (ChaCha20_InitContext(ctx, key, keyLen, nonce, nonceLen, ctr) != 
SECSuccess) { + PORT_Free(ctx); + ctx = NULL; + } + + return ctx; +#endif +} + +void +ChaCha20_DestroyContext(ChaCha20Context *ctx, PRBool freeit) +{ +#ifndef NSS_DISABLE_CHACHAPOLY + PORT_Memset(ctx, 0, sizeof(*ctx)); + if (freeit) { + PORT_Free(ctx); + } +#endif +} + +SECStatus +ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx, + const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + if (keyLen != 32) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + if (tagLen != 16) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + PORT_Memcpy(ctx->key, key, sizeof(ctx->key)); + ctx->tagLen = tagLen; + + return SECSuccess; +#endif +} + +ChaCha20Poly1305Context * +ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return NULL; +#else + ChaCha20Poly1305Context *ctx; + + ctx = PORT_New(ChaCha20Poly1305Context); + if (ctx == NULL) { + return NULL; + } + + if (ChaCha20Poly1305_InitContext(ctx, key, keyLen, tagLen) != SECSuccess) { + PORT_Free(ctx); + ctx = NULL; + } + + return ctx; +#endif +} + +void +ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit) +{ +#ifndef NSS_DISABLE_CHACHAPOLY + PORT_Memset(ctx, 0, sizeof(*ctx)); + if (freeit) { + PORT_Free(ctx); + } +#endif +} + +#ifndef NSS_DISABLE_CHACHAPOLY +/* Run ChaCha20 over len bytes of block into output using key k, the given + * nonce and initial block counter ctr. Exactly one implementation is used: + * the first accelerated variant the CPU supports, otherwise the fallback at + * the end of the function. Each accelerated x64 branch returns so that the + * data is processed once only; ChaCha20 XORs a keystream into the data, so a + * second pass over an in-place buffer would cancel the first. */ +void +ChaCha20Xor(uint8_t *output, uint8_t *block, uint32_t len, uint8_t *k, + uint8_t *nonce, uint32_t ctr) +{ +#ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + Hacl_Chacha20_Vec256_chacha20_encrypt_256(len, output, block, k, nonce, ctr); + return; + } +#endif + +#ifndef NSS_DISABLE_SSE3 + if (ssse3_support() && sse4_1_support() && avx_support()) { + Hacl_Chacha20_Vec128_chacha20_encrypt_128(len, output, block, k, nonce, ctr); + return; + } +#endif + +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ + !defined(NSS_DISABLE_ALTIVEC) && 
!defined(NSS_DISABLE_CRYPTO_VSX) + if (ppc_crypto_support()) { + chacha20vsx(len, output, block, k, nonce, ctr); + } else +#endif + { + Hacl_Chacha20_chacha20_encrypt(len, output, block, k, nonce, ctr); + } +} +#endif /* NSS_DISABLE_CHACHAPOLY */ + +SECStatus +ChaCha20_Xor(unsigned char *output, const unsigned char *block, unsigned int len, + const unsigned char *k, const unsigned char *nonce, PRUint32 ctr) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + // ChaCha has a 64 octet block, with a 32-bit block counter. + if (sizeof(len) > 4) { + unsigned long long len_ull = len; + if (len_ull >= (1ULL << (6 + 32))) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + ChaCha20Xor(output, (uint8_t *)block, len, (uint8_t *)k, + (uint8_t *)nonce, ctr); + return SECSuccess; +#endif +} + +SECStatus +ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + + if (nonceLen != 12) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + // ChaCha has a 64 octet block, with a 32-bit block counter. 
+ if (sizeof(inputLen) > 4) { + unsigned long long inputLen_ull = inputLen; + if (inputLen_ull >= (1ULL << (6 + 32))) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + /* The output must hold the ciphertext plus the tag. Compare without + * computing inputLen + tagLen: that sum is unsigned and could wrap around + * for a very large inputLen, letting an undersized buffer through. */ + if (maxOutputLen < ctx->tagLen || maxOutputLen - ctx->tagLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + +#ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + Hacl_Chacha20Poly1305_256_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, output + inputLen); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 + if (ssse3_support() && sse4_1_support() && avx_support()) { + Hacl_Chacha20Poly1305_128_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, output + inputLen); + goto finish; + } +#endif + +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ + !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) + if (ppc_crypto_support()) { + Chacha20Poly1305_vsx_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, output + inputLen); + goto finish; + } +#endif + { + Hacl_Chacha20Poly1305_32_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, output + inputLen); + goto finish; + } + +finish: + *outputLen = inputLen + ctx->tagLen; + return SECSuccess; +#endif +} + +SECStatus +ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + unsigned int ciphertextLen; + + if (nonceLen != 12) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (inputLen < ctx->tagLen) { + 
PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + ciphertextLen = inputLen - ctx->tagLen; + if (maxOutputLen < ciphertextLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + // ChaCha has a 64 octet block, with a 32-bit block counter. + if (inputLen >= (1ULL << (6 + 32)) + ctx->tagLen) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + uint32_t res = 1; +#ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + res = Hacl_Chacha20Poly1305_256_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 + if (ssse3_support() && sse4_1_support() && avx_support()) { + res = Hacl_Chacha20Poly1305_128_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); + goto finish; + } +#endif + +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ + !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) + if (ppc_crypto_support()) { + res = Chacha20Poly1305_vsx_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); + goto finish; + } +#endif + { + res = Hacl_Chacha20Poly1305_32_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); + goto finish; + } + +finish: + if (res) { + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + + *outputLen = ciphertextLen; + return SECSuccess; +#endif +} + +SECStatus +ChaCha20Poly1305_Encrypt(const ChaCha20Poly1305Context *ctx, + unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen, const unsigned 
char *nonce, + unsigned int nonceLen, const unsigned char *ad, + unsigned int adLen, unsigned char *outTag) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + + if (nonceLen != 12) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + // ChaCha has a 64 octet block, with a 32-bit block counter. + if (sizeof(inputLen) > 4) { + unsigned long long inputLen_ull = inputLen; + if (inputLen_ull >= (1ULL << (6 + 32))) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + +#ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + Hacl_Chacha20Poly1305_256_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, outTag); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 + if (ssse3_support() && sse4_1_support() && avx_support()) { + Hacl_Chacha20Poly1305_128_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, outTag); + goto finish; + } +#endif + +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ + !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) + if (ppc_crypto_support()) { + Chacha20Poly1305_vsx_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, outTag); + goto finish; + } +#endif + /* Every accelerated branch above ends in "goto finish", so the fallback + * below needs no "else" tying it to whichever "if" the preprocessor + * happens to leave in front of it. */ + { + Hacl_Chacha20Poly1305_32_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, outTag); + goto finish; + } + +finish: + *outputLen = inputLen; + return SECSuccess; +#endif +} + +SECStatus +ChaCha20Poly1305_Decrypt(const ChaCha20Poly1305Context *ctx, + unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen, const unsigned char *nonce, + unsigned int nonceLen, const unsigned char *ad, 
+ unsigned int adLen, const unsigned char *tagIn) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + unsigned int ciphertextLen; + + if (nonceLen != 12) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + ciphertextLen = inputLen; + if (maxOutputLen < ciphertextLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + // ChaCha has a 64 octet block, with a 32-bit block counter. + if (sizeof(inputLen) > 4) { + unsigned long long inputLen_ull = inputLen; + if (inputLen_ull >= (1ULL << (6 + 32))) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + + uint32_t res = 1; +#ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + res = Hacl_Chacha20Poly1305_256_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)tagIn); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 + if (ssse3_support() && sse4_1_support() && avx_support()) { + res = Hacl_Chacha20Poly1305_128_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)tagIn); + goto finish; + } +#endif + +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ + !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) + if (ppc_crypto_support()) { + res = Chacha20Poly1305_vsx_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)tagIn); + goto finish; + } +#endif + { + res = Hacl_Chacha20Poly1305_32_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)tagIn); + goto finish; + } + +finish: + if (res) { + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + + *outputLen = ciphertextLen; + return SECSuccess; +#endif +} diff --git a/security/nss/lib/freebl/chacha20poly1305.h 
b/security/nss/lib/freebl/chacha20poly1305.h new file mode 100644 index 0000000000..fff528af39 --- /dev/null +++ b/security/nss/lib/freebl/chacha20poly1305.h @@ -0,0 +1,21 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _CHACHA20_POLY1305_H_ +#define _CHACHA20_POLY1305_H_ 1 + +/* ChaCha20Poly1305ContextStr saves the key and tag length for a + * ChaCha20+Poly1305 AEAD operation. */ +struct ChaCha20Poly1305ContextStr { + unsigned char key[32]; + unsigned char tagLen; +}; + +struct ChaCha20ContextStr { + unsigned char key[32]; + unsigned char nonce[12]; + PRUint32 counter; +}; + +#endif /* _CHACHA20_POLY1305_H_ */ diff --git a/security/nss/lib/freebl/cmac.c b/security/nss/lib/freebl/cmac.c new file mode 100644 index 0000000000..222cef1b4c --- /dev/null +++ b/security/nss/lib/freebl/cmac.c @@ -0,0 +1,323 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "rijndael.h" +#include "blapi.h" +#include "cmac.h" +#include "secerr.h" +#include "nspr.h" + +struct CMACContextStr { + /* Information about the block cipher to use internally. The cipher should + * be placed in ECB mode so that we can use it to directly encrypt blocks. + * + * + * To add a new cipher, add an entry to CMACCipher, update CMAC_Init, + * cmac_Encrypt, and CMAC_Destroy methods to handle the new cipher, and + * add a new Context pointer to the cipher union with the correct type. */ + CMACCipher cipherType; + union { + AESContext *aes; + } cipher; + unsigned int blockSize; + + /* Internal keys which are conditionally used by the algorithm. Derived + * from encrypting the NULL block. 
We leave the storing of (and the + * cleanup of) the CMAC key to the underlying block cipher. */ + unsigned char k1[MAX_BLOCK_SIZE]; + unsigned char k2[MAX_BLOCK_SIZE]; + + /* When Update is called with data which isn't a multiple of the block + * size, we need a place to put it. HMAC handles this by passing it to + * the underlying hash function right away; we can't do that as the + * contract on the cipher object is different. */ + unsigned int partialIndex; + unsigned char partialBlock[MAX_BLOCK_SIZE]; + + /* Last encrypted block. This gets xor-ed with partialBlock prior to + * encrypting it. NIST defines this to be the empty string to begin. */ + unsigned char lastBlock[MAX_BLOCK_SIZE]; +}; + +static void +cmac_ShiftLeftOne(unsigned char *out, const unsigned char *in, int length) +{ + int i = 0; + for (; i < length - 1; i++) { + out[i] = in[i] << 1; + out[i] |= in[i + 1] >> 7; + } + out[i] = in[i] << 1; +} + +static SECStatus +cmac_Encrypt(CMACContext *ctx, unsigned char *output, + const unsigned char *input, + unsigned int inputLen) +{ + if (ctx->cipherType == CMAC_AES) { + unsigned int tmpOutputLen; + SECStatus rv = AES_Encrypt(ctx->cipher.aes, output, &tmpOutputLen, + ctx->blockSize, input, inputLen); + + /* Assumption: AES_Encrypt (when in ECB mode) always returns an + * output of length equal to blockSize (what was pass as the value + * of the maxOutputLen parameter). */ + PORT_Assert(tmpOutputLen == ctx->blockSize); + return rv; + } + + return SECFailure; +} + +/* NIST SP.800-38B, 6.1 Subkey Generation */ +static SECStatus +cmac_GenerateSubkeys(CMACContext *ctx) +{ + unsigned char null_block[MAX_BLOCK_SIZE] = { 0 }; + unsigned char L[MAX_BLOCK_SIZE]; + unsigned char v; + unsigned char i; + + /* Step 1: L = AES(key, null_block) */ + if (cmac_Encrypt(ctx, L, null_block, ctx->blockSize) != SECSuccess) { + return SECFailure; + } + + /* In the following, some effort has been made to be constant time. 
Rather + * than conditioning on the value of the MSB (of L or K1), we use the loop + * to build a mask for the conditional constant. */ + + /* Step 2: If MSB(L) = 0, K1 = L << 1. Else, K1 = (L << 1) ^ R_b. */ + cmac_ShiftLeftOne(ctx->k1, L, ctx->blockSize); + v = L[0] >> 7; + /* v is 0 or 1 here; OR-ing in copies shifted by 1, 2 and 4 spreads that + * bit over the whole byte, giving a mask of 0x00 or 0xFF. */ + for (i = 1; i <= 7; i <<= 1) { + v |= (v << i); + } + ctx->k1[ctx->blockSize - 1] ^= (0x87 & v); + + /* Step 3: If MSB(K1) = 0, K2 = K1 << 1. Else, K2 = (K1 << 1) ^ R_b. */ + cmac_ShiftLeftOne(ctx->k2, ctx->k1, ctx->blockSize); + v = ctx->k1[0] >> 7; + /* Same mask construction as in step 2, now from the top bit of K1. */ + for (i = 1; i <= 7; i <<= 1) { + v |= (v << i); + } + ctx->k2[ctx->blockSize - 1] ^= (0x87 & v); + + /* Any intermediate value in the computation of the subkey shall be + * secret. */ + PORT_Memset(null_block, 0, MAX_BLOCK_SIZE); + PORT_Memset(L, 0, MAX_BLOCK_SIZE); + + /* Step 4: Return the values. */ + return SECSuccess; +} + +/* NIST SP.800-38B, 6.2 MAC Generation step 6 */ +static SECStatus +cmac_UpdateState(CMACContext *ctx) +{ + if (ctx == NULL || ctx->partialIndex != ctx->blockSize) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Step 6: C_i = CIPHER(key, C_{i-1} ^ M_i) for 1 <= i <= n, and + * C_0 is defined as the empty string. */ + + for (unsigned int index = 0; index < ctx->blockSize; index++) { + ctx->partialBlock[index] ^= ctx->lastBlock[index]; + } + + return cmac_Encrypt(ctx, ctx->lastBlock, ctx->partialBlock, ctx->blockSize); +} + +SECStatus +CMAC_Init(CMACContext *ctx, CMACCipher type, + const unsigned char *key, unsigned int key_len) +{ + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + + /* We only currently support AES-CMAC. 
*/ + if (type != CMAC_AES) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + PORT_Memset(ctx, 0, sizeof(*ctx)); + + ctx->blockSize = AES_BLOCK_SIZE; + ctx->cipherType = CMAC_AES; + ctx->cipher.aes = AES_CreateContext(key, NULL, NSS_AES, 1, key_len, + ctx->blockSize); + if (ctx->cipher.aes == NULL) { + return SECFailure; + } + + return CMAC_Begin(ctx); +} + +/* Allocate a CMACContext and initialize it with CMAC_Init. Returns the new + * context, or NULL if allocation or initialization fails. */ +CMACContext * +CMAC_Create(CMACCipher type, const unsigned char *key, + unsigned int key_len) +{ + /* Allocate zeroed memory: CMAC_Init can fail before it clears the + * context, and CMAC_Destroy below inspects cipherType and cipher.aes to + * decide whether there is an AES context to destroy. */ + CMACContext *result = PORT_ZNew(CMACContext); + + if (CMAC_Init(result, type, key, key_len) != SECSuccess) { + CMAC_Destroy(result, PR_TRUE); + return NULL; + } + + return result; +} + +SECStatus +CMAC_Begin(CMACContext *ctx) +{ + if (ctx == NULL) { + return SECFailure; + } + + /* Ensure that our blockSize is less than the maximum. When this fails, + * a cipher with a larger block size was added and MAX_BLOCK_SIZE needs + * to be updated accordingly. */ + PORT_Assert(ctx->blockSize <= MAX_BLOCK_SIZE); + + if (cmac_GenerateSubkeys(ctx) != SECSuccess) { + return SECFailure; + } + + /* Set the index to write partial blocks at to zero. This saves us from + * having to clear ctx->partialBlock. */ + ctx->partialIndex = 0; + + /* Step 5: Let C_0 = 0^b. */ + PORT_Memset(ctx->lastBlock, 0, ctx->blockSize); + + return SECSuccess; +} + +/* NIST SP.800-38B, 6.2 MAC Generation */ +SECStatus +CMAC_Update(CMACContext *ctx, const unsigned char *data, + unsigned int data_len) +{ + unsigned int data_index = 0; + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (data == NULL || data_len == 0) { + return SECSuccess; + } + + /* Copy as many bytes from data into ctx->partialBlock as we can, up to + * the minimum of the remaining data and the remaining space in + * ctx->partialBlock. + * + * Note that we swap the order (encrypt *then* copy) because the last + * block is different from the rest. 
If we end on an even multiple of + * the block size, we have to be able to XOR it with K1. But we won't know + * that it is the last until CMAC_Finish is called (and by then, CMAC_Update + * has already returned). */ + while (data_index < data_len) { + if (ctx->partialIndex == ctx->blockSize) { + if (cmac_UpdateState(ctx) != SECSuccess) { + return SECFailure; + } + + ctx->partialIndex = 0; + } + + unsigned int copy_len = data_len - data_index; + if (copy_len > (ctx->blockSize - ctx->partialIndex)) { + copy_len = ctx->blockSize - ctx->partialIndex; + } + + PORT_Memcpy(ctx->partialBlock + ctx->partialIndex, data + data_index, copy_len); + data_index += copy_len; + ctx->partialIndex += copy_len; + } + + return SECSuccess; +} + +/* NIST SP.800-38B, 6.2 MAC Generation */ +SECStatus +CMAC_Finish(CMACContext *ctx, unsigned char *result, + unsigned int *result_len, + unsigned int max_result_len) +{ + if (ctx == NULL || result == NULL || max_result_len == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (max_result_len > ctx->blockSize) { + /* This is a weird situation. The PKCS #11 soft tokencode passes + * sizeof(result) here, which is hard-coded as SFTK_MAX_MAC_LENGTH. + * This later gets truncated to min(SFTK_MAX_MAC_LENGTH, requested). */ + max_result_len = ctx->blockSize; + } + + /* Step 4: If M_n* is a complete block, M_n = K1 ^ M_n*. Else, + * M_n = K2 ^ (M_n* || 10^j). */ + if (ctx->partialIndex == ctx->blockSize) { + /* XOR in K1. */ + for (unsigned int index = 0; index < ctx->blockSize; index++) { + ctx->partialBlock[index] ^= ctx->k1[index]; + } + } else { + /* Use 10* padding on the partial block. */ + ctx->partialBlock[ctx->partialIndex++] = 0x80; + PORT_Memset(ctx->partialBlock + ctx->partialIndex, 0, + ctx->blockSize - ctx->partialIndex); + ctx->partialIndex = ctx->blockSize; + + /* XOR in K2. 
*/ + for (unsigned int index = 0; index < ctx->blockSize; index++) { + ctx->partialBlock[index] ^= ctx->k2[index]; + } + } + + /* Encrypt the block. */ + if (cmac_UpdateState(ctx) != SECSuccess) { + return SECFailure; + } + + /* Step 7 & 8: T = MSB_tlen(C_n); return T. */ + PORT_Memcpy(result, ctx->lastBlock, max_result_len); + if (result_len != NULL) { + *result_len = max_result_len; + } + return SECSuccess; +} + +void +CMAC_Destroy(CMACContext *ctx, PRBool free_it) +{ + if (ctx == NULL) { + return; + } + + if (ctx->cipherType == CMAC_AES && ctx->cipher.aes != NULL) { + AES_DestroyContext(ctx->cipher.aes, PR_TRUE); + } + + /* Destroy everything in the context. This includes sensitive data in + * K1, K2, and lastBlock. */ + PORT_Memset(ctx, 0, sizeof(*ctx)); + + if (free_it == PR_TRUE) { + PORT_Free(ctx); + } +} diff --git a/security/nss/lib/freebl/cmac.h b/security/nss/lib/freebl/cmac.h new file mode 100644 index 0000000000..6a6f42c797 --- /dev/null +++ b/security/nss/lib/freebl/cmac.h @@ -0,0 +1,47 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _CMAC_H_ +#define _CMAC_H_ + +typedef struct CMACContextStr CMACContext; + +SEC_BEGIN_PROTOS + +/* Enum for identifying the underlying block cipher we're using internally. */ +typedef enum { + CMAC_AES = 0 +} CMACCipher; + +/* Initialize an existing CMACContext struct. */ +SECStatus CMAC_Init(CMACContext *ctx, CMACCipher type, + const unsigned char *key, unsigned int key_len); + +/* Allocate and initialize a new CMAC context with the specified cipher and + * key. */ +CMACContext *CMAC_Create(CMACCipher type, const unsigned char *key, + unsigned int key_len); + +/* Called automatically by CMAC_*{Create,Init}(...). Only useful for restarting + * an already-started CMAC instance. 
*/ +SECStatus CMAC_Begin(CMACContext *ctx); + +/* Add the specified bytes into the CMAC state. */ +SECStatus CMAC_Update(CMACContext *ctx, const unsigned char *data, + unsigned int data_len); + +/* Finalize the CMAC state and return the result. */ +SECStatus CMAC_Finish(CMACContext *ctx, unsigned char *result, + unsigned int *result_len, + unsigned int max_result_len); + +/* Note: CMAC_Clone isn't implemented here because AES doesn't expose a + * context-cloning operation. */ + +/* Destroy a CMAC context, optionally freeing it. */ +void CMAC_Destroy(CMACContext *ctx, PRBool free_it); + +SEC_END_PROTOS + +#endif diff --git a/security/nss/lib/freebl/config.mk b/security/nss/lib/freebl/config.mk new file mode 100644 index 0000000000..a4182a4186 --- /dev/null +++ b/security/nss/lib/freebl/config.mk @@ -0,0 +1,93 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# only do this in the outermost freebl build. +ifndef FREEBL_CHILD_BUILD + +# We're going to change this build so that it builds libfreebl.a with +# just loader.c. Then we have to build this directory twice again to +# build the two DSOs. +# To build libfreebl.a with just loader.c, we must now override many +# of the make variables setup by the prior inclusion of CORECONF's config.mk + +CSRCS = loader.c +SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX)) +OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS)) +ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \ + $(NOSUCHFILE) so_locations + +# this is not a recursive child make. We make a static lib. (archive) + +# Override the values defined in coreconf's ruleset.mk. 
+# +# - (1) LIBRARY: a static (archival) library +# - (2) SHARED_LIBRARY: a shared (dynamic link) library +# - (3) IMPORT_LIBRARY: an import library, used only on Windows +# - (4) PROGRAM: an executable binary +# +# override these variables to prevent building a DSO/DLL. + TARGETS = $(LIBRARY) + SHARED_LIBRARY = + IMPORT_LIBRARY = + PROGRAM = + +else + +# This is a recursive child make. We build the shared lib. + +TARGETS = $(SHARED_LIBRARY) +LIBRARY = +IMPORT_LIBRARY = +PROGRAM = + +ifeq ($(OS_TARGET), SunOS) +OS_LIBS += -lkstat +endif + +ifeq (,$(filter-out WIN%,$(OS_TARGET))) + +RESNAME = freebl.rc + +ifdef NS_USE_GCC +OS_LIBS += -ladvapi32 +else +OS_LIBS += advapi32.lib +endif + +ifdef NS_USE_GCC +EXTRA_SHARED_LIBS += \ + -L$(DIST)/lib \ + -L$(NSSUTIL_LIB_DIR) \ + -lnssutil3 \ + -L$(NSPR_LIB_DIR) \ + -lnspr4 \ + $(NULL) +else # ! NS_USE_GCC +EXTRA_SHARED_LIBS += \ + $(DIST)/lib/nssutil3.lib \ + $(NSPR_LIB_DIR)/$(NSPR31_LIB_PREFIX)nspr4.lib \ + $(NULL) +endif # NS_USE_GCC + +else + +ifeq ($(FREEBL_NO_DEPEND),1) +#drop pthreads as well +OS_PTHREAD= +else +EXTRA_SHARED_LIBS += \ + -L$(DIST)/lib \ + -L$(NSSUTIL_LIB_DIR) \ + -lnssutil3 \ + -L$(NSPR_LIB_DIR) \ + -lnspr4 \ + $(NULL) +endif +endif + +ifeq ($(OS_ARCH), Darwin) +EXTRA_SHARED_LIBS += -dylib_file @executable_path/libplc4.dylib:$(DIST)/lib/libplc4.dylib -dylib_file @executable_path/libplds4.dylib:$(DIST)/lib/libplds4.dylib +endif + +endif diff --git a/security/nss/lib/freebl/crypto_primitives.c b/security/nss/lib/freebl/crypto_primitives.c new file mode 100644 index 0000000000..425f9fcc80 --- /dev/null +++ b/security/nss/lib/freebl/crypto_primitives.c @@ -0,0 +1,36 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +/* This file holds useful functions and macros for crypto code. 
*/ +#include "crypto_primitives.h" + +/* + * FREEBL_HTONLL(x): swap bytes in a 64-bit integer. + */ +#if defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64)) + +__inline__ PRUint64 +swap8b(PRUint64 value) +{ + __asm__("bswapq %0" + : "+r"(value)); + return (value); +} + +#elif defined(IS_LITTLE_ENDIAN) && !defined(_MSC_VER) && !__has_builtin(__builtin_bswap64) && !((defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))) + +/* Portable byte swap: exchange adjacent bytes, then adjacent 16-bit units, + * then the two 32-bit halves. */ +PRUint64 +swap8b(PRUint64 x) +{ + PRUint64 t1 = x; + t1 = ((t1 & SHA_MASK8) << 8) | ((t1 >> 8) & SHA_MASK8); + t1 = ((t1 & SHA_MASK16) << 16) | ((t1 >> 16) & SHA_MASK16); + return (t1 >> 32) | (t1 << 32); +} + +#endif diff --git a/security/nss/lib/freebl/crypto_primitives.h b/security/nss/lib/freebl/crypto_primitives.h new file mode 100644 index 0000000000..86a6927ad6 --- /dev/null +++ b/security/nss/lib/freebl/crypto_primitives.h @@ -0,0 +1,66 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file holds useful functions and macros for crypto code. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include +#include "prtypes.h" + +/* For non-clang platform */ +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +/* Unfortunately this isn't always set when it should be. */ +#if defined(HAVE_LONG_LONG) + +/* + * ROTR64/ROTL64(x, n): rotate a 64-bit integer x by n bits to the right/left. + */ +#if defined(_MSC_VER) +#pragma intrinsic(_rotr64, _rotl64) +#define ROTR64(x, n) _rotr64((x), (n)) +#define ROTL64(x, n) _rotl64((x), (n)) +#else +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#endif + +/* + * FREEBL_HTONLL(x): swap bytes in a 64-bit integer. 
+ */ +#if defined(IS_LITTLE_ENDIAN) +#if defined(_MSC_VER) + +#pragma intrinsic(_byteswap_uint64) +#define FREEBL_HTONLL(x) _byteswap_uint64(x) + +/* gcc doesn't have __has_builtin, but it does have __builtin_bswap64 */ +#elif __has_builtin(__builtin_bswap64) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) + +#define FREEBL_HTONLL(x) __builtin_bswap64(x) + +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64)) + +PRUint64 swap8b(PRUint64 value); +#define FREEBL_HTONLL(x) swap8b(x) + +#else + +#define SHA_MASK16 0x0000FFFF0000FFFFULL +#define SHA_MASK8 0x00FF00FF00FF00FFULL +PRUint64 swap8b(PRUint64 x); +#define FREEBL_HTONLL(x) swap8b(x) + +#endif /* _MSC_VER */ + +#else /* IS_LITTLE_ENDIAN */ +#define FREEBL_HTONLL(x) (x) +#endif + +#endif /* HAVE_LONG_LONG */ diff --git a/security/nss/lib/freebl/ctr.c b/security/nss/lib/freebl/ctr.c new file mode 100644 index 0000000000..239a60da24 --- /dev/null +++ b/security/nss/lib/freebl/ctr.c @@ -0,0 +1,276 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "prtypes.h" +#include "blapit.h" +#include "blapii.h" +#include "ctr.h" +#include "pkcs11t.h" +#include "secerr.h" + +#ifdef USE_HW_AES +#ifdef NSS_X86_OR_X64 +#include "intel-aes.h" +#endif +#include "rijndael.h" +#endif + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +#include <arm_neon.h> +#endif + +SECStatus +CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher, + const unsigned char *param) +{ + const CK_AES_CTR_PARAMS *ctrParams = (const CK_AES_CTR_PARAMS *)param; + + if (ctrParams->ulCounterBits == 0 || + ctrParams->ulCounterBits > AES_BLOCK_SIZE * PR_BITS_PER_BYTE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Invariant: 0 < ctr->bufPtr <= AES_BLOCK_SIZE */ + ctr->checkWrap = PR_FALSE; + ctr->bufPtr = AES_BLOCK_SIZE; /* no unused data in the buffer */ + ctr->cipher = cipher; + ctr->context = context; + ctr->counterBits = ctrParams->ulCounterBits; + if (AES_BLOCK_SIZE > sizeof(ctr->counter) || + AES_BLOCK_SIZE > sizeof(ctrParams->cb)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + PORT_Memcpy(ctr->counter, ctrParams->cb, AES_BLOCK_SIZE); + if (ctr->counterBits < 64) { + PORT_Memcpy(ctr->counterFirst, ctr->counter, AES_BLOCK_SIZE); + ctr->checkWrap = PR_TRUE; + } + return SECSuccess; +} + +CTRContext * +CTR_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *param) +{ + CTRContext *ctr; + SECStatus rv; + + /* first fill in the Counter context */ + ctr = PORT_ZNew(CTRContext); + if (ctr == NULL) { + return NULL; + } + rv = CTR_InitContext(ctr, context, cipher, param); + if (rv != SECSuccess) { + CTR_DestroyContext(ctr, PR_TRUE); + ctr = NULL; + } + return ctr; +} + +void +CTR_DestroyContext(CTRContext *ctr, PRBool freeit) +{ + PORT_Memset(ctr, 0, sizeof(CTRContext)); + if (freeit) { + PORT_Free(ctr); + } +} + +/* + * Used by counter mode. Increment the counter block.
Not all bits in the + * counter block are part of the counter, counterBits tells how many bits + * are part of the counter. The counter block is blocksize long. It's a + * big endian value. + * + * XXX Does not handle counter rollover. + */ +static void +ctr_GetNextCtr(unsigned char *counter, unsigned int counterBits, + unsigned int blocksize) +{ + unsigned char *counterPtr = counter + blocksize - 1; + unsigned char mask, count; + + PORT_Assert(counterBits <= blocksize * PR_BITS_PER_BYTE); + while (counterBits >= PR_BITS_PER_BYTE) { + if (++(*(counterPtr--))) { + return; + } + counterBits -= PR_BITS_PER_BYTE; + } + if (counterBits == 0) { + return; + } + /* increment the final partial byte */ + mask = (1 << counterBits) - 1; + count = ++(*counterPtr) & mask; + *counterPtr = ((*counterPtr) & ~mask) | count; + return; +} + +static void +ctr_xor(unsigned char *target, const unsigned char *x, + const unsigned char *y, unsigned int count) +{ + unsigned int i; +#if defined(__ARM_NEON) || defined(__ARM_NEON__) + while (count >= 16) { + vst1q_u8(target, veorq_u8(vld1q_u8(x), vld1q_u8(y))); + target += 16; + x += 16; + y += 16; + count -= 16; + } +#endif + for (i = 0; i < count; i++) { + *target++ = *x++ ^ *y++; + } +} + +SECStatus +CTR_Update(CTRContext *ctr, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tmp; + SECStatus rv; + + // Limit block count to 2^counterBits - 2 + if (ctr->counterBits < (sizeof(unsigned int) * 8) && + inlen > ((1 << ctr->counterBits) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outlen = 0; + if (ctr->bufPtr != blocksize) { + unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen); + ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed); + ctr->bufPtr += needed; + outbuf += 
needed; + inbuf += needed; + *outlen += needed; + inlen -= needed; + if (inlen == 0) { + return SECSuccess; + } + PORT_Assert(ctr->bufPtr == blocksize); + } + + while (inlen >= blocksize) { + rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize, + ctr->counter, blocksize, blocksize); + ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize); + if (ctr->checkWrap) { + if (PORT_Memcmp(ctr->counter, ctr->counterFirst, blocksize) == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + } + if (rv != SECSuccess) { + return SECFailure; + } + ctr_xor(outbuf, inbuf, ctr->buffer, blocksize); + outbuf += blocksize; + inbuf += blocksize; + *outlen += blocksize; + inlen -= blocksize; + } + if (inlen == 0) { + return SECSuccess; + } + rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize, + ctr->counter, blocksize, blocksize); + ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize); + if (ctr->checkWrap) { + if (PORT_Memcmp(ctr->counter, ctr->counterFirst, blocksize) == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + } + if (rv != SECSuccess) { + return SECFailure; + } + ctr_xor(outbuf, inbuf, ctr->buffer, inlen); + ctr->bufPtr = inlen; + *outlen += inlen; + return SECSuccess; +} + +#if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64) +SECStatus +CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int fullblocks; + unsigned int tmp; + SECStatus rv; + + // Limit block count to 2^counterBits - 2 + if (ctr->counterBits < (sizeof(unsigned int) * 8) && + inlen > ((1 << ctr->counterBits) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outlen = 0; + if (ctr->bufPtr != blocksize) { + unsigned int needed = 
PR_MIN(blocksize - ctr->bufPtr, inlen); + ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed); + ctr->bufPtr += needed; + outbuf += needed; + inbuf += needed; + *outlen += needed; + inlen -= needed; + if (inlen == 0) { + return SECSuccess; + } + PORT_Assert(ctr->bufPtr == blocksize); + } + + if (inlen >= blocksize) { + rv = intel_aes_ctr_worker(((AESContext *)(ctr->context))->Nr)( + ctr, outbuf, outlen, maxout, inbuf, inlen, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + fullblocks = (inlen / blocksize) * blocksize; + *outlen += fullblocks; + outbuf += fullblocks; + inbuf += fullblocks; + inlen -= fullblocks; + } + + if (inlen == 0) { + return SECSuccess; + } + rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize, + ctr->counter, blocksize, blocksize); + ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + ctr_xor(outbuf, inbuf, ctr->buffer, inlen); + ctr->bufPtr = inlen; + *outlen += inlen; + return SECSuccess; +} +#endif diff --git a/security/nss/lib/freebl/ctr.h b/security/nss/lib/freebl/ctr.h new file mode 100644 index 0000000000..a397e690e6 --- /dev/null +++ b/security/nss/lib/freebl/ctr.h @@ -0,0 +1,52 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef CTR_H +#define CTR_H 1 + +#include "blapii.h" + +/* This structure is defined in this header because both ctr.c and gcm.c + * need it. 
*/ +struct CTRContextStr { + freeblCipherFunc cipher; + void *context; + unsigned char counter[MAX_BLOCK_SIZE]; + unsigned char buffer[MAX_BLOCK_SIZE]; + unsigned char counterFirst[MAX_BLOCK_SIZE]; /* counter overflow value */ + PRBool checkWrap; /*check for counter overflow*/ + unsigned long counterBits; + unsigned int bufPtr; +}; + +typedef struct CTRContextStr CTRContext; + +SECStatus CTR_InitContext(CTRContext *ctr, void *context, + freeblCipherFunc cipher, const unsigned char *param); + +/* + * The context argument is the inner cipher context to use with cipher. The + * CTRContext does not own context. context needs to remain valid for as long + * as the CTRContext is valid. + * + * The cipher argument is a block cipher in the ECB encrypt mode. + */ +CTRContext *CTR_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *param); + +void CTR_DestroyContext(CTRContext *ctr, PRBool freeit); + +SECStatus CTR_Update(CTRContext *ctr, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +#ifdef USE_HW_AES +SECStatus CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +#endif + +#endif diff --git a/security/nss/lib/freebl/cts.c b/security/nss/lib/freebl/cts.c new file mode 100644 index 0000000000..774294b7a1 --- /dev/null +++ b/security/nss/lib/freebl/cts.c @@ -0,0 +1,303 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "blapit.h" +#include "blapii.h" +#include "cts.h" +#include "secerr.h" + +struct CTSContextStr { + freeblCipherFunc cipher; + void *context; + /* iv stores the last ciphertext block of the previous message. + * Only used by decrypt. */ + unsigned char iv[MAX_BLOCK_SIZE]; +}; + +CTSContext * +CTS_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *iv) +{ + CTSContext *cts; + + cts = PORT_ZNew(CTSContext); + if (cts == NULL) { + return NULL; + } + PORT_Memcpy(cts->iv, iv, MAX_BLOCK_SIZE); + cts->cipher = cipher; + cts->context = context; + return cts; +} + +void +CTS_DestroyContext(CTSContext *cts, PRBool freeit) +{ + if (freeit) { + PORT_Free(cts); + } +} + +/* + * See addendum to NIST SP 800-38A + * Generically handle cipher text stealing. Basically this is doing CBC + * operations except someone can pass us a partial block. + * + * Output Order: + * CS-1: C1||C2||C3..Cn-1(could be partial)||Cn (NIST) + * CS-2: pad == 0 C1||C2||C3...Cn-1(is full)||Cn (Schneier) + * CS-2: pad != 0 C1||C2||C3...Cn||Cn-1(is partial)(Schneier) + * CS-3: C1||C2||C3...Cn||Cn-1(could be partial) (Kerberos) + * + * The characteristics of these three options: + * - NIST & Schneier (CS-1 & CS-2) are identical to CBC if there are no + * partial blocks on input. + * - Schneier and Kerberos (CS-2 and CS-3) have no embedded partial blocks, + * which make decoding easier. + * - NIST & Kerberos (CS-1 and CS-3) have consistent block order independent + * of padding. + * + * PKCS #11 did not specify which version to implement, but points to the NIST + * spec, so this code implements CTS-CS-1 from NIST.
+ * + * To convert the returned buffer to: + * CS-2 (Schneier): do + * unsigned char tmp[MAX_BLOCK_SIZE]; + * pad = *outlen % blocksize; + * if (pad) { + * memcpy(tmp, outbuf+*outlen-blocksize, blocksize); + * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad); + * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize); + * } + * CS-3 (Kerberos): do + * unsigned char tmp[MAX_BLOCK_SIZE]; + * pad = *outlen % blocksize; + * if (pad == 0) { + * pad = blocksize; + * } + * memcpy(tmp, outbuf+*outlen-blocksize, blocksize); + * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad); + * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize); + */ +SECStatus +CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned char lastBlock[MAX_BLOCK_SIZE]; + unsigned int tmp; + int fullblocks; + int written; + unsigned char *saveout = outbuf; + SECStatus rv; + + if (inlen < blocksize) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + fullblocks = (inlen / blocksize) * blocksize; + rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf, + fullblocks, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + *outlen = fullblocks; /* AES low level doesn't set outlen */ + inbuf += fullblocks; + inlen -= fullblocks; + if (inlen == 0) { + return SECSuccess; + } + written = *outlen - (blocksize - inlen); + outbuf += written; + maxout -= written; + + /* + * here's the CTS magic, we pad our final block with zeros, + * then do a CBC encrypt. CBC will xor our plain text with + * the previous block (Cn-1), capturing part of that block (Cn-1**) as it + * xors with the zero pad. We then write this full block, overwriting + * (Cn-1**) in our buffer.
This allows us to have input data == output + * data since Cn contains enough information to recover Cn-1** when + * we decrypt (at the cost of some complexity as you can see in decrypt + * below) */ + PORT_Memcpy(lastBlock, inbuf, inlen); + PORT_Memset(lastBlock + inlen, 0, blocksize - inlen); + rv = (*cts->cipher)(cts->context, outbuf, &tmp, maxout, lastBlock, + blocksize, blocksize); + PORT_Memset(lastBlock, 0, blocksize); + if (rv == SECSuccess) { + *outlen = written + blocksize; + } else { + PORT_Memset(saveout, 0, written + blocksize); + } + return rv; +} + +#define XOR_BLOCK(x, y, count) \ + for (i = 0; i < count; i++) \ + x[i] = x[i] ^ y[i] + +/* + * See addendum to NIST SP 800-38A + * Decrypt, Expect CS-1: input. See the comment on the encrypt side + * to understand what CS-2 and CS-3 mean. + * + * To convert the input buffer to CS-1 from ... + * CS-2 (Schneier): do + * unsigned char tmp[MAX_BLOCK_SIZE]; + * pad = inlen % blocksize; + * if (pad) { + * memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize); + * memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad); + * memcpy(inbuf+inlen-blocksize, tmp, blocksize); + * } + * CS-3 (Kerberos): do + * unsigned char tmp[MAX_BLOCK_SIZE]; + * pad = inlen % blocksize; + * if (pad == 0) { + * pad = blocksize; + * } + * memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize); + * memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad); + * memcpy(inbuf+inlen-blocksize, tmp, blocksize); + */ +SECStatus +CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned char *Pn; + unsigned char Cn_2[MAX_BLOCK_SIZE]; /* block Cn-2 */ + unsigned char Cn_1[MAX_BLOCK_SIZE]; /* block Cn-1 */ + unsigned char Cn[MAX_BLOCK_SIZE]; /* block Cn */ + unsigned char lastBlock[MAX_BLOCK_SIZE]; + const unsigned char *tmp; + unsigned char *saveout = outbuf; + unsigned int tmpLen; + unsigned int fullblocks, pad; +
unsigned int i; + SECStatus rv; + + if (inlen < blocksize) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + fullblocks = (inlen / blocksize) * blocksize; + + /* even though we expect the input to be CS-1, CS-2 is easier to parse, + * so convert to CS-2 immediately. NOTE: this is the same code as in + * the comment for encrypt. NOTE2: since we can't modify inbuf unless + * inbuf and outbuf overlap, just copy inbuf to outbuf and modify it there + */ + pad = inlen - fullblocks; + if (pad != 0) { + if (inbuf != outbuf) { + memcpy(outbuf, inbuf, inlen); + /* keep the names so we logically know how we are using the + * buffers */ + inbuf = outbuf; + } + memcpy(lastBlock, inbuf + inlen - blocksize, blocksize); + /* we know inbuf == outbuf now, inbuf is declared const and can't + * be the target, so use outbuf for the target here */ + memcpy(outbuf + inlen - pad, inbuf + inlen - blocksize - pad, pad); + memcpy(outbuf + inlen - blocksize - pad, lastBlock, blocksize); + } + /* save the previous to last block so we can undo the misordered + * chaining */ + tmp = (fullblocks < blocksize * 2) ? 
cts->iv : inbuf + fullblocks - blocksize * 2; + PORT_Memcpy(Cn_2, tmp, blocksize); + PORT_Memcpy(Cn, inbuf + fullblocks - blocksize, blocksize); + rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf, + fullblocks, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + *outlen = fullblocks; /* AES low level doesn't set outlen */ + inbuf += fullblocks; + inlen -= fullblocks; + if (inlen == 0) { + return SECSuccess; + } + outbuf += fullblocks; + + /* recover the stolen text */ + PORT_Memset(lastBlock, 0, blocksize); + PORT_Memcpy(lastBlock, inbuf, inlen); + PORT_Memcpy(Cn_1, inbuf, inlen); + Pn = outbuf - blocksize; + /* inbuf points to Cn-1* in the input buffer */ + /* NOTE: below there are 2 sections marked "make up for the out of order + * cbc decryption". You may ask, what is going on here. + * Short answer: CBC automatically xors the plain text with the previous + * encrypted block. We are decrypting the last 2 blocks out of order, so + * we have to 'back out' the decrypt xor and 'add back' the encrypt xor. + * Long answer: When we encrypted, we encrypted as follows: + * Pn-2, Pn-1, (Pn || 0), but on decryption we can't + * decrypt Cn-1 until we decrypt Cn because part of Cn-1 is stored in + * Cn (see below). So above we decrypted all the full blocks: + * Cn-2, Cn, + * to get: + * Pn-2, Pn, Except that Pn is not yet correct. On encrypt, we + * xor'd Pn || 0 with Cn-1, but on decrypt we xor'd it with Cn-2 + * To recover Pn, we xor the block with Cn-1* || 0 (in last block) and + * Cn-2 to get Pn || Cn-1**. Pn can then be written to the output buffer + * and we can now reunite Cn-1. With the full Cn-1 we can decrypt it, + * but now decrypt is going to xor the decrypted data with Cn instead of + * Cn-2.
xoring Cn and Cn-2 restores the original Pn-1 and we can now + * write that out to the buffer */ + + /* make up for the out of order CBC decryption */ + XOR_BLOCK(lastBlock, Cn_2, blocksize); + XOR_BLOCK(lastBlock, Pn, blocksize); + /* last buf now has Pn || Cn-1**, copy out Pn */ + PORT_Memcpy(outbuf, lastBlock, inlen); + *outlen += inlen; + /* copy Cn-1* into last buf to recover Cn-1 */ + PORT_Memcpy(lastBlock, Cn_1, inlen); + /* note: because Cn and Cn-1 were out of order, our pointer to Pn also + * points to where Pn-1 needs to reside. From here on out read Pn in + * the code as really Pn-1. */ + rv = (*cts->cipher)(cts->context, Pn, &tmpLen, blocksize, lastBlock, + blocksize, blocksize); + if (rv != SECSuccess) { + PORT_Memset(lastBlock, 0, blocksize); + PORT_Memset(saveout, 0, *outlen); + return SECFailure; + } + /* make up for the out of order CBC decryption */ + XOR_BLOCK(Pn, Cn_2, blocksize); + XOR_BLOCK(Pn, Cn, blocksize); + /* reset iv to Cn */ + PORT_Memcpy(cts->iv, Cn, blocksize); + /* This makes Cn the last block for the next decrypt operation, which + * matches the encrypt. We don't care about the contents of last block, + * only the side effect of setting the internal IV */ + (void)(*cts->cipher)(cts->context, lastBlock, &tmpLen, blocksize, Cn, + blocksize, blocksize); + /* clear last block. At this point last block contains Pn xor Cn_1 xor + * Cn_2, both of which an attacker would know, so we need to clear this + * buffer out */ + PORT_Memset(lastBlock, 0, blocksize); + /* Cn, Cn_1, and Cn_2 have encrypted data, so no need to clear them */ + return SECSuccess; +} diff --git a/security/nss/lib/freebl/cts.h b/security/nss/lib/freebl/cts.h new file mode 100644 index 0000000000..ddd56197f6 --- /dev/null +++ b/security/nss/lib/freebl/cts.h @@ -0,0 +1,33 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef CTS_H +#define CTS_H 1 + +#include "blapii.h" + +typedef struct CTSContextStr CTSContext; + +/* + * The context argument is the inner cipher context to use with cipher. The + * CTSContext does not own context. context needs to remain valid for as long + * as the CTSContext is valid. + * + * The cipher argument is a block cipher in the CBC mode. + */ +CTSContext *CTS_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *iv); + +void CTS_DestroyContext(CTSContext *cts, PRBool freeit); + +SECStatus CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +SECStatus CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +#endif diff --git a/security/nss/lib/freebl/deprecated/alg2268.c b/security/nss/lib/freebl/deprecated/alg2268.c new file mode 100644 index 0000000000..ac97363099 --- /dev/null +++ b/security/nss/lib/freebl/deprecated/alg2268.c @@ -0,0 +1,509 @@ +/* + * alg2268.c - implementation of the algorithm in RFC 2268 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "../stubs.h" +#endif + +#include "../blapi.h" +#include "../blapii.h" +#include "secerr.h" +#ifdef XP_UNIX_XXX +#include <stddef.h> /* for ptrdiff_t */ +#endif + +/* +** RC2 symmetric block cypher +*/ + +typedef SECStatus(rc2Func)(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen); + +/* forward declarations */ +static rc2Func rc2_EncryptECB; +static rc2Func rc2_DecryptECB; +static rc2Func rc2_EncryptCBC; +static rc2Func rc2_DecryptCBC; + +typedef union { + PRUint32 l[2]; + PRUint16 s[4]; + PRUint8 b[8]; +} RC2Block; + +struct RC2ContextStr { + union { + PRUint8 Kb[128]; + PRUint16 Kw[64]; + } u; + RC2Block iv; + rc2Func *enc; + rc2Func *dec; +}; + +#define B u.Kb +#define K u.Kw +#define BYTESWAP(x) ((x) << 8 | (x) >> 8) +#define SWAPK(i) cx->K[i] = (tmpS = cx->K[i], BYTESWAP(tmpS)) +#define RC2_BLOCK_SIZE 8 + +#define LOAD_HARD(R) \ + R[0] = (PRUint16)input[1] << 8 | input[0]; \ + R[1] = (PRUint16)input[3] << 8 | input[2]; \ + R[2] = (PRUint16)input[5] << 8 | input[4]; \ + R[3] = (PRUint16)input[7] << 8 | input[6]; +#define LOAD_EASY(R) \ + R[0] = ((PRUint16 *)input)[0]; \ + R[1] = ((PRUint16 *)input)[1]; \ + R[2] = ((PRUint16 *)input)[2]; \ + R[3] = ((PRUint16 *)input)[3]; +#define STORE_HARD(R) \ + output[0] = (PRUint8)(R[0]); \ + output[1] = (PRUint8)(R[0] >> 8); \ + output[2] = (PRUint8)(R[1]); \ + output[3] = (PRUint8)(R[1] >> 8); \ + output[4] = (PRUint8)(R[2]); \ + output[5] = (PRUint8)(R[2] >> 8); \ + output[6] = (PRUint8)(R[3]); \ + output[7] = (PRUint8)(R[3] >> 8); +#define STORE_EASY(R) \ + ((PRUint16 *)output)[0] = R[0]; \ + ((PRUint16 *)output)[1] = R[1]; \ + ((PRUint16 *)output)[2] = R[2]; \ + ((PRUint16 *)output)[3] = R[3]; + +#if defined(NSS_X86_OR_X64) +#define LOAD(R) LOAD_EASY(R) +#define STORE(R) STORE_EASY(R) +#elif !defined(IS_LITTLE_ENDIAN) +#define LOAD(R) LOAD_HARD(R) +#define STORE(R) STORE_HARD(R) +#else +#define LOAD(R) \ + if ((ptrdiff_t)input & 1) { \ +
LOAD_HARD(R) \ + } else { \ + LOAD_EASY(R) \ + } +#define STORE(R) \ + if ((ptrdiff_t)input & 1) { \ + STORE_HARD(R) \ + } else { \ + STORE_EASY(R) \ + } +#endif + +static const PRUint8 S[256] = { + 0331, 0170, 0371, 0304, 0031, 0335, 0265, 0355, 0050, 0351, 0375, 0171, 0112, 0240, 0330, 0235, + 0306, 0176, 0067, 0203, 0053, 0166, 0123, 0216, 0142, 0114, 0144, 0210, 0104, 0213, 0373, 0242, + 0027, 0232, 0131, 0365, 0207, 0263, 0117, 0023, 0141, 0105, 0155, 0215, 0011, 0201, 0175, 0062, + 0275, 0217, 0100, 0353, 0206, 0267, 0173, 0013, 0360, 0225, 0041, 0042, 0134, 0153, 0116, 0202, + 0124, 0326, 0145, 0223, 0316, 0140, 0262, 0034, 0163, 0126, 0300, 0024, 0247, 0214, 0361, 0334, + 0022, 0165, 0312, 0037, 0073, 0276, 0344, 0321, 0102, 0075, 0324, 0060, 0243, 0074, 0266, 0046, + 0157, 0277, 0016, 0332, 0106, 0151, 0007, 0127, 0047, 0362, 0035, 0233, 0274, 0224, 0103, 0003, + 0370, 0021, 0307, 0366, 0220, 0357, 0076, 0347, 0006, 0303, 0325, 0057, 0310, 0146, 0036, 0327, + 0010, 0350, 0352, 0336, 0200, 0122, 0356, 0367, 0204, 0252, 0162, 0254, 0065, 0115, 0152, 0052, + 0226, 0032, 0322, 0161, 0132, 0025, 0111, 0164, 0113, 0237, 0320, 0136, 0004, 0030, 0244, 0354, + 0302, 0340, 0101, 0156, 0017, 0121, 0313, 0314, 0044, 0221, 0257, 0120, 0241, 0364, 0160, 0071, + 0231, 0174, 0072, 0205, 0043, 0270, 0264, 0172, 0374, 0002, 0066, 0133, 0045, 0125, 0227, 0061, + 0055, 0135, 0372, 0230, 0343, 0212, 0222, 0256, 0005, 0337, 0051, 0020, 0147, 0154, 0272, 0311, + 0323, 0000, 0346, 0317, 0341, 0236, 0250, 0054, 0143, 0026, 0001, 0077, 0130, 0342, 0211, 0251, + 0015, 0070, 0064, 0033, 0253, 0063, 0377, 0260, 0273, 0110, 0014, 0137, 0271, 0261, 0315, 0056, + 0305, 0363, 0333, 0107, 0345, 0245, 0234, 0167, 0012, 0246, 0040, 0150, 0376, 0177, 0301, 0255 +}; + +RC2Context * +RC2_AllocateContext(void) +{ + return PORT_ZNew(RC2Context); +} +SECStatus +RC2_InitContext(RC2Context *cx, const unsigned char *key, unsigned int len, + const unsigned char *input, int mode, unsigned int efLen8, + 
unsigned int unused) +{ + PRUint8 *L, *L2; + int i; +#if !defined(IS_LITTLE_ENDIAN) + PRUint16 tmpS; +#endif + PRUint8 tmpB; + + if (!key || !cx || !len || len > (sizeof cx->B) || + efLen8 > (sizeof cx->B)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_RC2) { + /* groovy */ + } else if (mode == NSS_RC2_CBC) { + if (!input) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + } else { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (mode == NSS_RC2_CBC) { + cx->enc = &rc2_EncryptCBC; + cx->dec = &rc2_DecryptCBC; + LOAD(cx->iv.s); + } else { + cx->enc = &rc2_EncryptECB; + cx->dec = &rc2_DecryptECB; + } + + /* Step 0. Copy key into table. */ + memcpy(cx->B, key, len); + + /* Step 1. Compute all values to the right of the key. */ + L2 = cx->B; + L = L2 + len; + tmpB = L[-1]; + for (i = (sizeof cx->B) - len; i > 0; --i) { + *L++ = tmpB = S[(PRUint8)(tmpB + *L2++)]; + } + + /* step 2. Adjust left most byte of effective key. */ + i = (sizeof cx->B) - efLen8; + L = cx->B + i; + *L = tmpB = S[*L]; /* mask is always 0xff */ + + /* step 3. Recompute all values to the left of effective key. */ + L2 = --L + efLen8; + while (L >= cx->B) { + *L-- = tmpB = S[tmpB ^ *L2--]; + } + +#if !defined(IS_LITTLE_ENDIAN) + for (i = 63; i >= 0; --i) { + SWAPK(i); /* candidate for unrolling */ + } +#endif + return SECSuccess; +} + +/* +** Create a new RC2 context suitable for RC2 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC) +** "mode" one of NSS_RC2 or NSS_RC2_CBC +** "effectiveKeyLen" in bytes, not bits. +** +** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block +** chaining" mode. 
+*/ +RC2Context * +RC2_CreateContext(const unsigned char *key, unsigned int len, + const unsigned char *iv, int mode, unsigned efLen8) +{ + RC2Context *cx = PORT_ZNew(RC2Context); + if (cx) { + SECStatus rv = RC2_InitContext(cx, key, len, iv, mode, efLen8, 0); + if (rv != SECSuccess) { + RC2_DestroyContext(cx, PR_TRUE); + cx = NULL; + } + } + return cx; +} + +/* +** Destroy an RC2 encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +void +RC2_DestroyContext(RC2Context *cx, PRBool freeit) +{ + if (cx) { + memset(cx, 0, sizeof *cx); + if (freeit) { + PORT_Free(cx); + } + } +} + +#define ROL(x, k) (x << k | x >> (16 - k)) +#define MIX(j) \ + R0 = R0 + cx->K[4 * j + 0] + (R3 & R2) + (~R3 & R1); \ + R0 = ROL(R0, 1); \ + R1 = R1 + cx->K[4 * j + 1] + (R0 & R3) + (~R0 & R2); \ + R1 = ROL(R1, 2); \ + R2 = R2 + cx->K[4 * j + 2] + (R1 & R0) + (~R1 & R3); \ + R2 = ROL(R2, 3); \ + R3 = R3 + cx->K[4 * j + 3] + (R2 & R1) + (~R2 & R0); \ + R3 = ROL(R3, 5) +#define MASH \ + R0 = R0 + cx->K[R3 & 63]; \ + R1 = R1 + cx->K[R0 & 63]; \ + R2 = R2 + cx->K[R1 & 63]; \ + R3 = R3 + cx->K[R2 & 63] + +/* Encrypt one block */ +static void +rc2_Encrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input) +{ + register PRUint16 R0, R1, R2, R3; + + /* step 1. Initialize input. */ + R0 = input->s[0]; + R1 = input->s[1]; + R2 = input->s[2]; + R3 = input->s[3]; + + /* step 2. Expand Key (already done, in context) */ + /* step 3. j = 0 */ + /* step 4. Perform 5 mixing rounds. */ + + MIX(0); + MIX(1); + MIX(2); + MIX(3); + MIX(4); + + /* step 5. Perform 1 mashing round. */ + MASH; + + /* step 6. Perform 6 mixing rounds. */ + + MIX(5); + MIX(6); + MIX(7); + MIX(8); + MIX(9); + MIX(10); + + /* step 7. Perform 1 mashing round. */ + MASH; + + /* step 8. Perform 5 mixing rounds. 
*/ + + MIX(11); + MIX(12); + MIX(13); + MIX(14); + MIX(15); + + /* output results */ + output->s[0] = R0; + output->s[1] = R1; + output->s[2] = R2; + output->s[3] = R3; +} + +#define ROR(x, k) (x >> k | x << (16 - k)) +#define R_MIX(j) \ + R3 = ROR(R3, 5); \ + R3 = R3 - cx->K[4 * j + 3] - (R2 & R1) - (~R2 & R0); \ + R2 = ROR(R2, 3); \ + R2 = R2 - cx->K[4 * j + 2] - (R1 & R0) - (~R1 & R3); \ + R1 = ROR(R1, 2); \ + R1 = R1 - cx->K[4 * j + 1] - (R0 & R3) - (~R0 & R2); \ + R0 = ROR(R0, 1); \ + R0 = R0 - cx->K[4 * j + 0] - (R3 & R2) - (~R3 & R1) +#define R_MASH \ + R3 = R3 - cx->K[R2 & 63]; \ + R2 = R2 - cx->K[R1 & 63]; \ + R1 = R1 - cx->K[R0 & 63]; \ + R0 = R0 - cx->K[R3 & 63] + +/* Decrypt one block */ +static void +rc2_Decrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input) +{ + register PRUint16 R0, R1, R2, R3; + + /* step 1. Initialize input. */ + R0 = input->s[0]; + R1 = input->s[1]; + R2 = input->s[2]; + R3 = input->s[3]; + + /* step 2. Expand Key (already done, in context) */ + /* step 3. j = 63 */ + /* step 4. Perform 5 r_mixing rounds. */ + R_MIX(15); + R_MIX(14); + R_MIX(13); + R_MIX(12); + R_MIX(11); + + /* step 5. Perform 1 r_mashing round. */ + R_MASH; + + /* step 6. Perform 6 r_mixing rounds. */ + R_MIX(10); + R_MIX(9); + R_MIX(8); + R_MIX(7); + R_MIX(6); + R_MIX(5); + + /* step 7. Perform 1 r_mashing round. */ + R_MASH; + + /* step 8. Perform 5 r_mixing rounds.
*/ + R_MIX(4); + R_MIX(3); + R_MIX(2); + R_MIX(1); + R_MIX(0); + + /* output results */ + output->s[0] = R0; + output->s[1] = R1; + output->s[2] = R2; + output->s[3] = R3; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_EncryptECB(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + + while (inputLen > 0) { + LOAD(iBlock.s) + rc2_Encrypt1Block(cx, &iBlock, &iBlock); + STORE(iBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_DecryptECB(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + + while (inputLen > 0) { + LOAD(iBlock.s) + rc2_Decrypt1Block(cx, &iBlock, &iBlock); + STORE(iBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_EncryptCBC(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + + while (inputLen > 0) { + + LOAD(iBlock.s) + iBlock.l[0] ^= cx->iv.l[0]; + iBlock.l[1] ^= cx->iv.l[1]; + rc2_Encrypt1Block(cx, &iBlock, &iBlock); + cx->iv = iBlock; + STORE(iBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_DecryptCBC(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + RC2Block oBlock; + + while (inputLen > 0) { + LOAD(iBlock.s) + rc2_Decrypt1Block(cx, &oBlock, &iBlock); + oBlock.l[0] ^= cx->iv.l[0]; + oBlock.l[1] ^= cx->iv.l[1]; + cx->iv = iBlock; + STORE(oBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +/* +** Perform RC2 encryption. 
+** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +SECStatus +RC2_Encrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ /* SECFailure with the error code set for a bad length, otherwise the status of the cipher routine */ + SECStatus rv = SECSuccess; /* empty input skips the cipher and still succeeds */ + if (inputLen) { + if (inputLen % RC2_BLOCK_SIZE) { /* only whole blocks are accepted */ + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { /* output needs as many bytes as the input */ + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + rv = (*cx->enc)(cx, output, input, inputLen); /* dispatch through the encrypt routine held in the context */ + } + if (rv == SECSuccess) { /* outputLen is written only on success */ + *outputLen = inputLen; + } + return rv; +} + +/* +** Perform RC2 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +SECStatus +RC2_Decrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ /* SECFailure with the error code set for a bad length, otherwise the status of the cipher routine */ + SECStatus rv = SECSuccess; /* empty input skips the cipher and still succeeds */ + if (inputLen) { + if (inputLen % RC2_BLOCK_SIZE) { /* only whole blocks are accepted */ + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { /* output needs as many bytes as the input */ + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + rv = (*cx->dec)(cx, output, input, inputLen); /* dispatch through the decrypt routine held in the context */ + } + if (rv == SECSuccess) { /* outputLen is written only on success */ + *outputLen = inputLen; + } + return rv; +} diff --git a/security/nss/lib/freebl/deprecated/seed.c b/security/nss/lib/freebl/deprecated/seed.c new file mode 100644 index 0000000000..fd27bbd0e1 --- /dev/null +++ b/security/nss/lib/freebl/deprecated/seed.c @@ -0,0 +1,671 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifdef FREEBL_NO_DEPEND +#include "../stubs.h" +#endif + +#include +#include +#include +#include +#ifdef WIN32 +#include +#endif + +#include "seed.h" +#include "secerr.h" + +static const seed_word SS[4][256] = { + { 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, + 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124, + 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c, + 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360, + 0x28082028, 0x04444044, 0x20002020, 0x1d8d919c, + 0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314, + 0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378, + 0x3b8bb3b8, 0x13031310, 0x12c2d2d0, 0x2ecee2ec, + 0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8, + 0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074, + 0x2ccce0ec, 0x15859194, 0x0b0b0308, 0x17475354, + 0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100, + 0x24042024, 0x1c0c101c, 0x33437370, 0x18889098, + 0x10001010, 0x0cccc0cc, 0x32c2f2f0, 0x19c9d1d8, + 0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380, + 0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8, + 0x20406060, 0x10405050, 0x2383a3a0, 0x2bcbe3e8, + 0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c, + 0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078, + 0x2686a2a4, 0x12021210, 0x2f8fa3ac, 0x15c5d1d4, + 0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140, + 0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008, + 0x1f0f131c, 0x19899198, 0x00000000, 0x19091118, + 0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0, + 0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324, + 0x3080b0b0, 0x0b8b8388, 0x0e0e020c, 0x2b8ba3a8, + 0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c, + 0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208, + 0x3f8fb3bc, 0x2fcfe3ec, 0x33c3f3f0, 0x05c5c1c4, + 0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064, + 0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218, + 0x06060204, 0x21012120, 0x2b4b6368, 0x26466264, + 0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288, + 0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0, + 0x3a4a7278, 0x07474344, 0x16869294, 0x25c5e1e4, + 0x26062224, 0x00808080, 
0x2d8da1ac, 0x1fcfd3dc, + 0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac, + 0x36063234, 0x15051114, 0x22022220, 0x38083038, + 0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c, + 0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394, + 0x35053134, 0x0bcbc3c8, 0x0ecec2cc, 0x3c0c303c, + 0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188, + 0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8, + 0x14849094, 0x19495158, 0x02828280, 0x04c4c0c4, + 0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364, + 0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8, + 0x0f0f030c, 0x0e8e828c, 0x02424240, 0x23032320, + 0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4, + 0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0, + 0x2f4f636c, 0x3d0d313c, 0x2d0d212c, 0x00404040, + 0x3e8eb2bc, 0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0, + 0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154, + 0x3b0b3338, 0x1cccd0dc, 0x28486068, 0x3f4f737c, + 0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254, + 0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244, + 0x3585b1b4, 0x2b0b2328, 0x25456164, 0x3acaf2f8, + 0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c, + 0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0, + 0x31013130, 0x2acae2e8, 0x2d4d616c, 0x1f4f535c, + 0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088, + 0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4, + 0x22426260, 0x29092128, 0x07070304, 0x33033330, + 0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178, + 0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298 }, + { 0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2, + 0xcc0fcfc3, 0xdc1eced2, 0xb03383b3, 0xb83888b0, + 0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3, + 0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53, + 0xc003c3c3, 0x60224262, 0x30330333, 0xb43585b1, + 0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3, + 0xd013c3d3, 0x90118191, 0x10110111, 0x04060602, + 0x1c1c0c10, 0xbc3c8cb0, 0x34360632, 0x480b4b43, + 0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0, + 0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0, + 0xc002c2c2, 0x44054541, 0xe021c1e1, 0xd416c6d2, + 0x3c3f0f33, 
0x3c3d0d31, 0x8c0e8e82, 0x98188890, + 0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32, + 0xa42585a1, 0xf839c9f1, 0x0c0d0d01, 0xdc1fcfd3, + 0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72, + 0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272, + 0x40024242, 0xd414c4d0, 0x40014141, 0xc000c0c0, + 0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83, + 0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13, + 0xc80acac2, 0x2c2c0c20, 0xa82a8aa2, 0x34340430, + 0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1, + 0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0, + 0x54174753, 0xac2e8ea2, 0x08080800, 0xc405c5c1, + 0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1, + 0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131, + 0xf435c5f1, 0x880a8a82, 0x682a4a62, 0xb03181b1, + 0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202, + 0x20220222, 0x04040400, 0x68284860, 0x70314171, + 0x04070703, 0xd81bcbd3, 0x9c1d8d91, 0x98198991, + 0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951, + 0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0, + 0x981a8a92, 0xa02383a3, 0xa82b8ba3, 0xd010c0d0, + 0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12, + 0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3, + 0x94168692, 0x783b4b73, 0x5c1c4c50, 0xa02282a2, + 0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41, + 0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32, + 0x0c0c0c00, 0x2c2e0e22, 0xb83a8ab2, 0x6c2e4e62, + 0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292, + 0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0, + 0x14150511, 0xf83bcbf3, 0x70304070, 0x74354571, + 0x7c3f4f73, 0x34350531, 0x10100010, 0x00030303, + 0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470, + 0xd415c5d1, 0xb43484b0, 0xe82acae2, 0x08090901, + 0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040, + 0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501, + 0xf83acaf2, 0x00010101, 0xf030c0f0, 0x282a0a22, + 0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343, + 0x84058581, 0x14140410, 0x88098981, 0x981b8b93, + 0xb03080b0, 0xe425c5e1, 0x48084840, 0x78394971, + 0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282, + 0x20210121, 
0x8c0c8c80, 0x181b0b13, 0x5c1f4f53, + 0x74374773, 0x54144450, 0xb03282b2, 0x1c1d0d11, + 0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642, + 0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3, + 0x7c3e4e72, 0xd81acad2, 0xc809c9c1, 0xfc3dcdf1, + 0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30, + 0xb43686b2, 0xe424c4e0, 0xb83b8bb3, 0x7c3c4c70, + 0x0c0e0e02, 0x50104050, 0x38390931, 0x24260622, + 0x30320232, 0x84048480, 0x68294961, 0x90138393, + 0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0, + 0xc80bcbc3, 0x50134353, 0x080a0a02, 0x84078783, + 0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83, + 0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3 }, + { 0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3, + 0x50541444, 0x111c1d0d, 0xa0ac2c8c, 0x21242505, + 0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e, + 0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343, + 0x20282808, 0x40440444, 0x20202000, 0x919c1d8d, + 0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707, + 0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b, + 0xb3b83b8b, 0x13101303, 0xd2d012c2, 0xe2ec2ece, + 0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888, + 0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444, + 0xe0ec2ccc, 0x91941585, 0x03080b0b, 0x53541747, + 0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101, + 0x20242404, 0x101c1c0c, 0x73703343, 0x90981888, + 0x10101000, 0xc0cc0ccc, 0xf2f032c2, 0xd1d819c9, + 0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383, + 0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9, + 0x60602040, 0x50501040, 0xa3a02383, 0xe3e82bcb, + 0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f, + 0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848, + 0xa2a42686, 0x12101202, 0xa3ac2f8f, 0xd1d415c5, + 0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141, + 0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808, + 0x131c1f0f, 0x91981989, 0x00000000, 0x11181909, + 0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1, + 0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707, + 0xb0b03080, 0x83880b8b, 0x020c0e0e, 0xa3a82b8b, + 0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d, + 
0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a, + 0xb3bc3f8f, 0xe3ec2fcf, 0xf3f033c3, 0xc1c405c5, + 0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444, + 0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a, + 0x02040606, 0x21202101, 0x63682b4b, 0x62642646, + 0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a, + 0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0, + 0x72783a4a, 0x43440747, 0x92941686, 0xe1e425c5, + 0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf, + 0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e, + 0x32343606, 0x11141505, 0x22202202, 0x30383808, + 0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c, + 0x81800181, 0xe1e829c9, 0x80840484, 0x93941787, + 0x31343505, 0xc3c80bcb, 0xc2cc0ece, 0x303c3c0c, + 0x71703141, 0x11101101, 0xc3c407c7, 0x81880989, + 0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8, + 0x90941484, 0x51581949, 0x82800282, 0xc0c404c4, + 0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747, + 0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888, + 0x030c0f0f, 0x828c0e8e, 0x42400242, 0x23202303, + 0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484, + 0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2, + 0x636c2f4f, 0x313c3d0d, 0x212c2d0d, 0x40400040, + 0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1, + 0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545, + 0x33383b0b, 0xd0dc1ccc, 0x60682848, 0x737c3f4f, + 0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646, + 0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646, + 0xb1b43585, 0x23282b0b, 0x61642545, 0xf2f83aca, + 0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f, + 0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282, + 0x31303101, 0xe2e82aca, 0x616c2d4d, 0x535c1f4f, + 0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888, + 0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4, + 0x62602242, 0x21282909, 0x03040707, 0x33303303, + 0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949, + 0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a }, + { 0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426, + 0xcfc3cc0f, 0xced2dc1e, 0x83b3b033, 0x88b0b838, + 0x8fa3ac2f, 0x40606020, 0x45515415, 
0xc7c3c407, + 0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b, + 0xc3c3c003, 0x42626022, 0x03333033, 0x85b1b435, + 0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427, + 0xc3d3d013, 0x81919011, 0x01111011, 0x06020406, + 0x0c101c1c, 0x8cb0bc3c, 0x06323436, 0x4b43480b, + 0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828, + 0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434, + 0xc2c2c002, 0x45414405, 0xc1e1e021, 0xc6d2d416, + 0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818, + 0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e, + 0x85a1a425, 0xc9f1f839, 0x0d010c0d, 0xcfd3dc1f, + 0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a, + 0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032, + 0x42424002, 0xc4d0d414, 0x41414001, 0xc0c0c000, + 0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b, + 0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f, + 0xcac2c80a, 0x0c202c2c, 0x8aa2a82a, 0x04303434, + 0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829, + 0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838, + 0x47535417, 0x8ea2ac2e, 0x08000808, 0xc5c1c405, + 0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839, + 0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031, + 0xc5f1f435, 0x8a82880a, 0x4a62682a, 0x81b1b031, + 0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002, + 0x02222022, 0x04000404, 0x48606828, 0x41717031, + 0x07030407, 0xcbd3d81b, 0x8d919c1d, 0x89919819, + 0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819, + 0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c, + 0x8a92981a, 0x83a3a023, 0x8ba3a82b, 0xc0d0d010, + 0x81818001, 0x0f030c0f, 0x47434407, 0x0a12181a, + 0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f, + 0x86929416, 0x4b73783b, 0x4c505c1c, 0x82a2a022, + 0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d, + 0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a, + 0x0c000c0c, 0x0e222c2e, 0x8ab2b83a, 0x4e626c2e, + 0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012, + 0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c, + 0x05111415, 0xcbf3f83b, 0x40707030, 0x45717435, + 0x4f737c3f, 0x05313435, 0x00101010, 0x03030003, + 0x44606424, 0x4d616c2d, 0xc6c2c406, 
0x44707434, + 0xc5d1d415, 0x84b0b434, 0xcae2e82a, 0x09010809, + 0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000, + 0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405, + 0xcaf2f83a, 0x01010001, 0xc0f0f030, 0x0a22282a, + 0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003, + 0x85818405, 0x04101414, 0x89818809, 0x8b93981b, + 0x80b0b030, 0xc5e1e425, 0x48404808, 0x49717839, + 0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002, + 0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f, + 0x47737437, 0x44505414, 0x82b2b032, 0x0d111c1d, + 0x05212425, 0x4f434c0f, 0x00000000, 0x46424406, + 0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b, + 0x4e727c3e, 0xcad2d81a, 0xc9c1c809, 0xcdf1fc3d, + 0x00303030, 0x85919415, 0x45616425, 0x0c303c3c, + 0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c, + 0x0e020c0e, 0x40505010, 0x09313839, 0x06222426, + 0x02323032, 0x84808404, 0x49616829, 0x83939013, + 0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424, + 0xcbc3c80b, 0x43535013, 0x0a02080a, 0x87838407, + 0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f, + 0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437 } +}; + +/* key schedule constants - golden ratio */ +#define KC0 0x9e3779b9 +#define KC1 0x3c6ef373 +#define KC2 0x78dde6e6 +#define KC3 0xf1bbcdcc +#define KC4 0xe3779b99 +#define KC5 0xc6ef3733 +#define KC6 0x8dde6e67 +#define KC7 0x1bbcdccf +#define KC8 0x3779b99e +#define KC9 0x6ef3733c +#define KC10 0xdde6e678 +#define KC11 0xbbcdccf1 +#define KC12 0x779b99e3 +#define KC13 0xef3733c6 +#define KC14 0xde6e678d +#define KC15 0xbcdccf1b + +void +SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], + SEED_KEY_SCHEDULE *ks) +{ + seed_word K0, K1, K2, K3; + seed_word t0, t1; + + char2word(rawkey, K0); + char2word(rawkey + 4, K1); + char2word(rawkey + 8, K2); + char2word(rawkey + 12, K3); + + t0 = (K0 + K2 - KC0); + t1 = (K1 - K3 + KC0); + KEYUPDATE_TEMP(t0, t1, &ks->data[0]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC1); + KEYUPDATE_TEMP(t0, t1, &ks->data[2]); + KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC2); + 
KEYUPDATE_TEMP(t0, t1, &ks->data[4]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC3); + KEYUPDATE_TEMP(t0, t1, &ks->data[6]); + KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC4); + KEYUPDATE_TEMP(t0, t1, &ks->data[8]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC5); + KEYUPDATE_TEMP(t0, t1, &ks->data[10]); + KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC6); + KEYUPDATE_TEMP(t0, t1, &ks->data[12]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC7); + KEYUPDATE_TEMP(t0, t1, &ks->data[14]); + KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC8); + KEYUPDATE_TEMP(t0, t1, &ks->data[16]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC9); + KEYUPDATE_TEMP(t0, t1, &ks->data[18]); + KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC10); + KEYUPDATE_TEMP(t0, t1, &ks->data[20]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC11); + KEYUPDATE_TEMP(t0, t1, &ks->data[22]); + KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC12); + KEYUPDATE_TEMP(t0, t1, &ks->data[24]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC13); + KEYUPDATE_TEMP(t0, t1, &ks->data[26]); + KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC14); + KEYUPDATE_TEMP(t0, t1, &ks->data[28]); + KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC15); + KEYUPDATE_TEMP(t0, t1, &ks->data[30]); +} + +void +SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE], + unsigned char d[SEED_BLOCK_SIZE], + const SEED_KEY_SCHEDULE *ks) +{ + seed_word L0, L1, R0, R1; + seed_word t0, t1; + + char2word(s, L0); + char2word(s + 4, L1); + char2word(s + 8, R0); + char2word(s + 12, R1); + + E_SEED(t0, t1, L0, L1, R0, R1, 0); + E_SEED(t0, t1, R0, R1, L0, L1, 2); + E_SEED(t0, t1, L0, L1, R0, R1, 4); + E_SEED(t0, t1, R0, R1, L0, L1, 6); + E_SEED(t0, t1, L0, L1, R0, R1, 8); + E_SEED(t0, t1, R0, R1, L0, L1, 10); + E_SEED(t0, t1, L0, L1, R0, R1, 12); + E_SEED(t0, t1, R0, R1, L0, L1, 14); + E_SEED(t0, t1, L0, L1, R0, R1, 16); + E_SEED(t0, t1, R0, R1, L0, L1, 18); + E_SEED(t0, t1, L0, L1, R0, R1, 20); + E_SEED(t0, t1, R0, R1, L0, L1, 22); + E_SEED(t0, t1, 
L0, L1, R0, R1, 24); + E_SEED(t0, t1, R0, R1, L0, L1, 26); + E_SEED(t0, t1, L0, L1, R0, R1, 28); + E_SEED(t0, t1, R0, R1, L0, L1, 30); + + word2char(R0, d); + word2char(R1, d + 4); + word2char(L0, d + 8); + word2char(L1, d + 12); +} + +void +SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE], + unsigned char d[SEED_BLOCK_SIZE], + const SEED_KEY_SCHEDULE *ks) +{ + seed_word L0, L1, R0, R1; + seed_word t0, t1; + + char2word(s, L0); + char2word(s + 4, L1); + char2word(s + 8, R0); + char2word(s + 12, R1); + + E_SEED(t0, t1, L0, L1, R0, R1, 30); + E_SEED(t0, t1, R0, R1, L0, L1, 28); + E_SEED(t0, t1, L0, L1, R0, R1, 26); + E_SEED(t0, t1, R0, R1, L0, L1, 24); + E_SEED(t0, t1, L0, L1, R0, R1, 22); + E_SEED(t0, t1, R0, R1, L0, L1, 20); + E_SEED(t0, t1, L0, L1, R0, R1, 18); + E_SEED(t0, t1, R0, R1, L0, L1, 16); + E_SEED(t0, t1, L0, L1, R0, R1, 14); + E_SEED(t0, t1, R0, R1, L0, L1, 12); + E_SEED(t0, t1, L0, L1, R0, R1, 10); + E_SEED(t0, t1, R0, R1, L0, L1, 8); + E_SEED(t0, t1, L0, L1, R0, R1, 6); + E_SEED(t0, t1, R0, R1, L0, L1, 4); + E_SEED(t0, t1, L0, L1, R0, R1, 2); + E_SEED(t0, t1, R0, R1, L0, L1, 0); + + word2char(R0, d); + word2char(R1, d + 4); + word2char(L0, d + 8); + word2char(L1, d + 12); +} + +void +SEED_ecb_encrypt(const unsigned char *in, + unsigned char *out, + size_t inLen, + const SEED_KEY_SCHEDULE *ks, int enc) +{ + if (enc) { + while (inLen > 0) { + SEED_encrypt(in, out, ks); + out += SEED_BLOCK_SIZE; + in += SEED_BLOCK_SIZE; + inLen -= SEED_BLOCK_SIZE; + } + } else { + while (inLen > 0) { + SEED_decrypt(in, out, ks); + out += SEED_BLOCK_SIZE; + in += SEED_BLOCK_SIZE; + inLen -= SEED_BLOCK_SIZE; + } + } +} + +void +SEED_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const SEED_KEY_SCHEDULE *ks, + unsigned char ivec[SEED_BLOCK_SIZE], int enc) +{ + size_t n; + unsigned char tmp[SEED_BLOCK_SIZE]; + const unsigned char *iv = ivec; + + if (enc) { + while (len >= SEED_BLOCK_SIZE) { + for (n = 0; n < SEED_BLOCK_SIZE; ++n) { + out[n] = in[n] 
^ iv[n]; + } + + SEED_encrypt(out, out, ks); + iv = out; + len -= SEED_BLOCK_SIZE; + in += SEED_BLOCK_SIZE; + out += SEED_BLOCK_SIZE; + } + + if (len) { + for (n = 0; n < len; ++n) { + out[n] = in[n] ^ iv[n]; + } + + for (n = len; n < SEED_BLOCK_SIZE; ++n) { + out[n] = iv[n]; + } + + SEED_encrypt(out, out, ks); + iv = out; + } + + memcpy(ivec, iv, SEED_BLOCK_SIZE); + } else if (in != out) { + while (len >= SEED_BLOCK_SIZE) { + SEED_decrypt(in, out, ks); + + for (n = 0; n < SEED_BLOCK_SIZE; ++n) { + out[n] ^= iv[n]; + } + + iv = in; + len -= SEED_BLOCK_SIZE; + in += SEED_BLOCK_SIZE; + out += SEED_BLOCK_SIZE; + } + + if (len) { + SEED_decrypt(in, tmp, ks); + + for (n = 0; n < len; ++n) { + out[n] = tmp[n] ^ iv[n]; + } + + iv = in; + } + + memcpy(ivec, iv, SEED_BLOCK_SIZE); + } else { + while (len >= SEED_BLOCK_SIZE) { + memcpy(tmp, in, SEED_BLOCK_SIZE); + SEED_decrypt(in, out, ks); + + for (n = 0; n < SEED_BLOCK_SIZE; ++n) { + out[n] ^= ivec[n]; + } + + memcpy(ivec, tmp, SEED_BLOCK_SIZE); + len -= SEED_BLOCK_SIZE; + in += SEED_BLOCK_SIZE; + out += SEED_BLOCK_SIZE; + } + + if (len) { + memcpy(tmp, in, SEED_BLOCK_SIZE); + SEED_decrypt(tmp, tmp, ks); + + for (n = 0; n < len; ++n) { + out[n] = tmp[n] ^ ivec[n]; + } + + memcpy(ivec, tmp, SEED_BLOCK_SIZE); + } + } +} + +SEEDContext * +SEED_AllocateContext(void) +{ + return PORT_ZNew(SEEDContext); +} + +SECStatus +SEED_InitContext(SEEDContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, + int mode, unsigned int encrypt, unsigned int unused) +{ + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + switch (mode) { + case NSS_SEED: + SEED_set_key(key, &cx->ks); + cx->mode = NSS_SEED; + cx->encrypt = encrypt; + break; + + case NSS_SEED_CBC: + memcpy(cx->iv, iv, 16); + SEED_set_key(key, &cx->ks); + cx->mode = NSS_SEED_CBC; + cx->encrypt = encrypt; + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + return SECSuccess; +} + 
+SEEDContext * +SEED_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt) +{ + SEEDContext *cx = PORT_ZNew(SEEDContext); + SECStatus rv = SEED_InitContext(cx, key, SEED_KEY_LENGTH, iv, mode, + encrypt, 0); + + if (rv != SECSuccess) { + PORT_ZFree(cx, sizeof *cx); + cx = NULL; + } + + return cx; +} + +void +SEED_DestroyContext(SEEDContext *cx, PRBool freeit) +{ + if (cx) { + memset(cx, 0, sizeof *cx); + + if (freeit) + PORT_Free(cx); + } +} + +SECStatus +SEED_Encrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen, + unsigned int maxOutLen, const unsigned char *in, + unsigned int inLen) +{ + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if ((inLen % SEED_BLOCK_SIZE) != 0 || maxOutLen < SEED_BLOCK_SIZE || + maxOutLen < inLen) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (!cx->encrypt) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + switch (cx->mode) { + case NSS_SEED: + SEED_ecb_encrypt(in, out, inLen, &cx->ks, 1); + *outLen = inLen; + break; + + case NSS_SEED_CBC: + SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 1); + *outLen = inLen; + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + return SECSuccess; +} + +SECStatus +SEED_Decrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen, + unsigned int maxOutLen, const unsigned char *in, + unsigned int inLen) +{ + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if ((inLen % SEED_BLOCK_SIZE) != 0 || maxOutLen < SEED_BLOCK_SIZE || + maxOutLen < inLen) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (cx->encrypt) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + switch (cx->mode) { + case NSS_SEED: + SEED_ecb_encrypt(in, out, inLen, &cx->ks, 0); + *outLen = inLen; + break; + + case NSS_SEED_CBC: + SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 0); + 
*outLen = inLen; + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + return SECSuccess; +} diff --git a/security/nss/lib/freebl/deprecated/seed.h b/security/nss/lib/freebl/deprecated/seed.h new file mode 100644 index 0000000000..717a1e74ed --- /dev/null +++ b/security/nss/lib/freebl/deprecated/seed.h @@ -0,0 +1,125 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef HEADER_SEED_H +#define HEADER_SEED_H + +#include +#include "blapi.h" + +#if !defined(NO_SYS_TYPES_H) +#include +#endif + +typedef PRUint32 seed_word; + +#define G_FUNC(v) \ + SS[0][((v)&0xff)] ^ \ + SS[1][((v) >> 8 & 0xff)] ^ \ + SS[2][((v) >> 16 & 0xff)] ^ \ + SS[3][((v) >> 24 & 0xff)] + +#define char2word(c, i) \ + (i) = ((((seed_word)((c)[0])) << 24) | \ + (((seed_word)((c)[1])) << 16) | \ + (((seed_word)((c)[2])) << 8) | \ + ((seed_word)((c)[3]))) + +#define word2char(l, c) \ + *((c) + 0) = (unsigned char)((l) >> 24); \ + *((c) + 1) = (unsigned char)((l) >> 16); \ + *((c) + 2) = (unsigned char)((l) >> 8); \ + *((c) + 3) = (unsigned char)((l)) + +#define KEYSCHEDULE_UPDATE0(T0, T1, K0, K1, K2, K3, KC) \ + (T0) = (K2); \ + (K2) = (((K2) << 8) ^ ((K3) >> 24)); \ + (K3) = (((K3) << 8) ^ ((T0) >> 24)); \ + (T0) = ((K0) + (K2) - (KC)); \ + (T1) = ((K1) + (KC) - (K3)) + +#define KEYSCHEDULE_UPDATE1(T0, T1, K0, K1, K2, K3, KC) \ + (T0) = (K0); \ + (K0) = (((K0) >> 8) ^ ((K1) << 24)); \ + (K1) = (((K1) >> 8) ^ ((T0) << 24)); \ + (T0) = ((K0) + (K2) - (KC)); \ + (T1) = ((K1) + (KC) - (K3)) + +#define KEYUPDATE_TEMP(T0, T1, K) \ + (K)[0] = G_FUNC((T0)); \ + (K)[1] = G_FUNC((T1)) + +#define XOR_SEEDBLOCK(DST, SRC) \ + (DST)[0] ^= (SRC)[0]; \ + (DST)[1] ^= (SRC)[1]; \ + (DST)[2] ^= (SRC)[2]; \ + (DST)[3] ^= (SRC)[3] + +#define MOV_SEEDBLOCK(DST, SRC) \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + 
(DST)[2] = (SRC)[2]; \ + (DST)[3] = (SRC)[3] + +#define CHAR2WORD(C, I) \ + char2word((C), (I)[0]); \ + char2word((C) + 4, (I)[1]); \ + char2word((C) + 8, (I)[2]); \ + char2word((C) + 12, (I)[3]) + +#define WORD2CHAR(I, C) \ + word2char((I)[0], (C)); \ + word2char((I)[1], (C + 4)); \ + word2char((I)[2], (C + 8)); \ + word2char((I)[3], (C + 12)) + +#define E_SEED(T0, T1, X1, X2, X3, X4, rbase) \ + (T0) = (X3) ^ (ks->data)[(rbase)]; \ + (T1) = (X4) ^ (ks->data)[(rbase) + 1]; \ + (T1) ^= (T0); \ + (T1) = G_FUNC(T1); \ + (T0) += (T1); \ + (T0) = G_FUNC(T0); \ + (T1) += (T0); \ + (T1) = G_FUNC(T1); \ + (T0) += (T1); \ + (X1) ^= (T0); \ + (X2) ^= (T1) + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct seed_key_st { + PRUint32 data[32]; +} SEED_KEY_SCHEDULE; + +struct SEEDContextStr { + unsigned char iv[SEED_BLOCK_SIZE]; + SEED_KEY_SCHEDULE ks; + int mode; + unsigned int encrypt; +}; + +void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], + SEED_KEY_SCHEDULE *ks); + +void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE], + unsigned char d[SEED_BLOCK_SIZE], + const SEED_KEY_SCHEDULE *ks); +void SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE], + unsigned char d[SEED_BLOCK_SIZE], + const SEED_KEY_SCHEDULE *ks); + +void SEED_ecb_encrypt(const unsigned char *in, unsigned char *out, + size_t inLen, const SEED_KEY_SCHEDULE *ks, int enc); +void SEED_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const SEED_KEY_SCHEDULE *ks, + unsigned char ivec[SEED_BLOCK_SIZE], int enc); + +#ifdef __cplusplus +} +#endif + +#endif /* HEADER_SEED_H */ diff --git a/security/nss/lib/freebl/des.c b/security/nss/lib/freebl/des.c new file mode 100644 index 0000000000..fd433bbb2f --- /dev/null +++ b/security/nss/lib/freebl/des.c @@ -0,0 +1,676 @@ +/* + * des.c + * + * core source file for DES-150 library + * Make key schedule from DES key. + * Encrypt/Decrypt one 8-byte block. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "des.h" +#include "blapii.h" +#include /* for ptrdiff_t */ +/* #define USE_INDEXING 1 */ + +/* + * The tables below are the 8 sbox functions, with the 6-bit input permutation + * and the 32-bit output permutation pre-computed. + * They are shifted circularly to the left 3 bits, which removes 2 shifts + * and an or from each round by reducing the number of sboxes whose + * indices cross word boundaries from 2 to 1. + */ + +static const HALF SP[8][64] = { + /* Box S1 */ + { 0x04041000, 0x00000000, 0x00040000, 0x04041010, + 0x04040010, 0x00041010, 0x00000010, 0x00040000, + 0x00001000, 0x04041000, 0x04041010, 0x00001000, + 0x04001010, 0x04040010, 0x04000000, 0x00000010, + 0x00001010, 0x04001000, 0x04001000, 0x00041000, + 0x00041000, 0x04040000, 0x04040000, 0x04001010, + 0x00040010, 0x04000010, 0x04000010, 0x00040010, + 0x00000000, 0x00001010, 0x00041010, 0x04000000, + 0x00040000, 0x04041010, 0x00000010, 0x04040000, + 0x04041000, 0x04000000, 0x04000000, 0x00001000, + 0x04040010, 0x00040000, 0x00041000, 0x04000010, + 0x00001000, 0x00000010, 0x04001010, 0x00041010, + 0x04041010, 0x00040010, 0x04040000, 0x04001010, + 0x04000010, 0x00001010, 0x00041010, 0x04041000, + 0x00001010, 0x04001000, 0x04001000, 0x00000000, + 0x00040010, 0x00041000, 0x00000000, 0x04040010 }, + /* Box S2 */ + { 0x00420082, 0x00020002, 0x00020000, 0x00420080, + 0x00400000, 0x00000080, 0x00400082, 0x00020082, + 0x00000082, 0x00420082, 0x00420002, 0x00000002, + 0x00020002, 0x00400000, 0x00000080, 0x00400082, + 0x00420000, 0x00400080, 0x00020082, 0x00000000, + 0x00000002, 0x00020000, 0x00420080, 0x00400002, + 0x00400080, 0x00000082, 0x00000000, 0x00420000, + 0x00020080, 0x00420002, 0x00400002, 0x00020080, + 0x00000000, 0x00420080, 0x00400082, 0x00400000, + 0x00020082, 0x00400002,
0x00420002, 0x00020000, + 0x00400002, 0x00020002, 0x00000080, 0x00420082, + 0x00420080, 0x00000080, 0x00020000, 0x00000002, + 0x00020080, 0x00420002, 0x00400000, 0x00000082, + 0x00400080, 0x00020082, 0x00000082, 0x00400080, + 0x00420000, 0x00000000, 0x00020002, 0x00020080, + 0x00000002, 0x00400082, 0x00420082, 0x00420000 }, + /* Box S3 */ + { 0x00000820, 0x20080800, 0x00000000, 0x20080020, + 0x20000800, 0x00000000, 0x00080820, 0x20000800, + 0x00080020, 0x20000020, 0x20000020, 0x00080000, + 0x20080820, 0x00080020, 0x20080000, 0x00000820, + 0x20000000, 0x00000020, 0x20080800, 0x00000800, + 0x00080800, 0x20080000, 0x20080020, 0x00080820, + 0x20000820, 0x00080800, 0x00080000, 0x20000820, + 0x00000020, 0x20080820, 0x00000800, 0x20000000, + 0x20080800, 0x20000000, 0x00080020, 0x00000820, + 0x00080000, 0x20080800, 0x20000800, 0x00000000, + 0x00000800, 0x00080020, 0x20080820, 0x20000800, + 0x20000020, 0x00000800, 0x00000000, 0x20080020, + 0x20000820, 0x00080000, 0x20000000, 0x20080820, + 0x00000020, 0x00080820, 0x00080800, 0x20000020, + 0x20080000, 0x20000820, 0x00000820, 0x20080000, + 0x00080820, 0x00000020, 0x20080020, 0x00080800 }, + /* Box S4 */ + { 0x02008004, 0x00008204, 0x00008204, 0x00000200, + 0x02008200, 0x02000204, 0x02000004, 0x00008004, + 0x00000000, 0x02008000, 0x02008000, 0x02008204, + 0x00000204, 0x00000000, 0x02000200, 0x02000004, + 0x00000004, 0x00008000, 0x02000000, 0x02008004, + 0x00000200, 0x02000000, 0x00008004, 0x00008200, + 0x02000204, 0x00000004, 0x00008200, 0x02000200, + 0x00008000, 0x02008200, 0x02008204, 0x00000204, + 0x02000200, 0x02000004, 0x02008000, 0x02008204, + 0x00000204, 0x00000000, 0x00000000, 0x02008000, + 0x00008200, 0x02000200, 0x02000204, 0x00000004, + 0x02008004, 0x00008204, 0x00008204, 0x00000200, + 0x02008204, 0x00000204, 0x00000004, 0x00008000, + 0x02000004, 0x00008004, 0x02008200, 0x02000204, + 0x00008004, 0x00008200, 0x02000000, 0x02008004, + 0x00000200, 0x02000000, 0x00008000, 0x02008200 }, + /* Box S5 */ + { 0x00000400, 
0x08200400, 0x08200000, 0x08000401, + 0x00200000, 0x00000400, 0x00000001, 0x08200000, + 0x00200401, 0x00200000, 0x08000400, 0x00200401, + 0x08000401, 0x08200001, 0x00200400, 0x00000001, + 0x08000000, 0x00200001, 0x00200001, 0x00000000, + 0x00000401, 0x08200401, 0x08200401, 0x08000400, + 0x08200001, 0x00000401, 0x00000000, 0x08000001, + 0x08200400, 0x08000000, 0x08000001, 0x00200400, + 0x00200000, 0x08000401, 0x00000400, 0x08000000, + 0x00000001, 0x08200000, 0x08000401, 0x00200401, + 0x08000400, 0x00000001, 0x08200001, 0x08200400, + 0x00200401, 0x00000400, 0x08000000, 0x08200001, + 0x08200401, 0x00200400, 0x08000001, 0x08200401, + 0x08200000, 0x00000000, 0x00200001, 0x08000001, + 0x00200400, 0x08000400, 0x00000401, 0x00200000, + 0x00000000, 0x00200001, 0x08200400, 0x00000401 }, + /* Box S6 */ + { 0x80000040, 0x81000000, 0x00010000, 0x81010040, + 0x81000000, 0x00000040, 0x81010040, 0x01000000, + 0x80010000, 0x01010040, 0x01000000, 0x80000040, + 0x01000040, 0x80010000, 0x80000000, 0x00010040, + 0x00000000, 0x01000040, 0x80010040, 0x00010000, + 0x01010000, 0x80010040, 0x00000040, 0x81000040, + 0x81000040, 0x00000000, 0x01010040, 0x81010000, + 0x00010040, 0x01010000, 0x81010000, 0x80000000, + 0x80010000, 0x00000040, 0x81000040, 0x01010000, + 0x81010040, 0x01000000, 0x00010040, 0x80000040, + 0x01000000, 0x80010000, 0x80000000, 0x00010040, + 0x80000040, 0x81010040, 0x01010000, 0x81000000, + 0x01010040, 0x81010000, 0x00000000, 0x81000040, + 0x00000040, 0x00010000, 0x81000000, 0x01010040, + 0x00010000, 0x01000040, 0x80010040, 0x00000000, + 0x81010000, 0x80000000, 0x01000040, 0x80010040 }, + /* Box S7 */ + { 0x00800000, 0x10800008, 0x10002008, 0x00000000, + 0x00002000, 0x10002008, 0x00802008, 0x10802000, + 0x10802008, 0x00800000, 0x00000000, 0x10000008, + 0x00000008, 0x10000000, 0x10800008, 0x00002008, + 0x10002000, 0x00802008, 0x00800008, 0x10002000, + 0x10000008, 0x10800000, 0x10802000, 0x00800008, + 0x10800000, 0x00002000, 0x00002008, 0x10802008, + 0x00802000, 0x00000008, 
0x10000000, 0x00802000, + 0x10000000, 0x00802000, 0x00800000, 0x10002008, + 0x10002008, 0x10800008, 0x10800008, 0x00000008, + 0x00800008, 0x10000000, 0x10002000, 0x00800000, + 0x10802000, 0x00002008, 0x00802008, 0x10802000, + 0x00002008, 0x10000008, 0x10802008, 0x10800000, + 0x00802000, 0x00000000, 0x00000008, 0x10802008, + 0x00000000, 0x00802008, 0x10800000, 0x00002000, + 0x10000008, 0x10002000, 0x00002000, 0x00800008 }, + /* Box S8 */ + { 0x40004100, 0x00004000, 0x00100000, 0x40104100, + 0x40000000, 0x40004100, 0x00000100, 0x40000000, + 0x00100100, 0x40100000, 0x40104100, 0x00104000, + 0x40104000, 0x00104100, 0x00004000, 0x00000100, + 0x40100000, 0x40000100, 0x40004000, 0x00004100, + 0x00104000, 0x00100100, 0x40100100, 0x40104000, + 0x00004100, 0x00000000, 0x00000000, 0x40100100, + 0x40000100, 0x40004000, 0x00104100, 0x00100000, + 0x00104100, 0x00100000, 0x40104000, 0x00004000, + 0x00000100, 0x40100100, 0x00004000, 0x00104100, + 0x40004000, 0x00000100, 0x40000100, 0x40100000, + 0x40100100, 0x40000000, 0x00100000, 0x40004100, + 0x00000000, 0x40104100, 0x00100100, 0x40000100, + 0x40100000, 0x40004000, 0x40004100, 0x00000000, + 0x40104100, 0x00104000, 0x00104000, 0x00004100, + 0x00004100, 0x00100100, 0x40000000, 0x40104000 } +}; + +static const HALF PC2[8][64] = { + /* table 0 */ + { 0x00000000, 0x00001000, 0x04000000, 0x04001000, + 0x00100000, 0x00101000, 0x04100000, 0x04101000, + 0x00008000, 0x00009000, 0x04008000, 0x04009000, + 0x00108000, 0x00109000, 0x04108000, 0x04109000, + 0x00000004, 0x00001004, 0x04000004, 0x04001004, + 0x00100004, 0x00101004, 0x04100004, 0x04101004, + 0x00008004, 0x00009004, 0x04008004, 0x04009004, + 0x00108004, 0x00109004, 0x04108004, 0x04109004, + 0x08000000, 0x08001000, 0x0c000000, 0x0c001000, + 0x08100000, 0x08101000, 0x0c100000, 0x0c101000, + 0x08008000, 0x08009000, 0x0c008000, 0x0c009000, + 0x08108000, 0x08109000, 0x0c108000, 0x0c109000, + 0x08000004, 0x08001004, 0x0c000004, 0x0c001004, + 0x08100004, 0x08101004, 0x0c100004, 
0x0c101004, + 0x08008004, 0x08009004, 0x0c008004, 0x0c009004, + 0x08108004, 0x08109004, 0x0c108004, 0x0c109004 }, + /* table 1 */ + { 0x00000000, 0x00002000, 0x80000000, 0x80002000, + 0x00000008, 0x00002008, 0x80000008, 0x80002008, + 0x00200000, 0x00202000, 0x80200000, 0x80202000, + 0x00200008, 0x00202008, 0x80200008, 0x80202008, + 0x20000000, 0x20002000, 0xa0000000, 0xa0002000, + 0x20000008, 0x20002008, 0xa0000008, 0xa0002008, + 0x20200000, 0x20202000, 0xa0200000, 0xa0202000, + 0x20200008, 0x20202008, 0xa0200008, 0xa0202008, + 0x00000400, 0x00002400, 0x80000400, 0x80002400, + 0x00000408, 0x00002408, 0x80000408, 0x80002408, + 0x00200400, 0x00202400, 0x80200400, 0x80202400, + 0x00200408, 0x00202408, 0x80200408, 0x80202408, + 0x20000400, 0x20002400, 0xa0000400, 0xa0002400, + 0x20000408, 0x20002408, 0xa0000408, 0xa0002408, + 0x20200400, 0x20202400, 0xa0200400, 0xa0202400, + 0x20200408, 0x20202408, 0xa0200408, 0xa0202408 }, + /* table 2 */ + { 0x00000000, 0x00004000, 0x00000020, 0x00004020, + 0x00080000, 0x00084000, 0x00080020, 0x00084020, + 0x00000800, 0x00004800, 0x00000820, 0x00004820, + 0x00080800, 0x00084800, 0x00080820, 0x00084820, + 0x00000010, 0x00004010, 0x00000030, 0x00004030, + 0x00080010, 0x00084010, 0x00080030, 0x00084030, + 0x00000810, 0x00004810, 0x00000830, 0x00004830, + 0x00080810, 0x00084810, 0x00080830, 0x00084830, + 0x00400000, 0x00404000, 0x00400020, 0x00404020, + 0x00480000, 0x00484000, 0x00480020, 0x00484020, + 0x00400800, 0x00404800, 0x00400820, 0x00404820, + 0x00480800, 0x00484800, 0x00480820, 0x00484820, + 0x00400010, 0x00404010, 0x00400030, 0x00404030, + 0x00480010, 0x00484010, 0x00480030, 0x00484030, + 0x00400810, 0x00404810, 0x00400830, 0x00404830, + 0x00480810, 0x00484810, 0x00480830, 0x00484830 }, + /* table 3 */ + { 0x00000000, 0x40000000, 0x00000080, 0x40000080, + 0x00040000, 0x40040000, 0x00040080, 0x40040080, + 0x00000040, 0x40000040, 0x000000c0, 0x400000c0, + 0x00040040, 0x40040040, 0x000400c0, 0x400400c0, + 0x10000000, 0x50000000, 
0x10000080, 0x50000080, + 0x10040000, 0x50040000, 0x10040080, 0x50040080, + 0x10000040, 0x50000040, 0x100000c0, 0x500000c0, + 0x10040040, 0x50040040, 0x100400c0, 0x500400c0, + 0x00800000, 0x40800000, 0x00800080, 0x40800080, + 0x00840000, 0x40840000, 0x00840080, 0x40840080, + 0x00800040, 0x40800040, 0x008000c0, 0x408000c0, + 0x00840040, 0x40840040, 0x008400c0, 0x408400c0, + 0x10800000, 0x50800000, 0x10800080, 0x50800080, + 0x10840000, 0x50840000, 0x10840080, 0x50840080, + 0x10800040, 0x50800040, 0x108000c0, 0x508000c0, + 0x10840040, 0x50840040, 0x108400c0, 0x508400c0 }, + /* table 4 */ + { 0x00000000, 0x00000008, 0x08000000, 0x08000008, + 0x00040000, 0x00040008, 0x08040000, 0x08040008, + 0x00002000, 0x00002008, 0x08002000, 0x08002008, + 0x00042000, 0x00042008, 0x08042000, 0x08042008, + 0x80000000, 0x80000008, 0x88000000, 0x88000008, + 0x80040000, 0x80040008, 0x88040000, 0x88040008, + 0x80002000, 0x80002008, 0x88002000, 0x88002008, + 0x80042000, 0x80042008, 0x88042000, 0x88042008, + 0x00080000, 0x00080008, 0x08080000, 0x08080008, + 0x000c0000, 0x000c0008, 0x080c0000, 0x080c0008, + 0x00082000, 0x00082008, 0x08082000, 0x08082008, + 0x000c2000, 0x000c2008, 0x080c2000, 0x080c2008, + 0x80080000, 0x80080008, 0x88080000, 0x88080008, + 0x800c0000, 0x800c0008, 0x880c0000, 0x880c0008, + 0x80082000, 0x80082008, 0x88082000, 0x88082008, + 0x800c2000, 0x800c2008, 0x880c2000, 0x880c2008 }, + /* table 5 */ + { 0x00000000, 0x00400000, 0x00008000, 0x00408000, + 0x40000000, 0x40400000, 0x40008000, 0x40408000, + 0x00000020, 0x00400020, 0x00008020, 0x00408020, + 0x40000020, 0x40400020, 0x40008020, 0x40408020, + 0x00001000, 0x00401000, 0x00009000, 0x00409000, + 0x40001000, 0x40401000, 0x40009000, 0x40409000, + 0x00001020, 0x00401020, 0x00009020, 0x00409020, + 0x40001020, 0x40401020, 0x40009020, 0x40409020, + 0x00100000, 0x00500000, 0x00108000, 0x00508000, + 0x40100000, 0x40500000, 0x40108000, 0x40508000, + 0x00100020, 0x00500020, 0x00108020, 0x00508020, + 0x40100020, 0x40500020, 
0x40108020, 0x40508020, + 0x00101000, 0x00501000, 0x00109000, 0x00509000, + 0x40101000, 0x40501000, 0x40109000, 0x40509000, + 0x00101020, 0x00501020, 0x00109020, 0x00509020, + 0x40101020, 0x40501020, 0x40109020, 0x40509020 }, + /* table 6 */ + { 0x00000000, 0x00000040, 0x04000000, 0x04000040, + 0x00000800, 0x00000840, 0x04000800, 0x04000840, + 0x00800000, 0x00800040, 0x04800000, 0x04800040, + 0x00800800, 0x00800840, 0x04800800, 0x04800840, + 0x10000000, 0x10000040, 0x14000000, 0x14000040, + 0x10000800, 0x10000840, 0x14000800, 0x14000840, + 0x10800000, 0x10800040, 0x14800000, 0x14800040, + 0x10800800, 0x10800840, 0x14800800, 0x14800840, + 0x00000080, 0x000000c0, 0x04000080, 0x040000c0, + 0x00000880, 0x000008c0, 0x04000880, 0x040008c0, + 0x00800080, 0x008000c0, 0x04800080, 0x048000c0, + 0x00800880, 0x008008c0, 0x04800880, 0x048008c0, + 0x10000080, 0x100000c0, 0x14000080, 0x140000c0, + 0x10000880, 0x100008c0, 0x14000880, 0x140008c0, + 0x10800080, 0x108000c0, 0x14800080, 0x148000c0, + 0x10800880, 0x108008c0, 0x14800880, 0x148008c0 }, + /* table 7 */ + { 0x00000000, 0x00000010, 0x00000400, 0x00000410, + 0x00000004, 0x00000014, 0x00000404, 0x00000414, + 0x00004000, 0x00004010, 0x00004400, 0x00004410, + 0x00004004, 0x00004014, 0x00004404, 0x00004414, + 0x20000000, 0x20000010, 0x20000400, 0x20000410, + 0x20000004, 0x20000014, 0x20000404, 0x20000414, + 0x20004000, 0x20004010, 0x20004400, 0x20004410, + 0x20004004, 0x20004014, 0x20004404, 0x20004414, + 0x00200000, 0x00200010, 0x00200400, 0x00200410, + 0x00200004, 0x00200014, 0x00200404, 0x00200414, + 0x00204000, 0x00204010, 0x00204400, 0x00204410, + 0x00204004, 0x00204014, 0x00204404, 0x00204414, + 0x20200000, 0x20200010, 0x20200400, 0x20200410, + 0x20200004, 0x20200014, 0x20200404, 0x20200414, + 0x20204000, 0x20204010, 0x20204400, 0x20204410, + 0x20204004, 0x20204014, 0x20204404, 0x20204414 } +}; + +/* + * The PC-1 Permutation + * If we number the bits of the 8 bytes of key input like this (in octal): + * 00 01 02 03 04 05 
06 07 + * 10 11 12 13 14 15 16 17 + * 20 21 22 23 24 25 26 27 + * 30 31 32 33 34 35 36 37 + * 40 41 42 43 44 45 46 47 + * 50 51 52 53 54 55 56 57 + * 60 61 62 63 64 65 66 67 + * 70 71 72 73 74 75 76 77 + * then after the PC-1 permutation, + * C0 is + * 70 60 50 40 30 20 10 00 + * 71 61 51 41 31 21 11 01 + * 72 62 52 42 32 22 12 02 + * 73 63 53 43 + * D0 is + * 76 66 56 46 36 26 16 06 + * 75 65 55 45 35 25 15 05 + * 74 64 54 44 34 24 14 04 + * 33 23 13 03 + * and these parity bits have been discarded: + * 77 67 57 47 37 27 17 07 + * + * We achieve this by flipping the input matrix about the diagonal from 70-07, + * getting left = + * 77 67 57 47 37 27 17 07 (these are the parity bits) + * 76 66 56 46 36 26 16 06 + * 75 65 55 45 35 25 15 05 + * 74 64 54 44 34 24 14 04 + * right = + * 73 63 53 43 33 23 13 03 + * 72 62 52 42 32 22 12 02 + * 71 61 51 41 31 21 11 01 + * 70 60 50 40 30 20 10 00 + * then byte swap right, ala htonl() on a little endian machine. + * right = + * 70 60 50 40 30 20 10 00 + * 71 67 57 47 37 27 11 07 + * 72 62 52 42 32 22 12 02 + * 73 63 53 43 33 23 13 03 + * then + * c0 = right >> 4; + * d0 = ((left & 0x00ffffff) << 4) | (right & 0xf); +*/ + +#define FLIP_RIGHT_DIAGONAL(word, temp) \ + temp = (word ^ (word >> 18)) & 0x00003333; \ + word ^= temp | (temp << 18); \ + temp = (word ^ (word >> 9)) & 0x00550055; \ + word ^= temp | (temp << 9); + +#if defined(__GNUC__) && defined(NSS_X86_OR_X64) +#define BYTESWAP(word, temp) \ + __asm("bswap %0" \ + : "+r"(word)); +#elif (_MSC_VER >= 1300) && defined(NSS_X86_OR_X64) +#include +#pragma intrinsic(_byteswap_ulong) +#define BYTESWAP(word, temp) \ + word = _byteswap_ulong(word); +#elif defined(__GNUC__) && (defined(__thumb2__) || \ + (!defined(__thumb__) && \ + (defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) 
|| \ + defined(__ARM_ARCH_7R__)))) +#define BYTESWAP(word, temp) \ + __asm("rev %0, %0" \ + : "+r"(word)); +#else +#define BYTESWAP(word, temp) \ + word = (word >> 16) | (word << 16); \ + temp = 0x00ff00ff; \ + word = ((word & temp) << 8) | ((word >> 8) & temp); +#endif + +#define PC1(left, right, c0, d0, temp) \ + right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \ + left ^= temp << 4; \ + FLIP_RIGHT_DIAGONAL(left, temp); \ + FLIP_RIGHT_DIAGONAL(right, temp); \ + BYTESWAP(right, temp); \ + c0 = right >> 4; \ + d0 = ((left & 0x00ffffff) << 4) | (right & 0xf); + +#define LEFT_SHIFT_1(reg) (((reg << 1) | (reg >> 27)) & 0x0FFFFFFF) +#define LEFT_SHIFT_2(reg) (((reg << 2) | (reg >> 26)) & 0x0FFFFFFF) + +/* + * setup key schedules from key + */ + +void +DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction) +{ + register HALF left, right; + register HALF c0, d0; + register HALF temp; + int delta; + unsigned int ls; + +#if defined(HAVE_UNALIGNED_ACCESS) + left = HALFPTR(key)[0]; + right = HALFPTR(key)[1]; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif +#else + if (((ptrdiff_t)key & 0x03) == 0) { + left = HALFPTR(key)[0]; + right = HALFPTR(key)[1]; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif + } else { + left = ((HALF)key[0] << 24) | ((HALF)key[1] << 16) | + ((HALF)key[2] << 8) | key[3]; + right = ((HALF)key[4] << 24) | ((HALF)key[5] << 16) | + ((HALF)key[6] << 8) | key[7]; + } +#endif + + PC1(left, right, c0, d0, temp); + + if (direction == DES_ENCRYPT) { + delta = 2 * (int)sizeof(HALF); + } else { + ks += 30; + delta = (-2) * (int)sizeof(HALF); + } + + for (ls = 0x8103; ls; ls >>= 1) { + if (ls & 1) { + c0 = LEFT_SHIFT_1(c0); + d0 = LEFT_SHIFT_1(d0); + } else { + c0 = LEFT_SHIFT_2(c0); + d0 = LEFT_SHIFT_2(d0); + } + +#ifdef USE_INDEXING +#define PC2LOOKUP(b, c) PC2[b][c] + + left = PC2LOOKUP(0, ((c0 >> 22) & 0x3F)); + left |= PC2LOOKUP(1, ((c0 >> 13) & 0x3F)); + left |= 
PC2LOOKUP(2, ((c0 >> 4) & 0x38) | (c0 & 0x7)); + left |= PC2LOOKUP(3, ((c0 >> 18) & 0xC) | ((c0 >> 11) & 0x3) | (c0 & 0x30)); + + right = PC2LOOKUP(4, ((d0 >> 22) & 0x3F)); + right |= PC2LOOKUP(5, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf)); + right |= PC2LOOKUP(6, ((d0 >> 7) & 0x3F)); + right |= PC2LOOKUP(7, ((d0 >> 1) & 0x3C) | (d0 & 0x3)); +#else +#define PC2LOOKUP(b, c) *(HALF *)((BYTE *)&PC2[b][0] + (c)) + + left = PC2LOOKUP(0, ((c0 >> 20) & 0xFC)); + left |= PC2LOOKUP(1, ((c0 >> 11) & 0xFC)); + left |= PC2LOOKUP(2, ((c0 >> 2) & 0xE0) | ((c0 << 2) & 0x1C)); + left |= PC2LOOKUP(3, ((c0 >> 16) & 0x30) | ((c0 >> 9) & 0xC) | ((c0 << 2) & 0xC0)); + + right = PC2LOOKUP(4, ((d0 >> 20) & 0xFC)); + right |= PC2LOOKUP(5, ((d0 >> 13) & 0xC0) | ((d0 >> 12) & 0x3C)); + right |= PC2LOOKUP(6, ((d0 >> 5) & 0xFC)); + right |= PC2LOOKUP(7, ((d0 << 1) & 0xF0) | ((d0 << 2) & 0x0C)); +#endif + /* left contains key bits for S1 S3 S2 S4 */ + /* right contains key bits for S6 S8 S5 S7 */ + temp = (left << 16) /* S2 S4 XX XX */ + | (right >> 16); /* XX XX S6 S8 */ + ks[0] = temp; + + temp = (left & 0xffff0000) /* S1 S3 XX XX */ + | (right & 0x0000ffff); /* XX XX S5 S7 */ + ks[1] = temp; + + ks = (HALF *)((BYTE *)ks + delta); + } +} + +/* + * The DES Initial Permutation + * if we number the bits of the 8 bytes of input like this (in octal): + * 00 01 02 03 04 05 06 07 + * 10 11 12 13 14 15 16 17 + * 20 21 22 23 24 25 26 27 + * 30 31 32 33 34 35 36 37 + * 40 41 42 43 44 45 46 47 + * 50 51 52 53 54 55 56 57 + * 60 61 62 63 64 65 66 67 + * 70 71 72 73 74 75 76 77 + * then after the initial permutation, they will be in this order. + * 71 61 51 41 31 21 11 01 + * 73 63 53 43 33 23 13 03 + * 75 65 55 45 35 25 15 05 + * 77 67 57 47 37 27 17 07 + * 70 60 50 40 30 20 10 00 + * 72 62 52 42 32 22 12 02 + * 74 64 54 44 34 24 14 04 + * 76 66 56 46 36 26 16 06 + * + * One way to do this is in two steps: + * 1. Flip this matrix about the diagonal from 70-07 as done for PC1. + * 2. 
/*
 * The DES Initial Permutation
 * if we number the bits of the 8 bytes of input like this (in octal):
 *     00 01 02 03 04 05 06 07
 *     10 11 12 13 14 15 16 17
 *     20 21 22 23 24 25 26 27
 *     30 31 32 33 34 35 36 37
 *     40 41 42 43 44 45 46 47
 *     50 51 52 53 54 55 56 57
 *     60 61 62 63 64 65 66 67
 *     70 71 72 73 74 75 76 77
 * then after the initial permutation, they will be in this order.
 *     71 61 51 41 31 21 11 01
 *     73 63 53 43 33 23 13 03
 *     75 65 55 45 35 25 15 05
 *     77 67 57 47 37 27 17 07
 *     70 60 50 40 30 20 10 00
 *     72 62 52 42 32 22 12 02
 *     74 64 54 44 34 24 14 04
 *     76 66 56 46 36 26 16 06
 *
 * One way to do this is in two steps:
 * 1. Flip this matrix about the diagonal from 70-07 as done for PC1.
 * 2. Rearrange the bytes (rows in the matrix above) with the following code.
 *
 *    swapHiLo(word, temp) is defined as the two statements
 *        temp = (word ^ (word >> 24)) & 0x000000ff;
 *        word ^= temp | (temp << 24);
 *
 *    right ^= temp = ((left << 8) ^ right) & 0xff00ff00;
 *    left ^= temp >> 8;
 *    swapHiLo(left, temp);
 *    swapHiLo(right, temp);
 *
 * However, the two steps can be combined, so that the rows are rearranged
 * while the matrix is being flipped, reducing the number of bit exchange
 * operations from 8 to 5.
 *
 * Initial Permutation: five masked exchanges between left and right.
 * left and right are updated in place; temp is scratch.
 */
#define IP(left, right, temp)                            \
    right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f;  \
    left ^= temp << 4;                                   \
    right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
    left ^= temp << 16;                                  \
    right ^= temp = ((left << 2) ^ right) & 0xcccccccc;  \
    left ^= temp >> 2;                                   \
    right ^= temp = ((left << 8) ^ right) & 0xff00ff00;  \
    left ^= temp >> 8;                                   \
    right ^= temp = ((left >> 1) ^ right) & 0x55555555;  \
    left ^= temp << 1;

/* The Final (Inverse Initial) permutation is done by reversing the
** steps of the Initial Permutation: the same five exchanges, applied in
** the opposite order.
*/

#define FP(left, right, temp)                            \
    right ^= temp = ((left >> 1) ^ right) & 0x55555555;  \
    left ^= temp << 1;                                   \
    right ^= temp = ((left << 8) ^ right) & 0xff00ff00;  \
    left ^= temp >> 8;                                   \
    right ^= temp = ((left << 2) ^ right) & 0xcccccccc;  \
    left ^= temp >> 2;                                   \
    right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
    left ^= temp << 16;                                  \
    right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f;  \
    left ^= temp << 4;

/*
 * Run one 8-byte block through the 16 DES rounds.
 *
 * ks     - key schedule; entries ks[0] .. ks[31] are read (two per round).
 *          Whether this encrypts or decrypts depends only on the order in
 *          which the schedule was stored.
 * inbuf  - 8 input bytes, interpreted as two big-endian 32-bit words.
 * outbuf - receives 8 output bytes in the same big-endian layout.  All of
 *          inbuf is read before anything is written to outbuf.
 */
void NO_SANITIZE_ALIGNMENT
DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf)
{
    register HALF left, right;
    register HALF temp;

    /* Load the block as two big-endian words.  Direct word loads are used
     * when unaligned access is allowed or inbuf is 4-byte aligned;
     * otherwise the words are assembled byte by byte. */
#if defined(HAVE_UNALIGNED_ACCESS)
    left = HALFPTR(inbuf)[0];
    right = HALFPTR(inbuf)[1];
#if defined(IS_LITTLE_ENDIAN)
    BYTESWAP(left, temp);
    BYTESWAP(right, temp);
#endif
#else
    if (((ptrdiff_t)inbuf & 0x03) == 0) {
        left = HALFPTR(inbuf)[0];
        right = HALFPTR(inbuf)[1];
#if defined(IS_LITTLE_ENDIAN)
        BYTESWAP(left, temp);
        BYTESWAP(right, temp);
#endif
    } else {
        left = ((HALF)inbuf[0] << 24) | ((HALF)inbuf[1] << 16) |
               ((HALF)inbuf[2] << 8) | inbuf[3];
        right = ((HALF)inbuf[4] << 24) | ((HALF)inbuf[5] << 16) |
                ((HALF)inbuf[6] << 8) | inbuf[7];
    }
#endif

    IP(left, right, temp);

    /* shift the values left circularly 3 bits. */
    left = (left << 3) | (left >> 29);
    right = (right << 3) | (right >> 29);

    /* KSLOOKUP(s, b) fetches the SP[s] entry selected by the 6 bits of temp
     * that start at bit b+2.  The non-indexing form keeps those bits in
     * place (mask 0xFC) and uses them directly as a byte offset. */
#ifdef USE_INDEXING
#define KSLOOKUP(s, b) SP[s][((temp >> (b + 2)) & 0x3f)]
#else
#define KSLOOKUP(s, b) *(HALF *)((BYTE *)&SP[s][0] + ((temp >> b) & 0xFC))
#endif
    /* One Feistel round: `in` is mixed with round key pair r and the eight
     * table lookups are XORed into `out`.  Expands to statements that
     * already end in ';', hence no ';' at the use sites below. */
#define ROUND(out, in, r)                            \
    temp = in ^ ks[2 * r];                           \
    out ^= KSLOOKUP(1, 24);                          \
    out ^= KSLOOKUP(3, 16);                          \
    out ^= KSLOOKUP(5, 8);                           \
    out ^= KSLOOKUP(7, 0);                           \
    temp = ((in >> 4) | (in << 28)) ^ ks[2 * r + 1]; \
    out ^= KSLOOKUP(0, 24);                          \
    out ^= KSLOOKUP(2, 16);                          \
    out ^= KSLOOKUP(4, 8);                           \
    out ^= KSLOOKUP(6, 0);

    /* Do the 16 Feistel rounds */
    ROUND(left, right, 0)
    ROUND(right, left, 1)
    ROUND(left, right, 2)
    ROUND(right, left, 3)
    ROUND(left, right, 4)
    ROUND(right, left, 5)
    ROUND(left, right, 6)
    ROUND(right, left, 7)
    ROUND(left, right, 8)
    ROUND(right, left, 9)
    ROUND(left, right, 10)
    ROUND(right, left, 11)
    ROUND(left, right, 12)
    ROUND(right, left, 13)
    ROUND(left, right, 14)
    ROUND(right, left, 15)

    /* now shift circularly right 3 bits to undo the shifting done
    ** above.  switch left and right here.
    */
    temp = (left >> 3) | (left << 29);
    left = (right >> 3) | (right << 29);
    right = temp;

    FP(left, right, temp);

    /* Store the result big-endian, mirroring the load logic above. */
#if defined(HAVE_UNALIGNED_ACCESS)
#if defined(IS_LITTLE_ENDIAN)
    BYTESWAP(left, temp);
    BYTESWAP(right, temp);
#endif
    HALFPTR(outbuf)[0] = left;
    HALFPTR(outbuf)[1] = right;
#else
    if (((ptrdiff_t)outbuf & 0x03) == 0) {
#if defined(IS_LITTLE_ENDIAN)
        BYTESWAP(left, temp);
        BYTESWAP(right, temp);
#endif
        HALFPTR(outbuf)[0] = left;
        HALFPTR(outbuf)[1] = right;
    } else {
        outbuf[0] = (BYTE)(left >> 24);
        outbuf[1] = (BYTE)(left >> 16);
        outbuf[2] = (BYTE)(left >> 8);
        outbuf[3] = (BYTE)(left);

        outbuf[4] = (BYTE)(right >> 24);
        outbuf[5] = (BYTE)(right >> 16);
        outbuf[6] = (BYTE)(right >> 8);
        outbuf[7] = (BYTE)(right);
    }
#endif
}

/* Acknowledgements:
** Two ideas used in this implementation were shown to me by Dennis Ferguson
** in 1990.  He credits them to Richard Outerbridge and Dan Hoey.  They were:
** 1. The method of computing the Initial and Final permutations.
** 2. Circularly rotating the SP tables and the initial values of left and
**    right to reduce the number of shifts required during the 16 rounds.
*/
#ifndef _DES_H_
#define _DES_H_ 1

#include "blapi.h"

/* BYTE is one octet of key/data; HALF is one half of a 64-bit DES block.
 * NOTE(review): the DES code shifts HALF values by up to 29 bits and masks
 * with 32-bit constants, so it assumes unsigned int is exactly 32 bits. */
typedef unsigned char BYTE;
typedef unsigned int HALF;

/* Reinterpreting casts.  They do not preserve const and do not check
 * alignment; callers are responsible for both. */
#define HALFPTR(x) ((HALF *)(x))
#define SHORTPTR(x) ((unsigned short *)(x))
#define BYTEPTR(x) ((BYTE *)(x))

/* Direction of a key schedule / context. */
typedef enum {
    DES_ENCRYPT = 0x5555,
    DES_DECRYPT = 0xAAAA
} DESDirection;

/* Signature of a mode worker: process `len` bytes from `in` into `out`
 * using the schedules (and, for chaining modes, the iv) held in `cx`. */
typedef void DESFunc(struct DESContextStr *cx, BYTE *out, const BYTE *in,
                     unsigned int len);

struct DESContextStr {
    /* key schedule, 16 internal keys, each with 8 6-bit parts */
    HALF ks0[32];
    HALF ks1[32]; /* second and third schedules; only filled for triple-DES */
    HALF ks2[32];
    HALF iv[2];             /* chaining value for the CBC workers */
    DESDirection direction; /* checked by DES_Encrypt / DES_Decrypt */
    DESFunc *worker;        /* mode-specific routine chosen at init time */
};

/* Expand an 8-byte key into a 32-entry schedule for the given direction. */
void DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction);
/* Process one 8-byte block with the given schedule. */
void DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf);

#endif
*/ +#define COPY8B(to, from, ptr) \ + HALFPTR(to) \ + [0] = HALFPTR(from)[0]; \ + HALFPTR(to) \ + [1] = HALFPTR(from)[1]; +#else +#define COPY8B(to, from, ptr) memcpy(to, from, 8) +#endif +#define COPY8BTOHALF(to, from) COPY8B(to, from, from) +#define COPY8BFROMHALF(to, from) COPY8B(to, from, to) + +static void +DES_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len) +{ + while (len) { + DES_Do1Block(cx->ks0, in, out); + len -= 8; + in += 8; + out += 8; + } +} + +static void +DES_EDE3_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len) +{ + while (len) { + DES_Do1Block(cx->ks0, in, out); + len -= 8; + in += 8; + DES_Do1Block(cx->ks1, out, out); + DES_Do1Block(cx->ks2, out, out); + out += 8; + } +} + +static void NO_SANITIZE_ALIGNMENT +DES_CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len) +{ + const BYTE *bufend = in + len; + HALF vec[2]; + + while (in != bufend) { + COPY8BTOHALF(vec, in); + in += 8; + vec[0] ^= cx->iv[0]; + vec[1] ^= cx->iv[1]; + DES_Do1Block(cx->ks0, (BYTE *)vec, (BYTE *)cx->iv); + COPY8BFROMHALF(out, cx->iv); + out += 8; + } +} + +static void NO_SANITIZE_ALIGNMENT +DES_CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len) +{ + const BYTE *bufend; + HALF oldciphertext[2]; + HALF plaintext[2]; + + for (bufend = in + len; in != bufend;) { + oldciphertext[0] = cx->iv[0]; + oldciphertext[1] = cx->iv[1]; + COPY8BTOHALF(cx->iv, in); + in += 8; + DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)plaintext); + plaintext[0] ^= oldciphertext[0]; + plaintext[1] ^= oldciphertext[1]; + COPY8BFROMHALF(out, plaintext); + out += 8; + } +} + +static void NO_SANITIZE_ALIGNMENT +DES_EDE3CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len) +{ + const BYTE *bufend = in + len; + HALF vec[2]; + + while (in != bufend) { + COPY8BTOHALF(vec, in); + in += 8; + vec[0] ^= cx->iv[0]; + vec[1] ^= cx->iv[1]; + DES_Do1Block(cx->ks0, (BYTE *)vec, (BYTE *)cx->iv); + DES_Do1Block(cx->ks1, (BYTE *)cx->iv, (BYTE 
*)cx->iv); + DES_Do1Block(cx->ks2, (BYTE *)cx->iv, (BYTE *)cx->iv); + COPY8BFROMHALF(out, cx->iv); + out += 8; + } +} + +static void NO_SANITIZE_ALIGNMENT +DES_EDE3CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len) +{ + const BYTE *bufend; + HALF oldciphertext[2]; + HALF plaintext[2]; + + for (bufend = in + len; in != bufend;) { + oldciphertext[0] = cx->iv[0]; + oldciphertext[1] = cx->iv[1]; + COPY8BTOHALF(cx->iv, in); + in += 8; + DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)plaintext); + DES_Do1Block(cx->ks1, (BYTE *)plaintext, (BYTE *)plaintext); + DES_Do1Block(cx->ks2, (BYTE *)plaintext, (BYTE *)plaintext); + plaintext[0] ^= oldciphertext[0]; + plaintext[1] ^= oldciphertext[1]; + COPY8BFROMHALF(out, plaintext); + out += 8; + } +} + +DESContext * +DES_AllocateContext(void) +{ + return PORT_ZNew(DESContext); +} + +SECStatus +DES_InitContext(DESContext *cx, const unsigned char *key, unsigned int keylen, + const unsigned char *iv, int mode, unsigned int encrypt, + unsigned int unused) +{ + DESDirection opposite; + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + cx->direction = encrypt ? DES_ENCRYPT : DES_DECRYPT; + opposite = encrypt ? DES_DECRYPT : DES_ENCRYPT; + switch (mode) { + case NSS_DES: /* DES ECB */ + DES_MakeSchedule(cx->ks0, key, cx->direction); + cx->worker = &DES_ECB; + break; + + case NSS_DES_EDE3: /* DES EDE ECB */ + cx->worker = &DES_EDE3_ECB; + if (encrypt) { + DES_MakeSchedule(cx->ks0, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks2, key + 16, cx->direction); + } else { + DES_MakeSchedule(cx->ks2, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks0, key + 16, cx->direction); + } + break; + + case NSS_DES_CBC: /* DES CBC */ + COPY8BTOHALF(cx->iv, iv); + cx->worker = encrypt ? 
&DES_CBCEn : &DES_CBCDe; + DES_MakeSchedule(cx->ks0, key, cx->direction); + break; + + case NSS_DES_EDE3_CBC: /* DES EDE CBC */ + COPY8BTOHALF(cx->iv, iv); + if (encrypt) { + cx->worker = &DES_EDE3CBCEn; + DES_MakeSchedule(cx->ks0, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks2, key + 16, cx->direction); + } else { + cx->worker = &DES_EDE3CBCDe; + DES_MakeSchedule(cx->ks2, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks0, key + 16, cx->direction); + } + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return SECSuccess; +} + +DESContext * +DES_CreateContext(const BYTE *key, const BYTE *iv, int mode, PRBool encrypt) +{ + DESContext *cx = PORT_ZNew(DESContext); + SECStatus rv = DES_InitContext(cx, key, 0, iv, mode, encrypt, 0); + + if (rv != SECSuccess) { + PORT_ZFree(cx, sizeof *cx); + cx = NULL; + } + return cx; +} + +void +DES_DestroyContext(DESContext *cx, PRBool freeit) +{ + if (cx) { + memset(cx, 0, sizeof *cx); + if (freeit) + PORT_Free(cx); + } +} + +SECStatus +DES_Encrypt(DESContext *cx, BYTE *out, unsigned int *outLen, + unsigned int maxOutLen, const BYTE *in, unsigned int inLen) +{ + + if ((inLen % 8) != 0 || maxOutLen < inLen || !cx || + cx->direction != DES_ENCRYPT) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + cx->worker(cx, out, in, inLen); + if (outLen) + *outLen = inLen; + return SECSuccess; +} + +SECStatus +DES_Decrypt(DESContext *cx, BYTE *out, unsigned int *outLen, + unsigned int maxOutLen, const BYTE *in, unsigned int inLen) +{ + + if ((inLen % 8) != 0 || maxOutLen < inLen || !cx || + cx->direction != DES_DECRYPT) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + cx->worker(cx, out, in, inLen); + if (outLen) + *outLen = inLen; + return SECSuccess; +} diff --git a/security/nss/lib/freebl/det_rng.c b/security/nss/lib/freebl/det_rng.c new file mode 100644 index 
0000000000..f50a3c4446 --- /dev/null +++ b/security/nss/lib/freebl/det_rng.c @@ -0,0 +1,163 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "blapi.h" +#include "blapit.h" +#include "Hacl_Chacha20.h" +#include "nssilock.h" +#include "seccomon.h" +#include "secerr.h" +#include "prinit.h" + +#define GLOBAL_BYTES_SIZE 100 +static PRUint8 globalBytes[GLOBAL_BYTES_SIZE]; +static unsigned long globalNumCalls = 0; +static PZLock *rng_lock = NULL; +static PRCallOnceType coRNGInit; +static const PRCallOnceType pristineCallOnce; + +static PRStatus +rng_init(void) +{ + rng_lock = PZ_NewLock(nssILockOther); + if (!rng_lock) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return PR_FAILURE; + } + /* --- LOCKED --- */ + PZ_Lock(rng_lock); + memset(globalBytes, 0, GLOBAL_BYTES_SIZE); + PZ_Unlock(rng_lock); + /* --- UNLOCKED --- */ + + return PR_SUCCESS; +} + +SECStatus +RNG_RNGInit(void) +{ + /* Allow only one call to initialize the context */ + if (PR_CallOnce(&coRNGInit, rng_init) != PR_SUCCESS) { + return SECFailure; + } + + return SECSuccess; +} + +/* Take min(size, GLOBAL_BYTES_SIZE) bytes from data and use as seed and reset + * the rng state. */ +SECStatus +RNG_RandomUpdate(const void *data, size_t bytes) +{ + /* Check for a valid RNG lock. 
*/ + PORT_Assert(rng_lock != NULL); + if (rng_lock == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* --- LOCKED --- */ + PZ_Lock(rng_lock); + memset(globalBytes, 0, GLOBAL_BYTES_SIZE); + globalNumCalls = 0; + if (data) { + memcpy(globalBytes, (PRUint8 *)data, PR_MIN(bytes, GLOBAL_BYTES_SIZE)); + } + PZ_Unlock(rng_lock); + /* --- UNLOCKED --- */ + + return SECSuccess; +} + +SECStatus +RNG_GenerateGlobalRandomBytes(void *dest, size_t len) +{ + static const uint8_t key[32] = { 0 }; + uint8_t nonce[12] = { 0 }; + + /* Check for a valid RNG lock. */ + PORT_Assert(rng_lock != NULL); + if (rng_lock == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* --- LOCKED --- */ + PZ_Lock(rng_lock); + + memcpy(nonce, &globalNumCalls, sizeof(globalNumCalls)); + globalNumCalls++; + + ChaCha20Poly1305Context *cx = + ChaCha20Poly1305_CreateContext(key, sizeof(key), 16); + if (!cx) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PZ_Unlock(rng_lock); + return SECFailure; + } + + memset(dest, 0, len); + memcpy(dest, globalBytes, PR_MIN(len, GLOBAL_BYTES_SIZE)); + Hacl_Chacha20_chacha20_encrypt(len, (uint8_t *)dest, (uint8_t *)dest, + (uint8_t *)key, nonce, 0); + ChaCha20Poly1305_DestroyContext(cx, PR_TRUE); + + PZ_Unlock(rng_lock); + /* --- UNLOCKED --- */ + + return SECSuccess; +} + +void +RNG_RNGShutdown(void) +{ + if (rng_lock) { + PZ_DestroyLock(rng_lock); + rng_lock = NULL; + } + coRNGInit = pristineCallOnce; +} + +/* Test functions are not implemented! 
*/ +SECStatus +PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + return SECFailure; +} + +SECStatus +PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional, unsigned int additional_len) +{ + return SECFailure; +} + +SECStatus +PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len, + const PRUint8 *additional, unsigned int additional_len) +{ + return SECFailure; +} + +SECStatus +PRNGTEST_Uninstantiate() +{ + return SECFailure; +} + +SECStatus +PRNGTEST_RunHealthTests() +{ + return SECFailure; +} + +SECStatus +PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + return SECFailure; +} diff --git a/security/nss/lib/freebl/det_rng.h b/security/nss/lib/freebl/det_rng.h new file mode 100644 index 0000000000..599d726ca2 --- /dev/null +++ b/security/nss/lib/freebl/det_rng.h @@ -0,0 +1,12 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __det_rng_h_ +#define __det_rng_h_ + +SECStatus prng_ResetForFuzzing(PZLock *rng_lock); +SECStatus prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest, + size_t len); + +#endif /* __det_rng_h_ */ diff --git a/security/nss/lib/freebl/dh.c b/security/nss/lib/freebl/dh.c new file mode 100644 index 0000000000..bdd5dd63ae --- /dev/null +++ b/security/nss/lib/freebl/dh.c @@ -0,0 +1,480 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* + * Diffie-Hellman parameter generation, key generation, and secret derivation. + * KEA secret generation and verification. + */ +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerr.h" +#include "secerr.h" + +#include "blapi.h" +#include "blapii.h" +#include "secitem.h" +#include "mpi.h" +#include "secmpi.h" + +#define KEA_DERIVED_SECRET_LEN 128 + +/* Lengths are in bytes. */ +static unsigned int +dh_GetSecretKeyLen(unsigned int primeLen) +{ + /* Based on Table 2 in NIST SP 800-57. */ + if (primeLen >= 1920) { /* 15360 bits */ + return 64; /* 512 bits */ + } + if (primeLen >= 960) { /* 7680 bits */ + return 48; /* 384 bits */ + } + if (primeLen >= 384) { /* 3072 bits */ + return 32; /* 256 bits */ + } + if (primeLen >= 256) { /* 2048 bits */ + return 28; /* 224 bits */ + } + return 20; /* 160 bits */ +} + +SECStatus +DH_GenParam(int primeLen, DHParams **params) +{ + PLArenaPool *arena; + DHParams *dhparams; + unsigned char *ab = NULL; + mp_int p, q, a, h, psub1, test; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + if (!params || primeLen < 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + dhparams = (DHParams *)PORT_ArenaZAlloc(arena, sizeof(DHParams)); + if (!dhparams) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + dhparams->arena = arena; + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&a) = 0; + MP_DIGITS(&h) = 0; + MP_DIGITS(&psub1) = 0; + MP_DIGITS(&test) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&a)); + CHECK_MPI_OK(mp_init(&h)); + CHECK_MPI_OK(mp_init(&psub1)); + CHECK_MPI_OK(mp_init(&test)); + /* generate prime with MPI, uses Miller-Rabin to generate safe prime. */ + CHECK_SEC_OK(generate_prime(&p, primeLen)); + /* construct Sophie-Germain prime q = (p-1)/2. 
*/ + CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1)); + CHECK_MPI_OK(mp_div_2(&psub1, &q)); + /* construct a generator from the prime. */ + ab = PORT_Alloc(primeLen); + if (!ab) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + rv = SECFailure; + goto cleanup; + } + /* generate a candidate number a in p's field */ + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(ab, primeLen)); + CHECK_MPI_OK(mp_read_unsigned_octets(&a, ab, primeLen)); + /* force a < p (note that quot(a/p) <= 1) */ + if (mp_cmp(&a, &p) > 0) + CHECK_MPI_OK(mp_sub(&a, &p, &a)); + do { + /* check that a is in the range [2..p-1] */ + if (mp_cmp_d(&a, 2) < 0 || mp_cmp(&a, &psub1) >= 0) { + /* a is outside of the allowed range. Set a=3 and keep going. */ + mp_set(&a, 3); + } + /* if a**q mod p != 1 then a is a generator */ + CHECK_MPI_OK(mp_exptmod(&a, &q, &p, &test)); + if (mp_cmp_d(&test, 1) != 0) + break; + /* increment the candidate and try again. */ + CHECK_MPI_OK(mp_add_d(&a, 1, &a)); + } while (PR_TRUE); + MPINT_TO_SECITEM(&p, &dhparams->prime, arena); + MPINT_TO_SECITEM(&a, &dhparams->base, arena); + *params = dhparams; +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&a); + mp_clear(&h); + mp_clear(&psub1); + mp_clear(&test); + if (ab) { + PORT_ZFree(ab, primeLen); + } + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv != SECSuccess) { + PORT_FreeArena(arena, PR_TRUE); + } + return rv; +} + +SECStatus +DH_NewKey(DHParams *params, DHPrivateKey **privKey) +{ + PLArenaPool *arena; + DHPrivateKey *key; + mp_int g, xa, p, Ya; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + if (!params || !privKey) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + key = (DHPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DHPrivateKey)); + if (!key) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + key->arena = 
arena; + MP_DIGITS(&g) = 0; + MP_DIGITS(&xa) = 0; + MP_DIGITS(&p) = 0; + MP_DIGITS(&Ya) = 0; + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&xa)); + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&Ya)); + /* Set private key's p */ + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->prime, &params->prime)); + SECITEM_TO_MPINT(key->prime, &p); + /* Set private key's g */ + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->base, &params->base)); + SECITEM_TO_MPINT(key->base, &g); + /* Generate private key xa */ + SECITEM_AllocItem(arena, &key->privateValue, + dh_GetSecretKeyLen(params->prime.len)); + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(key->privateValue.data, + key->privateValue.len)); + SECITEM_TO_MPINT(key->privateValue, &xa); + /* xa < p */ + CHECK_MPI_OK(mp_mod(&xa, &p, &xa)); + /* Compute public key Ya = g ** xa mod p */ + CHECK_MPI_OK(mp_exptmod(&g, &xa, &p, &Ya)); + MPINT_TO_SECITEM(&Ya, &key->publicValue, key->arena); + *privKey = key; +cleanup: + mp_clear(&g); + mp_clear(&xa); + mp_clear(&p); + mp_clear(&Ya); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv) { + *privKey = NULL; + PORT_FreeArena(arena, PR_TRUE); + } + return rv; +} + +SECStatus +DH_Derive(SECItem *publicValue, + SECItem *prime, + SECItem *privateValue, + SECItem *derivedSecret, + unsigned int outBytes) +{ + mp_int p, Xa, Yb, ZZ, psub1; + mp_err err = MP_OKAY; + unsigned int len = 0; + unsigned int nb; + unsigned char *secret = NULL; + if (!publicValue || !publicValue->len || !prime || !prime->len || + !privateValue || !privateValue->len || !derivedSecret) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + memset(derivedSecret, 0, sizeof *derivedSecret); + MP_DIGITS(&p) = 0; + MP_DIGITS(&Xa) = 0; + MP_DIGITS(&Yb) = 0; + MP_DIGITS(&ZZ) = 0; + MP_DIGITS(&psub1) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&Xa)); + CHECK_MPI_OK(mp_init(&Yb)); + CHECK_MPI_OK(mp_init(&ZZ)); + CHECK_MPI_OK(mp_init(&psub1)); + SECITEM_TO_MPINT(*publicValue, &Yb); + 
SECITEM_TO_MPINT(*privateValue, &Xa); + SECITEM_TO_MPINT(*prime, &p); + CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1)); + + /* We assume that the modulus, p, is a safe prime. That is, p = 2q+1 where + * q is also a prime. Thus the orders of the subgroups are factors of 2q: + * namely 1, 2, q and 2q. + * + * We check that the peer's public value isn't zero (which isn't in the + * group), one (subgroup of order one) or p-1 (subgroup of order 2). We + * also check that the public value is less than p, to avoid being fooled + * by values like p+1 or 2*p-1. + * + * Thus we must be operating in the subgroup of size q or 2q. */ + if (mp_cmp_d(&Yb, 1) <= 0 || + mp_cmp(&Yb, &psub1) >= 0) { + err = MP_BADARG; + goto cleanup; + } + + /* ZZ = (Yb)**Xa mod p */ + CHECK_MPI_OK(mp_exptmod(&Yb, &Xa, &p, &ZZ)); + /* number of bytes in the derived secret */ + len = mp_unsigned_octet_size(&ZZ); + if (len <= 0) { + err = MP_BADARG; + goto cleanup; + } + + /* + * We check to make sure that ZZ is not equal to 0, 1 or -1 mod p. + * This helps guard against small subgroup attacks, since an attacker + * using a subgroup of size N will produce 0, 1 or -1 with probability 1/N. + * When the protocol is executed within a properly large subgroup, the + * probability of this result will be negligibly small. For example, + * with a safe prime of the form 2q+1, the probability will be 1/q. + * + * We return MP_BADARG because this is probably the result of a bad + * public value or a bad prime having been provided. + */ + if (mp_cmp_d(&ZZ, 0) == 0 || mp_cmp_d(&ZZ, 1) == 0 || + mp_cmp(&ZZ, &psub1) == 0) { + err = MP_BADARG; + goto cleanup; + } + + /* allocate a buffer which can hold the entire derived secret. */ + secret = PORT_Alloc(len); + if (secret == NULL) { + err = MP_MEM; + goto cleanup; + } + /* grab the derived secret */ + err = mp_to_unsigned_octets(&ZZ, secret, len); + if (err >= 0) + err = MP_OKAY; + /* + ** if outBytes is 0 take all of the bytes from the derived secret. 
+ ** if outBytes is not 0 take exactly outBytes from the derived secret, zero + ** pad at the beginning if necessary, and truncate beginning bytes + ** if necessary. + */ + if (outBytes > 0) + nb = outBytes; + else + nb = len; + if (SECITEM_AllocItem(NULL, derivedSecret, nb) == NULL) { + err = MP_MEM; + goto cleanup; + } + if (len < nb) { + unsigned int offset = nb - len; + memset(derivedSecret->data, 0, offset); + memcpy(derivedSecret->data + offset, secret, len); + } else { + memcpy(derivedSecret->data, secret + len - nb, nb); + } +cleanup: + mp_clear(&p); + mp_clear(&Xa); + mp_clear(&Yb); + mp_clear(&ZZ); + mp_clear(&psub1); + if (secret) { + /* free the buffer allocated for the full secret. */ + PORT_ZFree(secret, len); + } + if (err) { + MP_TO_SEC_ERROR(err); + if (derivedSecret->data) + PORT_ZFree(derivedSecret->data, derivedSecret->len); + return SECFailure; + } + return SECSuccess; +} + +SECStatus +KEA_Derive(SECItem *prime, + SECItem *public1, + SECItem *public2, + SECItem *private1, + SECItem *private2, + SECItem *derivedSecret) +{ + mp_int p, Y, R, r, x, t, u, w; + mp_err err; + unsigned char *secret = NULL; + unsigned int len = 0, offset; + if (!prime || !public1 || !public2 || !private1 || !private2 || + !derivedSecret) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + memset(derivedSecret, 0, sizeof *derivedSecret); + MP_DIGITS(&p) = 0; + MP_DIGITS(&Y) = 0; + MP_DIGITS(&R) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&x) = 0; + MP_DIGITS(&t) = 0; + MP_DIGITS(&u) = 0; + MP_DIGITS(&w) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&Y)); + CHECK_MPI_OK(mp_init(&R)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&x)); + CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&u)); + CHECK_MPI_OK(mp_init(&w)); + SECITEM_TO_MPINT(*prime, &p); + SECITEM_TO_MPINT(*public1, &Y); + SECITEM_TO_MPINT(*public2, &R); + SECITEM_TO_MPINT(*private1, &r); + SECITEM_TO_MPINT(*private2, &x); + /* t = DH(Y, r, p) = Y ** r mod p */ + 
CHECK_MPI_OK(mp_exptmod(&Y, &r, &p, &t)); + /* u = DH(R, x, p) = R ** x mod p */ + CHECK_MPI_OK(mp_exptmod(&R, &x, &p, &u)); + /* w = (t + u) mod p */ + CHECK_MPI_OK(mp_addmod(&t, &u, &p, &w)); + /* allocate a buffer for the full derived secret */ + len = mp_unsigned_octet_size(&w); + secret = PORT_Alloc(len); + if (secret == NULL) { + err = MP_MEM; + goto cleanup; + } + /* grab the secret */ + err = mp_to_unsigned_octets(&w, secret, len); + if (err > 0) + err = MP_OKAY; + /* allocate output buffer */ + if (SECITEM_AllocItem(NULL, derivedSecret, KEA_DERIVED_SECRET_LEN) == NULL) { + err = MP_MEM; + goto cleanup; + } + memset(derivedSecret->data, 0, derivedSecret->len); + /* copy in the 128 lsb of the secret */ + if (len >= KEA_DERIVED_SECRET_LEN) { + memcpy(derivedSecret->data, secret + (len - KEA_DERIVED_SECRET_LEN), + KEA_DERIVED_SECRET_LEN); + } else { + offset = KEA_DERIVED_SECRET_LEN - len; + memcpy(derivedSecret->data + offset, secret, len); + } +cleanup: + mp_clear(&p); + mp_clear(&Y); + mp_clear(&R); + mp_clear(&r); + mp_clear(&x); + mp_clear(&t); + mp_clear(&u); + mp_clear(&w); + if (secret) + PORT_ZFree(secret, len); + if (err) { + MP_TO_SEC_ERROR(err); + if (derivedSecret->data) + PORT_ZFree(derivedSecret->data, derivedSecret->len); + return SECFailure; + } + return SECSuccess; +} + +/* Test counts based on the fact the prime and subprime + * were given to us */ +static int +dh_prime_testcount(int prime_length) +{ + if (prime_length < 1024) { + return 50; + } else if (prime_length < 2048) { + return 40; + } else if (prime_length < 3072) { + return 56; + } + return 64; +} + +PRBool +KEA_PrimeCheck(SECItem *prime) +{ + mp_int p; + mp_err err = 0; + MP_DIGITS(&p) = 0; + CHECK_MPI_OK(mp_init(&p)); + SECITEM_TO_MPINT(*prime, &p); + CHECK_MPI_OK(mpp_pprime_secure(&p, dh_prime_testcount(prime->len))); +cleanup: + mp_clear(&p); + return err ? 
PR_FALSE : PR_TRUE; +} + +PRBool +KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime) +{ + mp_int p, q, y, r; + mp_err err; + int cmp = 1; /* default is false */ + if (!Y || !prime || !subPrime) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&y) = 0; + MP_DIGITS(&r) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&y)); + CHECK_MPI_OK(mp_init(&r)); + SECITEM_TO_MPINT(*prime, &p); + SECITEM_TO_MPINT(*subPrime, &q); + SECITEM_TO_MPINT(*Y, &y); + /* compute r = y**q mod p */ + CHECK_MPI_OK(mp_exptmod(&y, &q, &p, &r)); + /* compare to 1 */ + cmp = mp_cmp_d(&r, 1); +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&y); + mp_clear(&r); + if (err) { + MP_TO_SEC_ERROR(err); + return PR_FALSE; + } + return (cmp == 0) ? PR_TRUE : PR_FALSE; +} diff --git a/security/nss/lib/freebl/drbg.c b/security/nss/lib/freebl/drbg.c new file mode 100644 index 0000000000..3ed1751c3e --- /dev/null +++ b/security/nss/lib/freebl/drbg.c @@ -0,0 +1,1024 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerror.h" +#include "secerr.h" + +#include "prtypes.h" +#include "prinit.h" +#include "blapi.h" +#include "blapii.h" +#include "nssilock.h" +#include "secitem.h" +#include "sha_fast.h" +#include "sha256.h" +#include "secrng.h" /* for RNG_SystemRNG() */ +#include "secmpi.h" + +/* PRNG_SEEDLEN defined in NIST SP 800-90 section 10.1 + * for SHA-1, SHA-224, and SHA-256 it's 440 bits. 
+ * for SHA-384 and SHA-512 it's 888 bits */ +#define PRNG_SEEDLEN (440 / PR_BITS_PER_BYTE) +#define PRNG_MAX_ADDITIONAL_BYTES PR_INT64(0x100000000) +/* 2^35 bits or 2^32 bytes */ +#define PRNG_MAX_REQUEST_SIZE 0x10000 /* 2^19 bits or 2^16 bytes */ +#define PRNG_ADDITONAL_DATA_CACHE_SIZE (8 * 1024) /* must be less than \ + * PRNG_MAX_ADDITIONAL_BYTES \ + */ +#define PRNG_ENTROPY_BLOCK_SIZE SHA256_LENGTH + +/* RESEED_COUNT is how many calls to the prng before we need to reseed + * under normal NIST rules, you must return an error. In the NSS case, we + * self-reseed with RNG_SystemRNG(). Count can be a large number. For code + * simplicity, we specify count with 2 components: RESEED_BYTE (which is + * the same as LOG256(RESEED_COUNT)) and RESEED_VALUE (which is the same as + * RESEED_COUNT / (256 ^ RESEED_BYTE)). Another way to look at this is + * RESEED_COUNT = RESEED_VALUE * (256 ^ RESEED_BYTE). For Hash based DRBG + * we use the maximum count value, 2^48, or RESEED_BYTE=6 and RESEED_VALUE=1 + */ +#define RESEED_BYTE 6 +#define RESEED_VALUE 1 + +#define PRNG_RESET_RESEED_COUNT(rng) \ + PORT_Memset((rng)->reseed_counter, 0, sizeof(rng)->reseed_counter); \ + (rng)->reseed_counter[RESEED_BYTE] = 1; + +/* + * The actual values of this enum are specified in SP 800-90, 10.1.1.* + * The spec does not name the types, it only uses bare values + */ +typedef enum { + prngCGenerateType = 0, /* used when creating a new 'C' */ + prngReseedType = 1, /* used in reseeding */ + prngAdditionalDataType = 2, /* used in mixing additional data */ + prngGenerateByteType = 3 /* used when mixing internal state while + * generating bytes */ +} prngVTypes; + +/* + * Global RNG context + */ +struct RNGContextStr { + PZLock *lock; /* Lock to serialize access to global rng */ + /* + * NOTE, a number of steps in the drbg algorithm need to hash + * V_type || V. The code, therefore, depends on the V array following + * immediately after V_type to avoid extra copies. 
To accomplish this + * in a way that compilers can't perturb, we declare V_type and V + * as a V_Data array and reference them by macros */ + PRUint8 V_Data[PRNG_SEEDLEN + 1]; /* internal state variables */ +#define V_type V_Data[0] +#define V(rng) (((rng)->V_Data) + 1) +#define VSize(rng) ((sizeof(rng)->V_Data) - 1) + PRUint8 C[PRNG_SEEDLEN]; /* internal state variables */ + /* If we get calls for the PRNG to return less than the length of our + * hash, we extend the request for a full hash (since we'll be doing + * the full hash anyway). Future requests for random numbers are fulfilled + * from the remainder of the bytes we generated. Requests for bytes longer + * than the hash size are fulfilled directly from the HashGen function + * of the random number generator. */ + PRUint8 reseed_counter[RESEED_BYTE + 1]; /* number of requests since the + * last reseed. Need only be + * big enough to hold the whole + * reseed count */ + PRUint8 data[SHA256_LENGTH]; /* when we request less than a block + * save the rest of the rng output for + * another partial block */ + PRUint8 dataAvail; /* # bytes of output available in our cache, + * [0...SHA256_LENGTH] */ + /* store additional data that has been shovelled off to us by + * RNG_RandomUpdate. */ + PRUint8 additionalDataCache[PRNG_ADDITONAL_DATA_CACHE_SIZE]; + PRUint32 additionalAvail; + PRBool isValid; /* false if RNG reaches an invalid state */ + PRBool isKatTest; /* true if running NIST PRNG KAT tests */ + /* for continuous entropy check */ + PRUint8 previousEntropyHash[SHA256_LENGTH]; +}; + +typedef struct RNGContextStr RNGContext; +static RNGContext *globalrng = NULL; +static RNGContext theGlobalRng; + +/* + * The next several functions are derived from the NIST SP 800-90 + * spec. In these functions, an attempt was made to use names consistent + * with the names in the spec, even if they differ from normal NSS usage. + */ + +/* + * Hash Derive function defined in NIST SP 800-90 Section 10.4.1. 
+ * This function is used in the Instantiate and Reseed functions. + * + * NOTE: requested_bytes cannot overlap with input_string_1 or input_string_2. + * input_string_1 and input_string_2 are logically concatenated. + * input_string_1 must be supplied. + * if input_string_2 is not supplied, NULL should be passed for this parameter. + */ +static SECStatus +prng_Hash_df(PRUint8 *requested_bytes, unsigned int no_of_bytes_to_return, + const PRUint8 *input_string_1, unsigned int input_string_1_len, + const PRUint8 *input_string_2, unsigned int input_string_2_len) +{ + SHA256Context ctx; + PRUint32 tmp; + PRUint8 counter; + + tmp = SHA_HTONL(no_of_bytes_to_return * 8); + + for (counter = 1; no_of_bytes_to_return > 0; counter++) { + unsigned int hash_return_len; + SHA256_Begin(&ctx); + SHA256_Update(&ctx, &counter, 1); + SHA256_Update(&ctx, (unsigned char *)&tmp, sizeof tmp); + SHA256_Update(&ctx, input_string_1, input_string_1_len); + if (input_string_2) { + SHA256_Update(&ctx, input_string_2, input_string_2_len); + } + SHA256_End(&ctx, requested_bytes, &hash_return_len, + no_of_bytes_to_return); + requested_bytes += hash_return_len; + no_of_bytes_to_return -= hash_return_len; + } + SHA256_DestroyContext(&ctx, PR_FALSE); + return SECSuccess; +} + +/* + * Hash_DRBG Instantiate NIST SP 800-90 10.1.1.2 + * + * NOTE: bytes & len are entropy || nonce || personalization_string. In + * normal operation, NSS calculates them all together in a single call. + */ +static SECStatus +prng_instantiate(RNGContext *rng, const PRUint8 *bytes, unsigned int len) +{ + if (!rng->isKatTest && len < PRNG_SEEDLEN) { + /* If the seedlen is too small, it's probably because we failed to get + * enough random data. + * This is stricter than NIST SP800-90A requires. Don't enforce it for + * tests. 
*/ + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return SECFailure; + } + prng_Hash_df(V(rng), VSize(rng), bytes, len, NULL, 0); + rng->V_type = prngCGenerateType; + prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0); + PRNG_RESET_RESEED_COUNT(rng) + return SECSuccess; +} + +static PRCallOnceType coRNGInitEntropy; + +static PRStatus +prng_initEntropy(void) +{ + size_t length; + PRUint8 block[PRNG_ENTROPY_BLOCK_SIZE]; + SHA256Context ctx; + + /* For FIPS 140-2 4.9.2 continuous random number generator test, + * fetch the initial entropy from the system RNG and keep it for + * later comparison. */ + length = RNG_SystemRNG(block, sizeof(block)); + if (length == 0) { + return PR_FAILURE; /* error is already set */ + } + PORT_Assert(length == sizeof(block)); + + /* Store the hash of the entropy block rather than the block + * itself for backward secrecy. */ + SHA256_Begin(&ctx); + SHA256_Update(&ctx, block, sizeof(block)); + SHA256_End(&ctx, globalrng->previousEntropyHash, NULL, + sizeof(globalrng->previousEntropyHash)); + PORT_Memset(block, 0, sizeof(block)); + SHA256_DestroyContext(&ctx, PR_FALSE); + return PR_SUCCESS; +} + +static SECStatus +prng_getEntropy(PRUint8 *buffer, size_t requestLength) +{ + size_t total = 0; + PRUint8 block[PRNG_ENTROPY_BLOCK_SIZE]; + PRUint8 hash[SHA256_LENGTH]; + SHA256Context ctx; + SECStatus rv = SECSuccess; + + if (PR_CallOnce(&coRNGInitEntropy, prng_initEntropy) != PR_SUCCESS) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + /* For FIPS 140-2 4.9.2 continuous random generator test, + * iteratively fetch fixed sized blocks from the system and + * compare consecutive blocks. */ + while (total < requestLength) { + size_t length = RNG_SystemRNG(block, sizeof(block)); + if (length == 0) { + rv = SECFailure; /* error is already set */ + goto out; + } + PORT_Assert(length == sizeof(block)); + + /* Store the hash of the entropy block rather than the block + * itself for backward secrecy. 
*/ + SHA256_Begin(&ctx); + SHA256_Update(&ctx, block, sizeof(block)); + SHA256_End(&ctx, hash, NULL, sizeof(hash)); + + if (PORT_Memcmp(globalrng->previousEntropyHash, hash, sizeof(hash)) == 0) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + rv = SECFailure; + goto out; + } + PORT_Memcpy(globalrng->previousEntropyHash, hash, sizeof(hash)); + length = PR_MIN(requestLength - total, sizeof(block)); + PORT_Memcpy(buffer, block, length); + total += length; + buffer += length; + } + +out: + PORT_Memset(hash, 0, sizeof hash); + PORT_Memset(block, 0, sizeof block); + return rv; +} + +/* + * Update the global random number generator with more seeding + * material. Use the Hash_DRBG reseed algorithm from NIST SP-800-90 + * section 10.1.1.3 + * + * If entropy is NULL, it is fetched from the noise generator. + */ +static SECStatus +prng_reseed(RNGContext *rng, const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional_input, unsigned int additional_input_len) +{ + PRUint8 noiseData[(sizeof rng->V_Data) + PRNG_SEEDLEN]; + PRUint8 *noise = &noiseData[0]; + SECStatus rv; + + /* if entropy wasn't supplied, fetch it. 
(normal operation case) */ + if (entropy == NULL) { + entropy_len = PRNG_SEEDLEN; + rv = prng_getEntropy(&noiseData[sizeof rng->V_Data], entropy_len); + if (rv != SECSuccess) { + return SECFailure; /* error is already set */ + } + } else { + /* NOTE: this code is only available for testing, not to applications */ + /* if entropy was too big for the stack variable, get it from malloc */ + if (entropy_len > PRNG_SEEDLEN) { + noise = PORT_Alloc(entropy_len + (sizeof rng->V_Data)); + if (noise == NULL) { + return SECFailure; + } + } + PORT_Memcpy(&noise[sizeof rng->V_Data], entropy, entropy_len); + } + + if (entropy_len < 256 / PR_BITS_PER_BYTE) { + /* noise == &noiseData[0] at this point, so nothing to free */ + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return SECFailure; + } + + rng->V_type = prngReseedType; + PORT_Memcpy(noise, rng->V_Data, sizeof rng->V_Data); + prng_Hash_df(V(rng), VSize(rng), noise, (sizeof rng->V_Data) + entropy_len, + additional_input, additional_input_len); + /* clear potential CSP */ + PORT_Memset(noise, 0, (sizeof rng->V_Data) + entropy_len); + rng->V_type = prngCGenerateType; + prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0); + PRNG_RESET_RESEED_COUNT(rng) + + if (noise != &noiseData[0]) { + PORT_Free(noise); + } + return SECSuccess; +} + +/* + * SP 800-90 requires we rerun our health tests on reseed + */ +static SECStatus +prng_reseed_test(RNGContext *rng, const PRUint8 *entropy, + unsigned int entropy_len, const PRUint8 *additional_input, + unsigned int additional_input_len) +{ + SECStatus rv; + + /* do health checks in FIPS mode */ + rv = PRNGTEST_RunHealthTests(); + if (rv != SECSuccess) { + /* error set by PRNGTEST_RunHealthTests() */ + rng->isValid = PR_FALSE; + return SECFailure; + } + return prng_reseed(rng, entropy, entropy_len, + additional_input, additional_input_len); +} + +/* + * build some fast inline functions for adding. 
+ */ +#define PRNG_ADD_CARRY_ONLY(dest, start, carry) \ + { \ + int k1; \ + for (k1 = start; carry && k1 >= 0; k1--) { \ + carry = !(++dest[k1]); \ + } \ + } + +/* + * NOTE: dest must be an array for the following to work. + */ +#define PRNG_ADD_BITS(dest, dest_len, add, len, carry) \ + carry = 0; \ + PORT_Assert((dest_len) >= (len)); \ + { \ + int k1, k2; \ + for (k1 = dest_len - 1, k2 = len - 1; k2 >= 0; --k1, --k2) { \ + carry += dest[k1] + add[k2]; \ + dest[k1] = (PRUint8)carry; \ + carry >>= 8; \ + } \ + } + +#define PRNG_ADD_BITS_AND_CARRY(dest, dest_len, add, len, carry) \ + PRNG_ADD_BITS(dest, dest_len, add, len, carry) \ + PRNG_ADD_CARRY_ONLY(dest, dest_len - len - 1, carry) + +/* + * This function expands the internal state of the prng to fulfill any number + * of bytes we need for this request. We only use this call if we need more + * than can be supplied by a single call to SHA256_HashBuf. + * + * This function is specified in NIST SP 800-90 section 10.1.1.4, Hashgen + */ +static void +prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes, + unsigned int no_of_returned_bytes) +{ + PRUint8 data[VSize(rng)]; + PRUint8 thisHash[SHA256_LENGTH]; + + PORT_Memcpy(data, V(rng), VSize(rng)); + while (no_of_returned_bytes) { + SHA256Context ctx; + unsigned int len; + unsigned int carry; + + SHA256_Begin(&ctx); + SHA256_Update(&ctx, data, sizeof data); + SHA256_End(&ctx, thisHash, &len, SHA256_LENGTH); + if (no_of_returned_bytes < SHA256_LENGTH) { + len = no_of_returned_bytes; + } + PORT_Memcpy(returned_bytes, thisHash, len); + returned_bytes += len; + no_of_returned_bytes -= len; + /* The carry parameter is a bool (increment or not). 
+ * This increments data if no_of_returned_bytes is not zero */ + carry = no_of_returned_bytes; + PRNG_ADD_CARRY_ONLY(data, (sizeof data) - 1, carry); + SHA256_DestroyContext(&ctx, PR_FALSE); + } + PORT_Memset(data, 0, sizeof data); + PORT_Memset(thisHash, 0, sizeof thisHash); +} + +/* + * Generates new random bytes and advances the internal prng state. + * additional bytes are only used in algorithm testing. + * + * This function is specified in NIST SP 800-90 section 10.1.1.4 + */ +static SECStatus +prng_generateNewBytes(RNGContext *rng, + PRUint8 *returned_bytes, unsigned int no_of_returned_bytes, + const PRUint8 *additional_input, + unsigned int additional_input_len) +{ + PRUint8 H[SHA256_LENGTH]; /* both H and w since they + * aren't used concurrently */ + unsigned int carry; + + if (!rng->isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* This code only triggers during tests, normal + * prng operation does not use additional_input */ + if (additional_input) { + SHA256Context ctx; +/* NIST SP 800-90 defines two temporaries in their calculations, + * w and H. These temporaries are the same lengths, and used + * at different times, so we use the following macro to collapse + * them to the same variable, but keeping their unique names for + * easy comparison to the spec */ +#define w H + rng->V_type = prngAdditionalDataType; + SHA256_Begin(&ctx); + SHA256_Update(&ctx, rng->V_Data, sizeof rng->V_Data); + SHA256_Update(&ctx, additional_input, additional_input_len); + SHA256_End(&ctx, w, NULL, sizeof w); + PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), w, sizeof w, carry) + PORT_Memset(w, 0, sizeof w); + SHA256_DestroyContext(&ctx, PR_FALSE); +#undef w + } + + if (no_of_returned_bytes == SHA256_LENGTH) { + /* short_cut to hashbuf and a couple of copies and clears */ + SHA256_HashBuf(returned_bytes, V(rng), VSize(rng)); + } else { + prng_Hashgen(rng, returned_bytes, no_of_returned_bytes); + } + /* advance our internal state... 
*/ + rng->V_type = prngGenerateByteType; + SHA256_HashBuf(H, rng->V_Data, sizeof rng->V_Data); + PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), H, sizeof H, carry) + PRNG_ADD_BITS(V(rng), VSize(rng), rng->C, sizeof rng->C, carry); + PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), rng->reseed_counter, + sizeof rng->reseed_counter, carry) + carry = 1; + PRNG_ADD_CARRY_ONLY(rng->reseed_counter, (sizeof rng->reseed_counter) - 1, carry); + + /* if the prng failed, don't return any output, signal softoken */ + PORT_Memset(H, 0, sizeof H); + if (!rng->isValid) { + PORT_Memset(returned_bytes, 0, no_of_returned_bytes); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + return SECSuccess; +} + +/* Use NSPR to prevent RNG_RNGInit from being called from separate + * threads, creating a race condition. + */ +static const PRCallOnceType pristineCallOnce; +static PRCallOnceType coRNGInit; +static PRStatus +rng_init(void) +{ + PRUint8 bytes[PRNG_SEEDLEN * 2]; /* entropy + nonce */ + SECStatus rv = SECSuccess; + + if (globalrng == NULL) { + /* bytes needs to have enough space to hold + * a SHA256 hash value. 
Blow up at compile time if this isn't true */ + PR_STATIC_ASSERT(sizeof(bytes) >= SHA256_LENGTH); + /* create a new global RNG context */ + globalrng = &theGlobalRng; + PORT_Assert(NULL == globalrng->lock); + /* create a lock for it */ + globalrng->lock = PZ_NewLock(nssILockOther); + if (globalrng->lock == NULL) { + globalrng = NULL; + PORT_SetError(PR_OUT_OF_MEMORY_ERROR); + return PR_FAILURE; + } + + /* Try to get some seed data for the RNG */ + rv = prng_getEntropy(bytes, sizeof bytes); + if (rv == SECSuccess) { + /* if this is our first call, instantiate, otherwise reseed + * prng_instantiate gets a new clean state, we want to mix + * any previous entropy we may have collected */ + if (V(globalrng)[0] == 0) { + rv = prng_instantiate(globalrng, bytes, sizeof bytes); + } else { + rv = prng_reseed_test(globalrng, bytes, sizeof bytes, NULL, 0); + } + memset(bytes, 0, sizeof bytes); + } else { + PZ_DestroyLock(globalrng->lock); + globalrng->lock = NULL; + globalrng = NULL; + return PR_FAILURE; + } + if (rv != SECSuccess) { + return PR_FAILURE; + } + + /* the RNG is in a valid state */ + globalrng->isValid = PR_TRUE; + globalrng->isKatTest = PR_FALSE; + + /* fetch one random value so that we can populate rng->oldV for our + * continuous random number test. 
*/ + prng_generateNewBytes(globalrng, bytes, SHA256_LENGTH, NULL, 0); + + /* Fetch more entropy into the PRNG */ + RNG_SystemInfoForRNG(); + } + return PR_SUCCESS; +} + +/* + * Clean up the global RNG context + */ +static void +prng_freeRNGContext(RNGContext *rng) +{ + PRUint8 inputhash[VSize(rng) + (sizeof rng->C)]; + + /* destroy context lock */ + SKIP_AFTER_FORK(PZ_DestroyLock(globalrng->lock)); + + /* zero global RNG context except for C & V to preserve entropy */ + prng_Hash_df(inputhash, sizeof rng->C, rng->C, sizeof rng->C, NULL, 0); + prng_Hash_df(&inputhash[sizeof rng->C], VSize(rng), V(rng), VSize(rng), + NULL, 0); + memset(rng, 0, sizeof *rng); + memcpy(rng->C, inputhash, sizeof rng->C); + memcpy(V(rng), &inputhash[sizeof rng->C], VSize(rng)); + + memset(inputhash, 0, sizeof inputhash); +} + +/* + * Public functions + */ + +/* + * Initialize the global RNG context and give it some seed input taken + * from the system. This function is thread-safe and will only allow + * the global context to be initialized once. The seed input is likely + * small, so it is imperative that RNG_RandomUpdate() be called with + * additional seed data before the generator is used. A good way to + * provide the generator with additional entropy is to call + * RNG_SystemInfoForRNG(). Note that C_Initialize() does exactly that. + */ +SECStatus +RNG_RNGInit(void) +{ + /* Allow only one call to initialize the context */ + PR_CallOnce(&coRNGInit, rng_init); + /* Make sure there is a context */ + return (globalrng != NULL) ? SECSuccess : SECFailure; +} + +/* +** Update the global random number generator with more seeding +** material. +*/ +SECStatus +RNG_RandomUpdate(const void *data, size_t bytes) +{ + SECStatus rv; + + /* Make sure our assumption that size_t is unsigned is true */ + PR_STATIC_ASSERT(((size_t)-1) > (size_t)1); + +#if defined(NS_PTR_GT_32) || (defined(NSS_USE_64) && !defined(NS_PTR_LE_32)) + /* + * NIST 800-90 requires us to verify our inputs. 
This value can + * come from the application, so we need to make sure it's within the + * spec. The spec says it must be less than 2^32 bytes (2^35 bits). + * This can only happen if size_t is greater than 32 bits (i.e. on + * most 64 bit platforms). The 90% case (perhaps 100% case), size_t + * is less than or equal to 32 bits if the platform is not 64 bits, and + * greater than 32 bits if it is a 64 bit platform. The corner + * cases are handled with explicit defines NS_PTR_GT_32 and NS_PTR_LE_32. + * + * In general, neither NS_PTR_GT_32 nor NS_PTR_LE_32 will need to be + * defined. If you trip over the next two size ASSERTS at compile time, + * you will need to define them for your platform. + * + * if 'sizeof(size_t) > 4' is triggered it means that we were expecting + * sizeof(size_t) to be greater than 4, but it wasn't. Setting + * NS_PTR_LE_32 will correct that mistake. + * + * if 'sizeof(size_t) <= 4' is triggered, it means that we were expecting + * sizeof(size_t) to be less than or equal to 4, but it wasn't. Setting + * NS_PTR_GT_32 will correct that mistake. + */ + + PR_STATIC_ASSERT(sizeof(size_t) > 4); + + if (bytes > (size_t)PRNG_MAX_ADDITIONAL_BYTES) { + bytes = PRNG_MAX_ADDITIONAL_BYTES; + } +#else + PR_STATIC_ASSERT(sizeof(size_t) <= 4); +#endif + + PZ_Lock(globalrng->lock); + /* if we're passed more than our additionalDataCache, simply + * call reseed with that data */ + if (bytes > sizeof(globalrng->additionalDataCache)) { + rv = prng_reseed_test(globalrng, NULL, 0, data, (unsigned int)bytes); + /* if we aren't going to fill or overflow the buffer, just cache it */ + } else if (bytes < ((sizeof globalrng->additionalDataCache) - globalrng->additionalAvail)) { + PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail, + data, bytes); + globalrng->additionalAvail += (PRUint32)bytes; + rv = SECSuccess; + } else { + /* we are going to fill or overflow the buffer. 
In this case we will + * fill the entropy buffer, reseed with it, start a new buffer with the + * remainder. We know the remainder will fit in the buffer because + * we already handled the case where bytes > the size of the buffer. + */ + size_t bufRemain = (sizeof globalrng->additionalDataCache) - globalrng->additionalAvail; + /* fill the rest of the buffer */ + if (bufRemain) { + PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail, + data, bufRemain); + data = ((unsigned char *)data) + bufRemain; + bytes -= bufRemain; + } + /* reseed from buffer */ + rv = prng_reseed_test(globalrng, NULL, 0, + globalrng->additionalDataCache, + sizeof globalrng->additionalDataCache); + + /* copy the rest into the cache */ + PORT_Memcpy(globalrng->additionalDataCache, data, bytes); + globalrng->additionalAvail = (PRUint32)bytes; + } + + PZ_Unlock(globalrng->lock); + return rv; +} + +/* +** Generate some random bytes, using the global random number generator +** object. +*/ +static SECStatus +prng_GenerateGlobalRandomBytes(RNGContext *rng, + void *dest, size_t len) +{ + SECStatus rv = SECSuccess; + PRUint8 *output = dest; + /* check for a valid global RNG context */ + PORT_Assert(rng != NULL); + if (rng == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* FIPS limits the amount of entropy available in a single request */ + if (len > PRNG_MAX_REQUEST_SIZE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* --- LOCKED --- */ + PZ_Lock(rng->lock); + /* Check the amount of seed data in the generator. If not enough, + * don't produce any data. + */ + if (rng->reseed_counter[0] >= RESEED_VALUE) { + rv = prng_reseed_test(rng, NULL, 0, NULL, 0); + PZ_Unlock(rng->lock); + if (rv != SECSuccess) { + return rv; + } + RNG_SystemInfoForRNG(); + PZ_Lock(rng->lock); + } + /* + * see if we have enough bytes to fulfill the request. 
+ */ + if (len <= rng->dataAvail) { + memcpy(output, rng->data + ((sizeof rng->data) - rng->dataAvail), len); + memset(rng->data + ((sizeof rng->data) - rng->dataAvail), 0, len); + rng->dataAvail -= len; + rv = SECSuccess; + /* if we are asking for a small number of bytes, cache the rest of + * the bytes */ + } else if (len < sizeof rng->data) { + rv = prng_generateNewBytes(rng, rng->data, sizeof rng->data, + rng->additionalAvail ? rng->additionalDataCache : NULL, + rng->additionalAvail); + rng->additionalAvail = 0; + if (rv == SECSuccess) { + memcpy(output, rng->data, len); + memset(rng->data, 0, len); + rng->dataAvail = (sizeof rng->data) - len; + } + /* we are asking for lots of bytes, just ask the generator to pass them */ + } else { + rv = prng_generateNewBytes(rng, output, len, + rng->additionalAvail ? rng->additionalDataCache : NULL, + rng->additionalAvail); + rng->additionalAvail = 0; + } + PZ_Unlock(rng->lock); + /* --- UNLOCKED --- */ + return rv; +} + +/* +** Generate some random bytes, using the global random number generator +** object. +*/ +SECStatus +RNG_GenerateGlobalRandomBytes(void *dest, size_t len) +{ + return prng_GenerateGlobalRandomBytes(globalrng, dest, len); +} + +void +RNG_RNGShutdown(void) +{ + /* check for a valid global RNG context */ + PORT_Assert(globalrng != NULL); + if (globalrng == NULL) { + /* Should set a "not initialized" error code. */ + PORT_SetError(SEC_ERROR_NO_MEMORY); + return; + } + /* clear */ + prng_freeRNGContext(globalrng); + globalrng = NULL; + /* reset the callonce struct to allow a new call to RNG_RNGInit() */ + coRNGInit = pristineCallOnce; +} + +/* + * Test case interface. used by fips testing and power on self test + */ +/* make sure the test context is separate from the global context, This + * allows us to test the internal random number generator without losing + * entropy we may have previously collected. 
*/ +RNGContext testContext; + +SECStatus +PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + testContext.isKatTest = PR_TRUE; + return PRNGTEST_Instantiate(entropy, entropy_len, + nonce, nonce_len, + personal_string, ps_len); +} + +/* + * Test vector API. Use NIST SP 800-90 general interface so one of the + * other NIST SP 800-90 algorithms may be used in the future. + */ +SECStatus +PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + int bytes_len = entropy_len + nonce_len + ps_len; + PRUint8 *bytes = NULL; + SECStatus rv; + + if (entropy_len < 256 / PR_BITS_PER_BYTE) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return SECFailure; + } + + bytes = PORT_Alloc(bytes_len); + if (bytes == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + /* concatenate the various inputs, internally NSS only instantiates with + * a single long string */ + PORT_Memcpy(bytes, entropy, entropy_len); + if (nonce) { + PORT_Memcpy(&bytes[entropy_len], nonce, nonce_len); + } else { + PORT_Assert(nonce_len == 0); + } + if (personal_string) { + PORT_Memcpy(&bytes[entropy_len + nonce_len], personal_string, ps_len); + } else { + PORT_Assert(ps_len == 0); + } + rv = prng_instantiate(&testContext, bytes, bytes_len); + PORT_ZFree(bytes, bytes_len); + if (rv == SECFailure) { + return SECFailure; + } + testContext.isValid = PR_TRUE; + return SECSuccess; +} + +SECStatus +PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional, unsigned int additional_len) +{ + if (!testContext.isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* This magic input tells us to set the reseed count to its max count, + * so we can simulate PRNGTEST_Generate reaching max reseed 
count */ + if ((entropy == NULL) && (entropy_len == 0) && + (additional == NULL) && (additional_len == 0)) { + testContext.reseed_counter[0] = RESEED_VALUE; + return SECSuccess; + } + return prng_reseed(&testContext, entropy, entropy_len, additional, + additional_len); +} + +SECStatus +PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len, + const PRUint8 *additional, unsigned int additional_len) +{ + SECStatus rv; + if (!testContext.isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* replicate reseed test from prng_GenerateGlobalRandomBytes */ + if (testContext.reseed_counter[0] >= RESEED_VALUE) { + rv = prng_reseed(&testContext, NULL, 0, NULL, 0); + if (rv != SECSuccess) { + return rv; + } + } + return prng_generateNewBytes(&testContext, bytes, bytes_len, + additional, additional_len); +} + +SECStatus +PRNGTEST_Uninstantiate() +{ + if (!testContext.isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + PORT_Memset(&testContext, 0, sizeof testContext); + return SECSuccess; +} + +SECStatus +PRNGTEST_RunHealthTests() +{ + static const PRUint8 entropy[] = { + 0x8e, 0x9c, 0x0d, 0x25, 0x75, 0x22, 0x04, 0xf9, + 0xc5, 0x79, 0x10, 0x8b, 0x23, 0x79, 0x37, 0x14, + 0x9f, 0x2c, 0xc7, 0x0b, 0x39, 0xf8, 0xee, 0xef, + 0x95, 0x0c, 0x97, 0x59, 0xfc, 0x0a, 0x85, 0x41, + 0x76, 0x9d, 0x6d, 0x67, 0x00, 0x4e, 0x19, 0x12, + 0x02, 0x16, 0x53, 0xea, 0xf2, 0x73, 0xd7, 0xd6, + 0x7f, 0x7e, 0xc8, 0xae, 0x9c, 0x09, 0x99, 0x7d, + 0xbb, 0x9e, 0x48, 0x7f, 0xbb, 0x96, 0x46, 0xb3, + 0x03, 0x75, 0xf8, 0xc8, 0x69, 0x45, 0x3f, 0x97, + 0x5e, 0x2e, 0x48, 0xe1, 0x5d, 0x58, 0x97, 0x4c + }; + static const PRUint8 rng_known_result[] = { + 0x16, 0xe1, 0x8c, 0x57, 0x21, 0xd8, 0xf1, 0x7e, + 0x5a, 0xa0, 0x16, 0x0b, 0x7e, 0xa6, 0x25, 0xb4, + 0x24, 0x19, 0xdb, 0x54, 0xfa, 0x35, 0x13, 0x66, + 0xbb, 0xaa, 0x2a, 0x1b, 0x22, 0x33, 0x2e, 0x4a, + 0x14, 0x07, 0x9d, 0x52, 0xfc, 0x73, 0x61, 0x48, + 0xac, 0xc1, 0x22, 0xfc, 0xa4, 0xfc, 0xac, 0xa4, + 0xdb, 0xda, 
0x5b, 0x27, 0x33, 0xc4, 0xb3 + }; + static const PRUint8 reseed_entropy[] = { + 0xc6, 0x0b, 0x0a, 0x30, 0x67, 0x07, 0xf4, 0xe2, + 0x24, 0xa7, 0x51, 0x6f, 0x5f, 0x85, 0x3e, 0x5d, + 0x67, 0x97, 0xb8, 0x3b, 0x30, 0x9c, 0x7a, 0xb1, + 0x52, 0xc6, 0x1b, 0xc9, 0x46, 0xa8, 0x62, 0x79 + }; + static const PRUint8 additional_input[] = { + 0x86, 0x82, 0x28, 0x98, 0xe7, 0xcb, 0x01, 0x14, + 0xae, 0x87, 0x4b, 0x1d, 0x99, 0x1b, 0xc7, 0x41, + 0x33, 0xff, 0x33, 0x66, 0x40, 0x95, 0x54, 0xc6, + 0x67, 0x4d, 0x40, 0x2a, 0x1f, 0xf9, 0xeb, 0x65 + }; + static const PRUint8 rng_reseed_result[] = { + 0x02, 0x0c, 0xc6, 0x17, 0x86, 0x49, 0xba, 0xc4, + 0x7b, 0x71, 0x35, 0x05, 0xf0, 0xdb, 0x4a, 0xc2, + 0x2c, 0x38, 0xc1, 0xa4, 0x42, 0xe5, 0x46, 0x4a, + 0x7d, 0xf0, 0xbe, 0x47, 0x88, 0xb8, 0x0e, 0xc6, + 0x25, 0x2b, 0x1d, 0x13, 0xef, 0xa6, 0x87, 0x96, + 0xa3, 0x7d, 0x5b, 0x80, 0xc2, 0x38, 0x76, 0x61, + 0xc7, 0x80, 0x5d, 0x0f, 0x05, 0x76, 0x85 + }; + static const PRUint8 rng_no_reseed_result[] = { + 0xc4, 0x40, 0x41, 0x8c, 0xbf, 0x2f, 0x70, 0x23, + 0x88, 0xf2, 0x7b, 0x30, 0xc3, 0xca, 0x1e, 0xf3, + 0xef, 0x53, 0x81, 0x5d, 0x30, 0xed, 0x4c, 0xf1, + 0xff, 0x89, 0xa5, 0xee, 0x92, 0xf8, 0xc0, 0x0f, + 0x88, 0x53, 0xdf, 0xb6, 0x76, 0xf0, 0xaa, 0xd3, + 0x2e, 0x1d, 0x64, 0x37, 0x3e, 0xe8, 0x4a, 0x02, + 0xff, 0x0a, 0x7f, 0xe5, 0xe9, 0x2b, 0x6d + }; + + SECStatus rng_status = SECSuccess; + PR_STATIC_ASSERT(sizeof(rng_known_result) >= sizeof(rng_reseed_result)); + PRUint8 result[sizeof(rng_known_result)]; + + /********************************************/ + /* First test instantiate error path. */ + /* In this case we supply enough entropy, */ + /* but not enough seed. This will trigger */ + /* the code that checks for a entropy */ + /* source failure. 
*/ + /********************************************/ + rng_status = PRNGTEST_Instantiate(entropy, 256 / PR_BITS_PER_BYTE, + NULL, 0, NULL, 0); + if (rng_status == SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* we failed with the proper error code, we can continue */ + + /********************************************/ + /* Generate random bytes with a known seed. */ + /********************************************/ + rng_status = PRNGTEST_Instantiate(entropy, sizeof entropy, + NULL, 0, NULL, 0); + if (rng_status != SECSuccess) { + /* Error set by PRNGTEST_Instantiate */ + return SECFailure; + } + rng_status = PRNGTEST_Generate(result, sizeof rng_known_result, NULL, 0); + if ((rng_status != SECSuccess) || + (PORT_Memcmp(result, rng_known_result, + sizeof rng_known_result) != 0)) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + rng_status = PRNGTEST_Reseed(reseed_entropy, sizeof reseed_entropy, + additional_input, sizeof additional_input); + if (rng_status != SECSuccess) { + /* Error set by PRNG_Reseed */ + PRNGTEST_Uninstantiate(); + return SECFailure; + } + rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0); + if ((rng_status != SECSuccess) || + (PORT_Memcmp(result, rng_reseed_result, + sizeof rng_reseed_result) != 0)) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* This magic forces the reseed count to its max count, so we can see if + * PRNGTEST_Generate will actually reseed when it reaches its count */ + rng_status = PRNGTEST_Reseed(NULL, 0, NULL, 0); + if (rng_status != SECSuccess) { + PRNGTEST_Uninstantiate(); + /* Error set by PRNG_Reseed */ + return SECFailure; + } + /* This generate should now reseed */ + rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0); + 
if ((rng_status != SECSuccess) || + /* NOTE we fail if the result is equal to the no_reseed_result. + * no_reseed_result is the value we would have gotten if we didn't + * do an automatic reseed in PRNGTEST_Generate */ + (PORT_Memcmp(result, rng_no_reseed_result, + sizeof rng_no_reseed_result) == 0)) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* make sure reseed fails when we don't supply enough entropy */ + rng_status = PRNGTEST_Reseed(reseed_entropy, 4, NULL, 0); + if (rng_status == SECSuccess) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + rng_status = PRNGTEST_Uninstantiate(); + if (rng_status != SECSuccess) { + /* Error set by PRNG_Uninstantiate */ + return rng_status; + } + /* make sure uninstantiate fails if the context is not instantiated (also tests + * if the context was cleared in the previous Uninstantiate) */ + rng_status = PRNGTEST_Uninstantiate(); + if (rng_status == SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + if (PORT_GetError() != SEC_ERROR_LIBRARY_FAILURE) { + return rng_status; + } + + return SECSuccess; +} diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c new file mode 100644 index 0000000000..b81d9a3700 --- /dev/null +++ b/security/nss/lib/freebl/dsa.c @@ -0,0 +1,691 @@ +/* + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerror.h" +#include "secerr.h" + +#include "prtypes.h" +#include "prinit.h" +#include "blapi.h" +#include "nssilock.h" +#include "secitem.h" +#include "blapit.h" +#include "mpi.h" +#include "secmpi.h" +#include "pqg.h" + +/* + * FIPS 186-2 requires result from random output to be reduced mod q when + * generating random numbers for DSA. + * + * Input: w, 2*qLen bytes + * q, qLen bytes + * Output: xj, qLen bytes + */ +static SECStatus +fips186Change_ReduceModQForDSA(const PRUint8 *w, const PRUint8 *q, + unsigned int qLen, PRUint8 *xj) +{ + mp_int W, Q, Xj; + mp_err err; + SECStatus rv = SECSuccess; + + /* Initialize MPI integers. */ + MP_DIGITS(&W) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&Xj) = 0; + CHECK_MPI_OK(mp_init(&W)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&Xj)); + /* + * Convert input arguments into MPI integers. + */ + CHECK_MPI_OK(mp_read_unsigned_octets(&W, w, 2 * qLen)); + CHECK_MPI_OK(mp_read_unsigned_octets(&Q, q, qLen)); + + /* + * Algorithm 1 of FIPS 186-2 Change Notice 1, Step 3.3 + * + * xj = (w0 || w1) mod q + */ + CHECK_MPI_OK(mp_mod(&W, &Q, &Xj)); + CHECK_MPI_OK(mp_to_fixlen_octets(&Xj, xj, qLen)); +cleanup: + mp_clear(&W); + mp_clear(&Q); + mp_clear(&Xj); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* + * FIPS 186-2 requires result from random output to be reduced mod q when + * generating random numbers for DSA. + */ +SECStatus +FIPS186Change_ReduceModQForDSA(const unsigned char *w, + const unsigned char *q, + unsigned char *xj) +{ + return fips186Change_ReduceModQForDSA(w, q, DSA1_SUBPRIME_LEN, xj); +} + +/* + * The core of Algorithm 1 of FIPS 186-2 Change Notice 1. + * + * We no longer support FIPS 186-2 RNG. This function was exported + * for power-up self tests and FIPS tests. Keep this stub, which fails, + * to prevent crashes, but also to signal to test code that FIPS 186-2 + * RNG is no longer supported. 
+ */ +SECStatus +FIPS186Change_GenerateX(PRUint8 *XKEY, const PRUint8 *XSEEDj, + PRUint8 *x_j) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); + return SECFailure; +} + +/* + * Specialized RNG for DSA + * + * As per Algorithm 1 of FIPS 186-2 Change Notice 1, in step 3.3 the value + * Xj should be reduced mod q, a 160-bit prime number. Since this parameter + * is only meaningful in the context of DSA, the above RNG functions + * were implemented without it. They are re-implemented below for use + * with DSA. + */ + +/* +** Generate some random bytes, using the global random number generator +** object. In DSA mode, so there is a q. +*/ +static SECStatus +dsa_GenerateGlobalRandomBytes(const SECItem *qItem, PRUint8 *dest, + unsigned int *destLen, unsigned int maxDestLen) +{ + SECStatus rv; + SECItem w; + const PRUint8 *q = qItem->data; + unsigned int qLen = qItem->len; + + if (*q == 0) { + ++q; + --qLen; + } + if (maxDestLen < qLen) { + /* This condition can occur when DSA_SignDigest is passed a group + with a subprime that is larger than DSA_MAX_SUBPRIME_LEN. */ + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + w.data = NULL; /* otherwise SECITEM_AllocItem asserts */ + if (!SECITEM_AllocItem(NULL, &w, 2 * qLen)) { + return SECFailure; + } + *destLen = qLen; + + rv = RNG_GenerateGlobalRandomBytes(w.data, w.len); + if (rv == SECSuccess) { + rv = fips186Change_ReduceModQForDSA(w.data, q, qLen, dest); + } + + SECITEM_FreeItem(&w, PR_FALSE); + return rv; +} + +static void +translate_mpi_error(mp_err err) +{ + MP_TO_SEC_ERROR(err); +} + +static SECStatus +dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed, + DSAPrivateKey **privKey) +{ + mp_int p, g; + mp_int x, y; + mp_err err; + PLArenaPool *arena; + DSAPrivateKey *key; + /* Check args. */ + if (!params || !privKey || !seed || !seed->data) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* Initialize an arena for the DSA key. 
*/ + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + key = (DSAPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DSAPrivateKey)); + if (!key) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + key->params.arena = arena; + /* Initialize MPI integers. */ + MP_DIGITS(&p) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&x) = 0; + MP_DIGITS(&y) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&x)); + CHECK_MPI_OK(mp_init(&y)); + /* Copy over the PQG params */ + CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.prime, + &params->prime)); + CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.subPrime, + &params->subPrime)); + CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.base, &params->base)); + /* Convert stored p, g, and received x into MPI integers. */ + SECITEM_TO_MPINT(params->prime, &p); + SECITEM_TO_MPINT(params->base, &g); + OCTETS_TO_MPINT(seed->data, &x, seed->len); + /* Store x in private key */ + SECITEM_AllocItem(arena, &key->privateValue, seed->len); + PORT_Memcpy(key->privateValue.data, seed->data, seed->len); + /* Compute public key y = g**x mod p */ + CHECK_MPI_OK(mp_exptmod(&g, &x, &p, &y)); + /* Store y in public key */ + MPINT_TO_SECITEM(&y, &key->publicValue, arena); + *privKey = key; + key = NULL; +cleanup: + mp_clear(&p); + mp_clear(&g); + mp_clear(&x); + mp_clear(&y); + if (key) { + PORT_FreeArena(key->params.arena, PR_TRUE); + } + if (err) { + translate_mpi_error(err); + return SECFailure; + } + return SECSuccess; +} + +SECStatus +DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed) +{ + int retries = 10; + unsigned int i; + PRBool good; + + if (q == NULL || q->data == NULL || q->len == 0 || + (q->data[0] == 0 && q->len == 1)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (!SECITEM_AllocItem(arena, seed, q->len)) { + return SECFailure; + } + + do { + /* 
Generate seed bytes for x according to FIPS 186-1 appendix 3 */ + if (dsa_GenerateGlobalRandomBytes(q, seed->data, &seed->len, + seed->len)) { + goto loser; + } + /* Disallow values of 0 and 1 for x. */ + good = PR_FALSE; + for (i = 0; i < seed->len - 1; i++) { + if (seed->data[i] != 0) { + good = PR_TRUE; + break; + } + } + if (!good && seed->data[i] > 1) { + good = PR_TRUE; + } + } while (!good && --retries > 0); + + if (!good) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + loser: + if (arena != NULL) { + SECITEM_ZfreeItem(seed, PR_FALSE); + } + return SECFailure; + } + + return SECSuccess; +} + +/* +** Generate and return a new DSA public and private key pair, +** both of which are encoded into a single DSAPrivateKey struct. +** "params" is a pointer to the PQG parameters for the domain +** Uses a random seed. +*/ +SECStatus +DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey) +{ + SECItem seed; + SECStatus rv; + + rv = PQG_Check(params); + if (rv != SECSuccess) { + return rv; + } + seed.data = NULL; + + rv = DSA_NewRandom(NULL, &params->subPrime, &seed); + if (rv == SECSuccess) { + if (seed.len != PQG_GetLength(&params->subPrime)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + rv = SECFailure; + } else { + rv = dsa_NewKeyExtended(params, &seed, privKey); + } + } + SECITEM_ZfreeItem(&seed, PR_FALSE); + return rv; +} + +/* For FIPS compliance testing. 
Seed must be exactly the size of subPrime */ +SECStatus +DSA_NewKeyFromSeed(const PQGParams *params, + const unsigned char *seed, + DSAPrivateKey **privKey) +{ + SECItem seedItem; + seedItem.data = (unsigned char *)seed; + seedItem.len = PQG_GetLength(&params->subPrime); + return dsa_NewKeyExtended(params, &seedItem, privKey); +} + +static SECStatus +dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, + const unsigned char *kbytes) +{ + mp_int p, q, g; /* PQG parameters */ + mp_int x, k; /* private key & pseudo-random integer */ + mp_int r, s; /* tuple (r, s) is signature) */ + mp_int t; /* holding tmp values */ + mp_int ar; /* holding blinding values */ + mp_digit fuzz; /* blinding multiplier for q */ + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + unsigned int dsa_subprime_len, dsa_signature_len, offset; + SECItem localDigest; + unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN]; + SECItem t2 = { siBuffer, NULL, 0 }; + + /* FIPS-compliance dictates that digest is a SHA hash. */ + /* Check args. */ + if (!key || !signature || !digest) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + dsa_subprime_len = PQG_GetLength(&key->params.subPrime); + dsa_signature_len = dsa_subprime_len * 2; + if ((signature->len < dsa_signature_len) || + (digest->len > HASH_LENGTH_MAX) || + (digest->len < SHA1_LENGTH)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* DSA accepts digests not equal to dsa_subprime_len, if the + * digests are greater, then they are truncated to the size of + * dsa_subprime_len, using the left most bits. If they are less + * then they are padded on the left.*/ + PORT_Memset(localDigestData, 0, dsa_subprime_len); + offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0; + PORT_Memcpy(localDigestData + offset, digest->data, + dsa_subprime_len - offset); + localDigest.data = localDigestData; + localDigest.len = dsa_subprime_len; + + /* Initialize MPI integers. 
*/ + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&x) = 0; + MP_DIGITS(&k) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&s) = 0; + MP_DIGITS(&t) = 0; + MP_DIGITS(&ar) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&x)); + CHECK_MPI_OK(mp_init(&k)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&s)); + CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&ar)); + + /* + ** Convert stored PQG and private key into MPI integers. + */ + SECITEM_TO_MPINT(key->params.prime, &p); + SECITEM_TO_MPINT(key->params.subPrime, &q); + SECITEM_TO_MPINT(key->params.base, &g); + SECITEM_TO_MPINT(key->privateValue, &x); + OCTETS_TO_MPINT(kbytes, &k, dsa_subprime_len); + + /* k blinding create a single value that has the high bit set in + * the mp_digit*/ + if (RNG_GenerateGlobalRandomBytes(&fuzz, sizeof(mp_digit)) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + fuzz |= 1ULL << ((sizeof(mp_digit) * PR_BITS_PER_BYTE - 1)); + /* + ** FIPS 186-1, Section 5, Step 1 + ** + ** r = (g**k mod p) mod q + */ + CHECK_MPI_OK(mp_mul_d(&q, fuzz, &t)); /* t = q*fuzz */ + CHECK_MPI_OK(mp_add(&k, &t, &t)); /* t = k+q*fuzz */ + /* length of t is now fixed, bits in k have been blinded */ + CHECK_MPI_OK(mp_exptmod(&g, &t, &p, &r)); /* r = g**t mod p */ + /* r is now g**(k+q*fuzz) == g**k mod p */ + CHECK_MPI_OK(mp_mod(&r, &q, &r)); /* r = r mod q */ + /* make sure fuzz is cleared off the stack and not optimized away */ + *(volatile mp_digit *)&fuzz = 0; + + /* + ** FIPS 186-1, Section 5, Step 2 + ** + ** s = (k**-1 * (HASH(M) + x*r)) mod q + */ + if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */ + SECITEM_ZfreeItem(&t2, PR_FALSE); + if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) { + 
PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + SECITEM_TO_MPINT(t2, &ar); /* ar <-$ Zq */ + SECITEM_ZfreeItem(&t2, PR_FALSE); + + /* Using mp_invmod on k directly would leak bits from k. */ + CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ + /* k is now k*t*ar */ + CHECK_MPI_OK(mp_invmod(&k, &q, &k)); /* k = k**-1 mod q */ + /* k is now (k*t*ar)**-1 */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ + /* k is now (k*ar)**-1 */ + SECITEM_TO_MPINT(localDigest, &s); /* s = HASH(M) */ + /* To avoid leaking secret bits here the addition is blinded. */ + CHECK_MPI_OK(mp_mul(&x, &ar, &x)); /* x = x * ar */ + /* x is now x*ar */ + CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */ + /* x is now x*r*ar */ + CHECK_MPI_OK(mp_mulmod(&s, &ar, &q, &t)); /* t = s * ar mod q */ + /* t is now hash(M)*ar */ + CHECK_MPI_OK(mp_add(&t, &x, &s)); /* s = t + x */ + /* s is now (HASH(M)+x*r)*ar */ + CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */ + /* s is now (HASH(M)+x*r)*ar*(k*ar)**-1 = (k**-1)*(HASH(M)+x*r) */ + + /* + ** verify r != 0 and s != 0 + ** mentioned as optional in FIPS 186-1. 
+ */ + if (mp_cmp_z(&r) == 0 || mp_cmp_z(&s) == 0) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + /* + ** Step 4 + ** + ** Signature is tuple (r, s) + */ + err = mp_to_fixlen_octets(&r, signature->data, dsa_subprime_len); + if (err < 0) + goto cleanup; + err = mp_to_fixlen_octets(&s, signature->data + dsa_subprime_len, + dsa_subprime_len); + if (err < 0) + goto cleanup; + err = MP_OKAY; + signature->len = dsa_signature_len; +cleanup: + PORT_Memset(localDigestData, 0, DSA_MAX_SUBPRIME_LEN); + mp_clear(&p); + mp_clear(&q); + mp_clear(&g); + mp_clear(&x); + mp_clear(&k); + mp_clear(&r); + mp_clear(&s); + mp_clear(&t); + mp_clear(&ar); + if (err) { + translate_mpi_error(err); + rv = SECFailure; + } + return rv; +} + +/* signature is caller-supplied buffer of at least 40 bytes. +** On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +** On output, signature->len == size of signature in buffer. +** Uses a random seed. +*/ +SECStatus +DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest) +{ + SECStatus rv; + int retries = 10; + unsigned char kSeed[DSA_MAX_SUBPRIME_LEN]; + unsigned int kSeedLen = 0; + unsigned int i; + unsigned int dsa_subprime_len = PQG_GetLength(&key->params.subPrime); + PRBool good; + + PORT_SetError(0); + do { + rv = dsa_GenerateGlobalRandomBytes(&key->params.subPrime, + kSeed, &kSeedLen, sizeof kSeed); + if (rv != SECSuccess) + break; + if (kSeedLen != dsa_subprime_len) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + rv = SECFailure; + break; + } + /* Disallow a value of 0 for k. 
*/ + good = PR_FALSE; + for (i = 0; i < kSeedLen; i++) { + if (kSeed[i] != 0) { + good = PR_TRUE; + break; + } + } + if (!good) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + continue; + } + rv = dsa_SignDigest(key, signature, digest, kSeed); + } while (rv != SECSuccess && PORT_GetError() == SEC_ERROR_NEED_RANDOM && + --retries > 0); + PORT_Memset(kSeed, 0, sizeof kSeed); + return rv; +} + +/* For FIPS compliance testing. Seed must be exactly 20 bytes. */ +SECStatus +DSA_SignDigestWithSeed(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed) +{ + SECStatus rv; + rv = dsa_SignDigest(key, signature, digest, seed); + return rv; +} + +/* signature is caller-supplied buffer of at least 20 bytes. +** On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +*/ +SECStatus +DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature, + const SECItem *digest) +{ + /* FIPS-compliance dictates that digest is a SHA hash. */ + mp_int p, q, g; /* PQG parameters */ + mp_int r_, s_; /* tuple (r', s') is received signature) */ + mp_int u1, u2, v, w; /* intermediate values used in verification */ + mp_int y; /* public key */ + mp_err err; + unsigned int dsa_subprime_len, dsa_signature_len, offset; + SECItem localDigest; + unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN]; + SECStatus verified = SECFailure; + + /* Check args. 
*/ + if (!key || !signature || !digest) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + dsa_subprime_len = PQG_GetLength(&key->params.subPrime); + dsa_signature_len = dsa_subprime_len * 2; + if ((signature->len != dsa_signature_len) || + (digest->len > HASH_LENGTH_MAX) || + (digest->len < SHA1_LENGTH)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* DSA accepts digests not equal to dsa_subprime_len, if the + * digests are greater, then they are truncated to the size of + * dsa_subprime_len, using the left most bits. If they are less + * then they are padded on the left.*/ + PORT_Memset(localDigestData, 0, dsa_subprime_len); + offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0; + PORT_Memcpy(localDigestData + offset, digest->data, + dsa_subprime_len - offset); + localDigest.data = localDigestData; + localDigest.len = dsa_subprime_len; + + /* Initialize MPI integers. */ + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&y) = 0; + MP_DIGITS(&r_) = 0; + MP_DIGITS(&s_) = 0; + MP_DIGITS(&u1) = 0; + MP_DIGITS(&u2) = 0; + MP_DIGITS(&v) = 0; + MP_DIGITS(&w) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&y)); + CHECK_MPI_OK(mp_init(&r_)); + CHECK_MPI_OK(mp_init(&s_)); + CHECK_MPI_OK(mp_init(&u1)); + CHECK_MPI_OK(mp_init(&u2)); + CHECK_MPI_OK(mp_init(&v)); + CHECK_MPI_OK(mp_init(&w)); + /* + ** Convert stored PQG and public key into MPI integers. + */ + SECITEM_TO_MPINT(key->params.prime, &p); + SECITEM_TO_MPINT(key->params.subPrime, &q); + SECITEM_TO_MPINT(key->params.base, &g); + SECITEM_TO_MPINT(key->publicValue, &y); + /* + ** Convert received signature (r', s') into MPI integers. 
+ */ + OCTETS_TO_MPINT(signature->data, &r_, dsa_subprime_len); + OCTETS_TO_MPINT(signature->data + dsa_subprime_len, &s_, dsa_subprime_len); + /* + ** Verify that 0 < r' < q and 0 < s' < q + */ + if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 || + mp_cmp(&r_, &q) >= 0 || mp_cmp(&s_, &q) >= 0) { + /* err is zero here. */ + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto cleanup; /* will return verified == SECFailure */ + } + /* + ** FIPS 186-1, Section 6, Step 1 + ** + ** w = (s')**-1 mod q + */ + CHECK_MPI_OK(mp_invmod(&s_, &q, &w)); /* w = (s')**-1 mod q */ + /* + ** FIPS 186-1, Section 6, Step 2 + ** + ** u1 = ((Hash(M')) * w) mod q + */ + SECITEM_TO_MPINT(localDigest, &u1); /* u1 = HASH(M') */ + CHECK_MPI_OK(mp_mulmod(&u1, &w, &q, &u1)); /* u1 = u1 * w mod q */ + /* + ** FIPS 186-1, Section 6, Step 3 + ** + ** u2 = ((r') * w) mod q + */ + CHECK_MPI_OK(mp_mulmod(&r_, &w, &q, &u2)); + /* + ** FIPS 186-1, Section 6, Step 4 + ** + ** v = ((g**u1 * y**u2) mod p) mod q + */ + CHECK_MPI_OK(mp_exptmod(&g, &u1, &p, &g)); /* g = g**u1 mod p */ + CHECK_MPI_OK(mp_exptmod(&y, &u2, &p, &y)); /* y = y**u2 mod p */ + CHECK_MPI_OK(mp_mulmod(&g, &y, &p, &v)); /* v = g * y mod p */ + CHECK_MPI_OK(mp_mod(&v, &q, &v)); /* v = v mod q */ + /* + ** Verification: v == r' + */ + if (mp_cmp(&v, &r_)) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + verified = SECFailure; /* Signature failed to verify. */ + } else { + verified = SECSuccess; /* Signature verified. 
*/ + } +cleanup: + PORT_Memset(localDigestData, 0, sizeof localDigestData); + mp_clear(&p); + mp_clear(&q); + mp_clear(&g); + mp_clear(&y); + mp_clear(&r_); + mp_clear(&s_); + mp_clear(&u1); + mp_clear(&u2); + mp_clear(&v); + mp_clear(&w); + if (err) { + translate_mpi_error(err); + } + return verified; +} diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c new file mode 100644 index 0000000000..35ccbf2ab2 --- /dev/null +++ b/security/nss/lib/freebl/ec.c @@ -0,0 +1,1170 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "blapii.h" +#include "prerr.h" +#include "secerr.h" +#include "secmpi.h" +#include "secitem.h" +#include "mplogic.h" +#include "ec.h" +#include "ecl.h" + +#define EC_DOUBLECHECK PR_FALSE + +static const ECMethod kMethods[] = { + { ECCurve25519, + ec_Curve25519_pt_mul, + ec_Curve25519_pt_validate } +}; + +static const ECMethod * +ec_get_method_from_name(ECCurveName name) +{ + unsigned long i; + for (i = 0; i < sizeof(kMethods) / sizeof(kMethods[0]); ++i) { + if (kMethods[i].name == name) { + return &kMethods[i]; + } + } + return NULL; +} + +/* + * Returns true if pointP is the point at infinity, false otherwise + */ +PRBool +ec_point_at_infinity(SECItem *pointP) +{ + unsigned int i; + + for (i = 1; i < pointP->len; i++) { + if (pointP->data[i] != 0x00) + return PR_FALSE; + } + + return PR_TRUE; +} + +/* + * Computes scalar point multiplication pointQ = k1 * G + k2 * pointP for + * the curve whose parameters are encoded in params with base point G. 
+ *
+ * The k2 * pointP term is only included when both k2 and pointP are
+ * non-NULL; otherwise the result is k1 * G. pointP is read, and pointQ is
+ * written, in uncompressed form: the EC_POINT_FORM_UNCOMPRESSED octet
+ * followed by the X and Y coordinates, each (fieldID.size + 7) / 8 octets
+ * long. pointQ->data must be supplied by the caller and be large enough
+ * for that encoding. Only named curves are supported.
+ *
+ * Returns SECSuccess on success and SECFailure otherwise.
+ */
+SECStatus
+ec_points_mul(const ECParams *params, const mp_int *k1, const mp_int *k2,
+              const SECItem *pointP, SECItem *pointQ)
+{
+    mp_int Px, Py, Qx, Qy;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    unsigned int len;
+
+#if EC_DEBUG
+    int i;
+    char mpstr[256];
+
+    printf("ec_points_mul: params [len=%d]:", params->DEREncoding.len);
+    for (i = 0; i < params->DEREncoding.len; i++)
+        printf("%02x:", params->DEREncoding.data[i]);
+    printf("\n");
+
+    if (k1 != NULL) {
+        mp_tohex((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s\n", mpstr);
+        mp_todecimal((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s (dec)\n", mpstr);
+    }
+
+    if (k2 != NULL) {
+        mp_tohex((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s\n", mpstr);
+        mp_todecimal((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s (dec)\n", mpstr);
+    }
+
+    if (pointP != NULL) {
+        printf("ec_points_mul: pointP [len=%d]:", pointP->len);
+        for (i = 0; i < pointP->len; i++)
+            printf("%02x:", pointP->data[i]);
+        printf("\n");
+    }
+#endif
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (((unsigned int)params->fieldID.size) + 7) >> 3;
+    if (pointP != NULL) {
+        /* Test the length first so data[0] is never read from an empty
+         * or data-less item. */
+        if ((pointP->len != (2 * len + 1)) || (pointP->data == NULL) ||
+            (pointP->data[0] != EC_POINT_FORM_UNCOMPRESSED)) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+            return SECFailure;
+        }
+    }
+
+    /* The result occupies 1 + 2 * len octets; refuse a missing or short
+     * output buffer instead of writing past its end. */
+    if ((pointQ == NULL) || (pointQ->data == NULL) ||
+        (pointQ->len < (2 * len + 1))) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    MP_DIGITS(&Qx) = 0;
+    MP_DIGITS(&Qy) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+    CHECK_MPI_OK(mp_init(&Qx));
+    CHECK_MPI_OK(mp_init(&Qy));
+
+    if ((k2 != NULL) && (pointP != NULL)) {
+        /* Initialize Px and Py */
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Px, pointP->data + 1, (mp_size)len));
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Py, pointP->data + 1 + len, (mp_size)len));
+    }
+
+    /* construct from named params; unnamed (explicit) curves are rejected */
+    if (params->name == ECCurve_noName) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        goto cleanup;
+    }
+    group = ECGroup_fromName(params->name);
+    if (group == NULL)
+        goto cleanup;
+
+    if ((k2 != NULL) && (pointP != NULL)) {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, k2, &Px, &Py, &Qx, &Qy));
+    } else {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, NULL, NULL, NULL, &Qx, &Qy));
+    }
+
+    /* our ECC code uses large stack variables to store intermediate results,
+     * clear our stack before returning to prevent CSP leakage */
+    BLAPI_CLEAR_STACK(2048)
+
+    /* Construct the SECItem representation of point Q */
+    pointQ->data[0] = EC_POINT_FORM_UNCOMPRESSED;
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qx, pointQ->data + 1,
+                                     (mp_size)len));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qy, pointQ->data + 1 + len,
+                                     (mp_size)len));
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("ec_points_mul: pointQ [len=%d]:", pointQ->len);
+    for (i = 0; i < pointQ->len; i++)
+        printf("%02x:", pointQ->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+    mp_clear(&Qx);
+    mp_clear(&Qy);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+    return rv;
+}
+
+/* Generates a new EC key pair. The private key is a supplied
+ * value and the public key is the result of performing a scalar
+ * point multiplication of that value with the curve's base point.
+ *
+ * privKeyBytes is copied into a buffer of order.len octets: longer input
+ * is cut to its first order.len octets, shorter input is left-padded with
+ * zeros. On success *privKey receives the new key, which lives in its own
+ * arena. On any failure that arena is freed and *privKey is not written.
+ */
+SECStatus
+ec_NewKey(ECParams *ecParams, ECPrivateKey **privKey,
+          const unsigned char *privKeyBytes, int privKeyLen)
+{
+    SECStatus rv = SECFailure;
+    PLArenaPool *arena;
+    ECPrivateKey *key;
+    mp_int k;
+    mp_err err = MP_OKAY;
+    int len;
+
+#if EC_DEBUG
+    printf("ec_NewKey called\n");
+#endif
+    MP_DIGITS(&k) = 0;
+
+    if (!ecParams || ecParams->name == ECCurve_noName ||
+        !privKey || !privKeyBytes || privKeyLen <= 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Initialize an arena for the EC key. */
+    if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE)))
+        return SECFailure;
+
+    key = (ECPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(ECPrivateKey));
+    if (!key) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    /* Set the version number (SEC 1 section C.4 says it should be 1) */
+    if (SECITEM_AllocItem(arena, &key->version, 1) == NULL) {
+        goto cleanup; /* rv is still SECFailure, so the arena is released */
+    }
+    key->version.data[0] = 1;
+
+    /* Copy all of the fields from the ECParams argument to the
+     * ECParams structure within the private key.
+     */
+    key->ecParams.arena = arena;
+    key->ecParams.type = ecParams->type;
+    key->ecParams.fieldID.size = ecParams->fieldID.size;
+    key->ecParams.fieldID.type = ecParams->fieldID.type;
+    if (ecParams->fieldID.type == ec_field_GFp ||
+        ecParams->fieldID.type == ec_field_plain) {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.prime,
+                                      &ecParams->fieldID.u.prime));
+    } else {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.poly,
+                                      &ecParams->fieldID.u.poly));
+    }
+    key->ecParams.fieldID.k1 = ecParams->fieldID.k1;
+    key->ecParams.fieldID.k2 = ecParams->fieldID.k2;
+    key->ecParams.fieldID.k3 = ecParams->fieldID.k3;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.a,
+                                  &ecParams->curve.a));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.b,
+                                  &ecParams->curve.b));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.seed,
+                                  &ecParams->curve.seed));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.base,
+                                  &ecParams->base));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.order,
+                                  &ecParams->order));
+    key->ecParams.cofactor = ecParams->cofactor;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.DEREncoding,
+                                  &ecParams->DEREncoding));
+    key->ecParams.name = ecParams->name;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curveOID,
+                                  &ecParams->curveOID));
+
+    /* rv is SECSuccess after the copies above, so every failure from here
+     * on must set it explicitly before jumping to cleanup. */
+    len = ecParams->order.len;
+    if (SECITEM_AllocItem(arena, &key->publicValue, EC_GetPointSize(ecParams)) == NULL ||
+        SECITEM_AllocItem(arena, &key->privateValue, len) == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* Copy private key */
+    if (privKeyLen >= len) {
+        memcpy(key->privateValue.data, privKeyBytes, len);
+    } else {
+        memset(key->privateValue.data, 0, (len - privKeyLen));
+        memcpy(key->privateValue.data + (len - privKeyLen), privKeyBytes, privKeyLen);
+    }
+
+    /* Compute corresponding public key */
+
+    if (ecParams->fieldID.type == ec_field_plain) {
+        /* Use curve specific code for point multiplication */
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->mul == NULL) {
+            /* unknown curve */
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        rv = method->mul(&key->publicValue, &key->privateValue, NULL);
+    } else {
+        CHECK_MPI_OK(mp_init(&k));
+        CHECK_MPI_OK(mp_read_unsigned_octets(&k, key->privateValue.data,
+                                             (mp_size)len));
+
+        rv = ec_points_mul(ecParams, &k, NULL, NULL, &(key->publicValue));
+    }
+    if (rv != SECSuccess) {
+        /* Never hand out a key whose arena is about to be freed. */
+        goto cleanup;
+    }
+
+    *privKey = key;
+
+cleanup:
+    mp_clear(&k);
+    if (err) {
+        /* A failed MPI call leaves rv at its previous value; map it. */
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        PORT_FreeArena(arena, PR_TRUE);
+    }
+
+#if EC_DEBUG
+    printf("ec_NewKey returning %s\n",
+           (rv == SECSuccess) ? "success" : "failure");
+#endif
+
+    return rv;
+}
+
+/* Generates a new EC key pair. The private key is a supplied
+ * random value (in seed) and the public key is the result of
+ * performing a scalar point multiplication of that value with
+ * the curve's base point.
+ */
+SECStatus
+EC_NewKeyFromSeed(ECParams *ecParams, ECPrivateKey **privKey,
+                  const unsigned char *seed, int seedlen)
+{
+    return ec_NewKey(ecParams, privKey, seed, seedlen);
+}
+
+/* Generate a random private key using the algorithm A.4.1 of ANSI X9.62,
+ * modified a la FIPS 186-2 Change Notice 1 to eliminate the bias in the
+ * random number generator.
+ *
+ * Parameters
+ * - order: a buffer that holds the curve's group order
+ * - len: the length in octets of the order buffer
+ *
+ * Return Value
+ * Returns a buffer whose first len octets hold the private key, or NULL
+ * on failure. The allocation is 2 * len octets long and its upper half is
+ * zeroed before returning. The caller is responsible for freeing the
+ * buffer with PORT_ZFree.
+ */
+static unsigned char *
+ec_GenerateRandomPrivateKey(const unsigned char *order, int len)
+{
+    mp_int candidate, orderMinusOne, unit;
+    unsigned char *keyBuf = NULL;
+    mp_err err;
+    SECStatus rv = SECSuccess;
+
+    /* Mark all three integers as unallocated so cleanup may clear them
+     * no matter where we bail out. */
+    MP_DIGITS(&candidate) = 0;
+    MP_DIGITS(&orderMinusOne) = 0;
+    MP_DIGITS(&unit) = 0;
+    CHECK_MPI_OK(mp_init(&candidate));
+    CHECK_MPI_OK(mp_init(&orderMinusOne));
+    CHECK_MPI_OK(mp_init(&unit));
+
+    /* Draw 2*len random octets from the global random bit generator
+     * (which implements Algorithm 1 of FIPS 186-2 Change Notice 1) and
+     * reduce them modulo the group order.
+     */
+    keyBuf = PORT_Alloc(2 * len);
+    if (keyBuf == NULL) {
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(keyBuf, 2 * len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&candidate, keyBuf, 2 * len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&orderMinusOne, order, len));
+    CHECK_MPI_OK(mp_set_int(&unit, 1));
+    CHECK_MPI_OK(mp_sub(&orderMinusOne, &unit, &orderMinusOne));
+    /* candidate = (candidate mod (order - 1)) + 1 */
+    CHECK_MPI_OK(mp_mod(&candidate, &orderMinusOne, &candidate));
+    CHECK_MPI_OK(mp_add(&candidate, &unit, &candidate));
+    /* The key goes into the first len octets; wipe the unused upper half. */
+    CHECK_MPI_OK(mp_to_fixlen_octets(&candidate, keyBuf, len));
+    memset(keyBuf + len, 0, len);
+
+cleanup:
+    mp_clear(&candidate);
+    mp_clear(&orderMinusOne);
+    mp_clear(&unit);
+    if (err < MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv != SECSuccess && keyBuf != NULL) {
+        PORT_ZFree(keyBuf, 2 * len);
+        keyBuf = NULL;
+    }
+    return keyBuf;
+}
+
+/* Generates a new EC key pair. The private key is a random value and
+ * the public key is the result of performing a scalar point multiplication
+ * of that value with the curve's base point.
+ */
+SECStatus
+EC_NewKey(ECParams *ecParams, ECPrivateKey **privKey)
+{
+    SECStatus rv = SECFailure;
+    int len;
+    unsigned char *privKeyBytes = NULL;
+
+    if (!ecParams || ecParams->name == ECCurve_noName || !privKey) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    len = ecParams->order.len;
+    privKeyBytes = ec_GenerateRandomPrivateKey(ecParams->order.data, len);
+    if (privKeyBytes == NULL)
+        goto cleanup;
+    /* generate public key */
+    CHECK_SEC_OK(ec_NewKey(ecParams, privKey, privKeyBytes, len));
+
+cleanup:
+    if (privKeyBytes) {
+        PORT_ZFree(privKeyBytes, len);
+    }
+#if EC_DEBUG
+    printf("EC_NewKey returning %s\n",
+           (rv == SECSuccess) ? "success" : "failure");
+#endif
+
+    return rv;
+}
+
+/* Validates an EC public key as described in Section 5.2.2 of
+ * X9.62. The ECDH primitive when used without the cofactor does
+ * not address small subgroup attacks, which may occur when the
+ * public key is not valid. These attacks can be prevented by
+ * validating the public key before using ECDH.
+ */
+SECStatus
+EC_ValidatePublicKey(ECParams *ecParams, SECItem *publicValue)
+{
+    mp_int Px, Py;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    unsigned int len;
+
+    if (!ecParams || ecParams->name == ECCurve_noName ||
+        !publicValue || !publicValue->len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Uses curve specific code for point validation. */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->validate == NULL) {
+            /* unknown curve */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+        return method->validate(publicValue);
+    }
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (((unsigned int)ecParams->fieldID.size) + 7) >> 3;
+    if (publicValue->data[0] != EC_POINT_FORM_UNCOMPRESSED) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+        return SECFailure;
+    } else if (publicValue->len != (2 * len + 1)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+
+    /* Initialize Px and Py */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Px, publicValue->data + 1, (mp_size)len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Py, publicValue->data + 1 + len, (mp_size)len));
+
+    /* construct from named params */
+    group = ECGroup_fromName(ecParams->name);
+    if (group == NULL) {
+        /*
+         * ECGroup_fromName fails if ecParams->name is not a valid
+         * ECCurveName value, or if we run out of memory, or perhaps
+         * for other reasons.  Unfortunately if ecParams->name is a
+         * valid ECCurveName value, we don't know what the right error
+         * code should be because ECGroup_fromName doesn't return an
+         * error code to the caller.  Set err to MP_UNDEF because
+         * that's what ECGroup_fromName uses internally.
+         */
+        if ((ecParams->name <= ECCurve_noName) ||
+            (ecParams->name >= ECCurve_pastLastCurve)) {
+            err = MP_BADARG;
+        } else {
+            err = MP_UNDEF;
+        }
+        goto cleanup;
+    }
+
+    /* validate public point */
+    if ((err = ECPoint_validate(group, &Px, &Py)) < MP_YES) {
+        if (err == MP_NO) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+            rv = SECFailure;
+            err = MP_OKAY; /* don't change the error code */
+        }
+        goto cleanup;
+    }
+
+    rv = SECSuccess;
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Performs an ECDH key derivation by computing the scalar point
+** multiplication of privateValue and publicValue (with or without the
+** cofactor) and returns the x-coordinate of the resulting elliptic
+** curve point in derived secret.  If successful, derivedSecret->data
+** is set to the address of the newly allocated buffer containing the
+** derived secret, and derivedSecret->len is the size of the secret
+** produced. It is the caller's responsibility to free the allocated
+** buffer containing the derived secret.
+**
+** On failure no buffer is left allocated in derivedSecret.
+*/
+SECStatus
+ECDH_Derive(SECItem *publicValue,
+            ECParams *ecParams,
+            SECItem *privateValue,
+            PRBool withCofactor,
+            SECItem *derivedSecret)
+{
+    SECStatus rv = SECFailure;
+    unsigned int len = 0;
+    SECItem pointQ = { siBuffer, NULL, 0 };
+    mp_int k;        /* to hold the private value */
+    mp_int cofactor; /* only initialised when withCofactor applies */
+    mp_err err = MP_OKAY;
+#if EC_DEBUG
+    int i;
+#endif
+
+    if (!publicValue || !publicValue->len ||
+        !ecParams || ecParams->name == ECCurve_noName ||
+        !privateValue || !privateValue->len || !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /*
+     * Make sure the point is on the requested curve to avoid
+     * certain small subgroup attacks.
+     */
+    if (EC_ValidatePublicKey(ecParams, publicValue) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    /* Perform curve specific multiplication using ECMethod */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method;
+        memset(derivedSecret, 0, sizeof(*derivedSecret));
+        /* Resolve the method before allocating, so that a failed lookup
+         * cannot leak the output buffer. */
+        method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->validate == NULL ||
+            method->mul == NULL) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+            return SECFailure;
+        }
+        if (SECITEM_AllocItem(NULL, derivedSecret, EC_GetPointSize(ecParams)) == NULL) {
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            return SECFailure;
+        }
+        rv = method->mul(derivedSecret, privateValue, publicValue);
+        if (rv != SECSuccess) {
+            SECITEM_ZfreeItem(derivedSecret, PR_FALSE);
+        }
+        return rv;
+    }
+
+    /*
+     * We fail if the public value is the point at infinity, since
+     * this produces predictable results.
+     */
+    if (ec_point_at_infinity(publicValue)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    /* Both integers must be marked unallocated before the first jump to
+     * cleanup, which clears them unconditionally. */
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&cofactor) = 0;
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    len = (ecParams->fieldID.size + 7) >> 3;
+    pointQ.len = EC_GetPointSize(ecParams);
+    if ((pointQ.data = PORT_Alloc(pointQ.len)) == NULL)
+        goto cleanup;
+
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, privateValue->data,
+                                         (mp_size)privateValue->len));
+
+    if (withCofactor && (ecParams->cofactor != 1)) {
+        /* multiply k with the cofactor */
+        CHECK_MPI_OK(mp_init(&cofactor));
+        mp_set(&cofactor, ecParams->cofactor);
+        CHECK_MPI_OK(mp_mul(&k, &cofactor, &k));
+    }
+
+    /* Multiply our private key and peer's public point */
+    if (ec_points_mul(ecParams, NULL, &k, publicValue, &pointQ) != SECSuccess) {
+        goto cleanup;
+    }
+    if (ec_point_at_infinity(&pointQ)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY); /* XXX better error code? */
+        goto cleanup;
+    }
+
+    /* Allocate memory for the derived secret and copy
+     * the x co-ordinate of pointQ into it.
+     */
+    if (SECITEM_AllocItem(NULL, derivedSecret, len) == NULL) {
+        goto cleanup; /* rv is still SECFailure */
+    }
+    memcpy(derivedSecret->data, pointQ.data + 1, len);
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("derived_secret:\n");
+    for (i = 0; i < derivedSecret->len; i++)
+        printf("%02x:", derivedSecret->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    mp_clear(&k);
+    mp_clear(&cofactor);
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+    }
+
+    if (pointQ.data) {
+        PORT_ZFree(pointQ.data, pointQ.len);
+    }
+
+    return rv;
+}
+
+/* Computes the ECDSA signature (a concatenation of two values r and s)
+ * on the digest using the given key and the random value kb (used in
+ * computing s).
+ */
+
+static SECStatus
+ec_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                      const SECItem *digest, const unsigned char *kb, const int kblen)
+{
+    SECStatus rv = SECFailure;
+    mp_int x1;
+    mp_int d, k; /* private key, random integer */
+    mp_int r, s; /* tuple (r, s) is the signature */
+    mp_int t;    /* holding tmp values */
+    mp_int n;
+    mp_int ar; /* blinding value */
+    mp_err err = MP_OKAY;
+    ECParams *ecParams = NULL;
+    SECItem kGpoint = { siBuffer, NULL, 0 };
+    int flen = 0;   /* length in bytes of the field size */
+    unsigned olen;  /* length in bytes of the base point order */
+    unsigned obits; /* length in bits  of the base point order */
+    unsigned char *t2 = NULL;
+
+#if EC_DEBUG
+    char mpstr[256];
+#endif
+
+    /* Initialize MPI integers. */
+    /* must happen before the first potential call to cleanup */
+    MP_DIGITS(&x1) = 0;
+    MP_DIGITS(&d) = 0;
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&ar) = 0;
+
+    /* Check args */
+    if (!key || !signature || !digest || !kb || (kblen < 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto cleanup;
+    }
+
+    ecParams = &(key->ecParams);
+    flen = (ecParams->fieldID.size + 7) >> 3;
+    olen = ecParams->order.len;
+    if (signature->data == NULL) {
+        /* a call to get the signature length only */
+        goto finish;
+    }
+    if (signature->len < 2 * olen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_init(&x1));
+    CHECK_MPI_OK(mp_init(&d));
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&ar));
+
+    SECITEM_TO_MPINT(ecParams->order, &n);
+    SECITEM_TO_MPINT(key->privateValue, &d);
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, kblen));
+    /* Make sure k is in the interval [1, n-1] */
+    if ((mp_cmp_z(&k) <= 0) || (mp_cmp(&k, &n) >= 0)) {
+#if EC_DEBUG
+        printf("k is outside [1, n-1]\n");
+        mp_tohex(&k, mpstr);
+        printf("k : %s \n", mpstr);
+        mp_tohex(&n, mpstr);
+        printf("n : %s \n", mpstr);
+#endif
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.3.2, Step 2
+    **
+    ** Compute kG
+    */
+    kGpoint.len = EC_GetPointSize(ecParams);
+    kGpoint.data = PORT_Alloc(kGpoint.len);
+    if ((kGpoint.data == NULL) ||
+        (ec_points_mul(ecParams, &k, NULL, NULL, &kGpoint) != SECSuccess))
+        goto cleanup;
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 1
+    **
+    ** Extract the x co-ordinate of kG into x1
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&x1, kGpoint.data + 1,
+                                         (mp_size)flen));
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 2
+    **
+    ** r = x1 mod n  NOTE: n is the order of the curve
+    */
+    CHECK_MPI_OK(mp_mod(&x1, &n, &r));
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 3
+    **
+    ** verify r != 0
+    */
+    if (mp_cmp_z(&r) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 4
+    **
+    ** s = (k**-1 * (HASH(M) + d*r)) mod n
+    */
+    SECITEM_TO_MPINT(*digest, &s); /* s = HASH(M)     */
+
+    /* In the definition of EC signing, digests are truncated
+     * to the length of n in bits.
+     * (see SEC 1 "Elliptic Curve Digital Signature Algorithm" section 4.1.) */
+    CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+    if (digest->len * 8 > obits) {
+        mpl_rsh(&s, &s, digest->len * 8 - obits);
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&n, mpstr);
+    printf("n : %s (dec)\n", mpstr);
+    mp_todecimal(&d, mpstr);
+    printf("d : %s (dec)\n", mpstr);
+    mp_tohex(&x1, mpstr);
+    printf("x1: %s\n", mpstr);
+    mp_todecimal(&s, mpstr);
+    printf("digest: %s (decimal)\n", mpstr);
+    mp_todecimal(&r, mpstr);
+    printf("r : %s (dec)\n", mpstr);
+    mp_tohex(&r, mpstr);
+    printf("r : %s\n", mpstr);
+#endif
+
+    if ((t2 = PORT_Alloc(2 * ecParams->order.len)) == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */
+    PORT_Memset(t2, 0, 2 * ecParams->order.len);
+    if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    CHECK_MPI_OK(mp_read_unsigned_octets(&ar, t2, 2 * ecParams->order.len)); /* ar <-$ Zn */
+
+    /* Using mp_invmod on k directly would leak bits from k. */
+    CHECK_MPI_OK(mp_mul(&k, &ar, &k));       /* k = k * ar */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
+    CHECK_MPI_OK(mp_invmod(&k, &n, &k));     /* k = k**-1 mod n */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
+    /* To avoid leaking secret bits here the addition is blinded. */
+    CHECK_MPI_OK(mp_mul(&d, &ar, &t));        /* t = d * ar */
+    CHECK_MPI_OK(mp_mulmod(&t, &r, &n, &d));  /* d = t * r mod n */
+    CHECK_MPI_OK(mp_mulmod(&s, &ar, &n, &t)); /* t = s * ar mod n */
+    CHECK_MPI_OK(mp_add(&t, &d, &s));         /* s = t + d */
+    CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s));  /* s = s * k mod n */
+
+#if EC_DEBUG
+    mp_todecimal(&s, mpstr);
+    printf("s : %s (dec)\n", mpstr);
+    mp_tohex(&s, mpstr);
+    printf("s : %s\n", mpstr);
+#endif
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 5
+    **
+    ** verify s != 0
+    */
+    if (mp_cmp_z(&s) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    **
+    ** Signature is tuple (r, s)
+    */
+    CHECK_MPI_OK(mp_to_fixlen_octets(&r, signature->data, olen));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&s, signature->data + olen, olen));
+finish:
+    signature->len = 2 * olen;
+
+    rv = SECSuccess;
+    err = MP_OKAY;
+cleanup:
+    mp_clear(&x1);
+    mp_clear(&d);
+    mp_clear(&k);
+    mp_clear(&r);
+    mp_clear(&s);
+    mp_clear(&n);
+    mp_clear(&t);
+    mp_clear(&ar);
+
+    if (t2) {
+        PORT_ZFree(t2, 2 * ecParams->order.len);
+    }
+
+    if (kGpoint.data) {
+        PORT_ZFree(kGpoint.data, kGpoint.len);
+    }
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing with seed %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+    return rv;
+}
+
+/* Public entry point for signing with a caller-supplied k (kb/kblen).
+ * In EC_DEBUG or EC_DOUBLECHECK builds the signature is computed twice and
+ * the call only succeeds when both runs succeed and agree; otherwise it
+ * is a plain call to ec_SignDigestWithSeed.
+ */
+SECStatus
+ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *kb, const int kblen)
+{
+#if EC_DEBUG || EC_DOUBLECHECK
+    SECItem *signature2;
+    SECStatus signSuccess;
+    SECStatus signSuccessDouble;
+    SECStatus rv = SECFailure;
+
+    /* With no output buffer (bad argument or length-only query) there is
+     * nothing to compare, so let the worker handle the call on its own. */
+    if (!signature || signature->data == NULL) {
+        return ec_SignDigestWithSeed(key, signature, digest, kb, kblen);
+    }
+
+    signature2 = SECITEM_AllocItem(NULL, NULL, signature->len);
+    if (signature2 == NULL) {
+        return SECFailure;
+    }
+    signSuccess = ec_SignDigestWithSeed(key, signature, digest, kb, kblen);
+    signSuccessDouble = ec_SignDigestWithSeed(key, signature2, digest, kb, kblen);
+    /* Compare the signature octets, not the SECItem headers. */
+    if ((signSuccess == SECSuccess) && (signSuccessDouble == SECSuccess) &&
+        (signature->len == signature2->len) &&
+        (NSS_SecureMemcmp(signature->data, signature2->data, signature->len) == 0)) {
+        rv = SECSuccess;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing with seed %s after signing twice\n", (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+    SECITEM_FreeItem(signature2, PR_TRUE);
+    return rv;
+#else
+    return ec_SignDigestWithSeed(key, signature, digest, kb, kblen);
+#endif
+}
+
+/*
+** Computes the ECDSA signature on the digest using the given key
+** and a random seed.
+*/
+SECStatus
+ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    SECStatus rv = SECFailure;
+    int len;
+    unsigned char *kBytes = NULL;
+
+    if (!key) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Generate random value k */
+    len = key->ecParams.order.len;
+    kBytes = ec_GenerateRandomPrivateKey(key->ecParams.order.data, len);
+    if (kBytes == NULL)
+        goto cleanup;
+
+    /* Generate ECDSA signature with the specified k value */
+    rv = ECDSA_SignDigestWithSeed(key, signature, digest, kBytes, len);
+
+cleanup:
+    if (kBytes) {
+        PORT_ZFree(kBytes, len);
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+    return rv;
+}
+
+/*
+** Checks the signature on the given digest using the key provided.
+**
+** The key argument must represent a valid EC public key (a point on
+** the relevant curve). If it is not a valid point, then the behavior
+** of this function is undefined. In cases where a public key might
+** not be valid, use EC_ValidatePublicKey to check.
+**
+** signature holds r' followed by s', each signature->len / 2 octets
+** long; the length must be non-zero, even, and at most twice the length
+** of the curve order. A digest with more bits than the order is
+** shortened by a right shift before it is used.
+**
+** Returns SECSuccess when the signature verifies and SECFailure
+** otherwise. SEC_ERROR_BAD_SIGNATURE is set when r' or s' is out of
+** range, when the computed point is the point at infinity, or when the
+** recomputed value v differs from r'.
+*/
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+                   const SECItem *digest)
+{
+    SECStatus rv = SECFailure;
+    mp_int r_, s_;       /* tuple (r', s') is received signature) */
+    mp_int c, u1, u2, v; /* intermediate values used in verification */
+    mp_int x1;
+    mp_int n;
+    mp_err err = MP_OKAY;
+    ECParams *ecParams = NULL;
+    SECItem pointC = { siBuffer, NULL, 0 };
+    int slen;       /* length in bytes of a half signature (r or s) */
+    int flen;       /* length in bytes of the field size */
+    unsigned olen;  /* length in bytes of the base point order */
+    unsigned obits; /* length in bits  of the base point order */
+
+#if EC_DEBUG
+    char mpstr[256];
+    printf("ECDSA verification called\n");
+#endif
+
+    /* Initialize MPI integers. */
+    /* must happen before the first potential call to cleanup */
+    MP_DIGITS(&r_) = 0;
+    MP_DIGITS(&s_) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&u1) = 0;
+    MP_DIGITS(&u2) = 0;
+    MP_DIGITS(&x1) = 0;
+    MP_DIGITS(&v) = 0;
+    MP_DIGITS(&n) = 0;
+
+    /* Check args */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto cleanup;
+    }
+
+    ecParams = &(key->ecParams);
+    flen = (ecParams->fieldID.size + 7) >> 3;
+    olen = ecParams->order.len;
+    if (signature->len == 0 || signature->len % 2 != 0 ||
+        signature->len > 2 * olen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        goto cleanup;
+    }
+    slen = signature->len / 2; /* r' and s' each take half of the signature */
+
+    /*
+     * The incoming point has been verified in sftk_handlePublicKeyObject.
+     */
+
+    SECITEM_AllocItem(NULL, &pointC, EC_GetPointSize(ecParams));
+    if (pointC.data == NULL) {
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_init(&r_));
+    CHECK_MPI_OK(mp_init(&s_));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&u1));
+    CHECK_MPI_OK(mp_init(&u2));
+    CHECK_MPI_OK(mp_init(&x1));
+    CHECK_MPI_OK(mp_init(&v));
+    CHECK_MPI_OK(mp_init(&n));
+
+    /*
+    ** Convert received signature (r', s') into MPI integers.
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&r_, signature->data, slen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&s_, signature->data + slen, slen));
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Steps 1 and 2
+    **
+    ** Verify that 0 < r' < n and 0 < s' < n
+    */
+    SECITEM_TO_MPINT(ecParams->order, &n);
+    if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+        mp_cmp(&r_, &n) >= 0 || mp_cmp(&s_, &n) >= 0) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup; /* will return rv == SECFailure */
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 3
+    **
+    ** c = (s')**-1 mod n
+    */
+    CHECK_MPI_OK(mp_invmod(&s_, &n, &c)); /* c = (s')**-1 mod n */
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 4
+    **
+    ** u1 = ((HASH(M')) * c) mod n
+    */
+    SECITEM_TO_MPINT(*digest, &u1); /* u1 = HASH(M)     */
+
+    /* In the definition of EC signing, digests are truncated
+     * to the length of n in bits.
+     * (see SEC 1 "Elliptic Curve Digital Signature Algorithm" section 4.1.) */
+    CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+    if (digest->len * 8 > obits) { /* u1 = HASH(M')     */
+        mpl_rsh(&u1, &u1, digest->len * 8 - obits);
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&r_, mpstr);
+    printf("r_: %s (dec)\n", mpstr);
+    mp_todecimal(&s_, mpstr);
+    printf("s_: %s (dec)\n", mpstr);
+    mp_todecimal(&c, mpstr);
+    printf("c : %s (dec)\n", mpstr);
+    mp_todecimal(&u1, mpstr);
+    printf("digest: %s (dec)\n", mpstr);
+#endif
+
+    CHECK_MPI_OK(mp_mulmod(&u1, &c, &n, &u1)); /* u1 = u1 * c mod n */
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 4
+    **
+    ** u2 = ((r') * c) mod n
+    */
+    CHECK_MPI_OK(mp_mulmod(&r_, &c, &n, &u2));
+
+    /*
+    ** ANSI X9.62, Section 5.4.3, Step 1
+    **
+    ** Compute u1*G + u2*Q
+    ** Here, A = u1.G     B = u2.Q    and   C = A + B
+    ** If the result, C, is the point at infinity, reject the signature
+    */
+    if (ec_points_mul(ecParams, &u1, &u2, &key->publicValue, &pointC) != SECSuccess) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    if (ec_point_at_infinity(&pointC)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&x1, pointC.data + 1, flen)); /* x1 = X coordinate of C */
+
+    /*
+    ** ANSI X9.62, Section 5.4.4, Step 2
+    **
+    ** v = x1 mod n
+    */
+    CHECK_MPI_OK(mp_mod(&x1, &n, &v));
+
+#if EC_DEBUG
+    mp_todecimal(&r_, mpstr);
+    printf("r_: %s (dec)\n", mpstr);
+    mp_todecimal(&v, mpstr);
+    printf("v : %s (dec)\n", mpstr);
+#endif
+
+    /*
+    ** ANSI X9.62, Section 5.4.4, Step 3
+    **
+    ** Verification:  v == r'
+    */
+    if (mp_cmp(&v, &r_)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure; /* Signature failed to verify. */
+    } else {
+        rv = SECSuccess; /* Signature verified. */
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&u1, mpstr);
+    printf("u1: %s (dec)\n", mpstr);
+    mp_todecimal(&u2, mpstr);
+    printf("u2: %s (dec)\n", mpstr);
+    mp_tohex(&x1, mpstr);
+    printf("x1: %s\n", mpstr);
+    mp_todecimal(&v, mpstr);
+    printf("v : %s (dec)\n", mpstr);
+#endif
+
+cleanup:
+    mp_clear(&r_);
+    mp_clear(&s_);
+    mp_clear(&c);
+    mp_clear(&u1);
+    mp_clear(&u2);
+    mp_clear(&x1);
+    mp_clear(&v);
+    mp_clear(&n);
+
+    if (pointC.data)
+        SECITEM_ZfreeItem(&pointC, PR_FALSE);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA verification %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+    return rv;
+}
diff --git a/security/nss/lib/freebl/ec.h b/security/nss/lib/freebl/ec.h
new file mode 100644
index 0000000000..bb65e82cd9
--- /dev/null
+++ b/security/nss/lib/freebl/ec.h
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ec_h_
+#define __ec_h_
+
+#define EC_DEBUG 0 /* non-zero compiles in the printf tracing guarded by #if EC_DEBUG */
+
+#define ANSI_X962_CURVE_OID_TOTAL_LEN 10
+#define SECG_CURVE_OID_TOTAL_LEN 7
+#define PKIX_NEWCURVES_OID_TOTAL_LEN 11
+
+struct ECMethodStr { /* curve-specific operations, looked up by curve name */
+    ECCurveName name; /* curve this entry applies to */
+    SECStatus (*mul)(SECItem *result, SECItem *scalar, SECItem *point); /* NOTE(review): point may be NULL; presumably that selects the base point -- confirm */
+    SECStatus (*validate)(const SECItem *point); /* checks an encoded public point */
+};
+typedef struct ECMethodStr ECMethod;
+
+#endif /* __ec_h_ */
diff --git a/security/nss/lib/freebl/ecdecode.c b/security/nss/lib/freebl/ecdecode.c
new file mode 100644
index 0000000000..652ad42d53
--- /dev/null
+++ b/security/nss/lib/freebl/ecdecode.c
@@ -0,0 +1,252 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "secoid.h" +#include "secitem.h" +#include "secerr.h" +#include "ec.h" +#include "ecl-curve.h" + +/* Jump to the enclosing function's cleanup label when func evaluates to NULL. + * Wrapped in do/while (0) so the macro behaves as a single statement. + */ +#define CHECK_OK(func) \ + do { \ + if ((func) == NULL) \ + goto cleanup; \ + } while (0) +/* Store the SECStatus of func in the caller's rv and jump to the cleanup + * label unless it is SECSuccess. + */ +#define CHECK_SEC_OK(func) \ + do { \ + if (SECSuccess != (rv = (func))) \ + goto cleanup; \ + } while (0) + +/* Copy all of the fields from srcParams into dstParams. + * + * Scalar fields are assigned directly; every SECItem is duplicated with + * SECITEM_CopyItem using arena, which is also recorded as dstParams->arena. + * Returns SECSuccess, or SECFailure as soon as one copy fails (dstParams is + * then only partially filled in). + */ +SECStatus +EC_CopyParams(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams) +{ + SECStatus rv = SECFailure; + + dstParams->arena = arena; + dstParams->type = srcParams->type; + dstParams->fieldID.size = srcParams->fieldID.size; + dstParams->fieldID.type = srcParams->fieldID.type; + if (srcParams->fieldID.type == ec_field_GFp || + srcParams->fieldID.type == ec_field_plain) { + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->fieldID.u.prime, + &srcParams->fieldID.u.prime)); + } else { + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->fieldID.u.poly, + &srcParams->fieldID.u.poly)); + } + dstParams->fieldID.k1 = srcParams->fieldID.k1; + dstParams->fieldID.k2 = srcParams->fieldID.k2; + dstParams->fieldID.k3 = srcParams->fieldID.k3; + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.a, + &srcParams->curve.a)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.b, + &srcParams->curve.b)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.seed, + &srcParams->curve.seed)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->base, + &srcParams->base)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->order, + &srcParams->order)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->DEREncoding, + &srcParams->DEREncoding)); + dstParams->name = srcParams->name; + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curveOID, + &srcParams->curveOID)); + dstParams->cofactor = srcParams->cofactor; + + return SECSuccess; + +cleanup: + return SECFailure; +} + +/* Fill params with the built-in domain parameters of curve name, taken from + * the ecCurve_map table. + * + * The SECItem data pointers are set to the table's own byte arrays (nothing + * is copied). Only ec_field_GFp and ec_field_plain are accepted as + * field_type. Returns SECFailure when name is outside the table range, has + * no table entry, or field_type is not accepted; in the last case + * params->name, fieldID.size and fieldID.type have already been written. + */ +static SECStatus +gf_populate_params_bytes(ECCurveName name, ECFieldType field_type, ECParams
*params) +{ + SECStatus rv = SECFailure; + const ECCurveBytes *curveParams; + + if ((name < ECCurve_noName) || (name > ECCurve_pastLastCurve)) + goto cleanup; + params->name = name; + curveParams = ecCurve_map[params->name]; + CHECK_OK(curveParams); + params->fieldID.size = curveParams->size; + params->fieldID.type = field_type; + if (field_type != ec_field_GFp && field_type != ec_field_plain) { + return SECFailure; + } + params->fieldID.u.prime.len = curveParams->scalarSize; + params->fieldID.u.prime.data = (unsigned char *)curveParams->irr; + params->curve.a.len = curveParams->scalarSize; + params->curve.a.data = (unsigned char *)curveParams->curvea; + params->curve.b.len = curveParams->scalarSize; + params->curve.b.data = (unsigned char *)curveParams->curveb; + params->base.len = curveParams->pointSize; + params->base.data = (unsigned char *)curveParams->base; + params->order.len = curveParams->scalarSize; + params->order.data = (unsigned char *)curveParams->order; + params->cofactor = curveParams->cofactor; + + rv = SECSuccess; + +cleanup: + return rv; +} + +/* Populate params from a DER-encoded named-curve OID. + * + * encodedParams must be exactly one of the three supported total lengths and + * start with the SEC_ASN1_OBJECT_ID tag; the OID bytes (everything after the + * first two bytes) are copied into params->curveOID using arena. Domain + * parameters are then filled in for P-256, P-384, P-521 and Curve25519. + * Any other OID, or any failure that leaves params->cofactor at 0, sets + * SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE and returns SECFailure. + */ +SECStatus +EC_FillParams(PLArenaPool *arena, const SECItem *encodedParams, + ECParams *params) +{ + SECStatus rv = SECFailure; + SECOidTag tag; + SECItem oid = { siBuffer, NULL, 0 }; + +#if EC_DEBUG + int i; + + printf("Encoded params in EC_DecodeParams: "); + for (i = 0; i < encodedParams->len; i++) { + printf("%02x:", encodedParams->data[i]); + } + printf("\n"); +#endif + + if ((encodedParams->len != ANSI_X962_CURVE_OID_TOTAL_LEN) && + (encodedParams->len != SECG_CURVE_OID_TOTAL_LEN) && + (encodedParams->len != PKIX_NEWCURVES_OID_TOTAL_LEN)) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); + return SECFailure; + } + + /* Skip the tag and length bytes to get at the raw OID */ + oid.len = encodedParams->len - 2; + oid.data = encodedParams->data + 2; + if ((encodedParams->data[0] != SEC_ASN1_OBJECT_ID) || + ((tag = SECOID_FindOIDTag(&oid)) == SEC_OID_UNKNOWN)) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); + return SECFailure; + } + + params->arena
= arena; + params->cofactor = 0; + params->type = ec_params_named; + params->name = ECCurve_noName; + + /* Fill out curveOID */ + params->curveOID.len = oid.len; + params->curveOID.data = (unsigned char *)PORT_ArenaAlloc(arena, oid.len); + if (params->curveOID.data == NULL) + goto cleanup; + memcpy(params->curveOID.data, oid.data, oid.len); + +#if EC_DEBUG + printf("Curve: %s\n", SECOID_FindOIDTagDescription(tag)); +#endif + + switch (tag) { + case SEC_OID_ANSIX962_EC_PRIME256V1: + /* Populate params for prime256v1 aka secp256r1 + * (the NIST P-256 curve) + */ + CHECK_SEC_OK(gf_populate_params_bytes(ECCurve_X9_62_PRIME_256V1, + ec_field_GFp, params)); + break; + + case SEC_OID_SECG_EC_SECP384R1: + /* Populate params for secp384r1 + * (the NIST P-384 curve) + */ + CHECK_SEC_OK(gf_populate_params_bytes(ECCurve_SECG_PRIME_384R1, + ec_field_GFp, params)); + break; + + case SEC_OID_SECG_EC_SECP521R1: + /* Populate params for secp521r1 + * (the NIST P-521 curve) + */ + CHECK_SEC_OK(gf_populate_params_bytes(ECCurve_SECG_PRIME_521R1, + ec_field_GFp, params)); + break; + + case SEC_OID_CURVE25519: + /* Populate params for Curve25519 */ + CHECK_SEC_OK(gf_populate_params_bytes(ECCurve25519, ec_field_plain, + params)); + break; + + default: + /* Unsupported curve: cofactor stays 0 and is reported below */ + break; + } + +cleanup: + /* cofactor is only non-zero after a successful populate call */ + if (!params->cofactor) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); +#if EC_DEBUG + printf("Unrecognized curve, returning NULL params\n"); +#endif + } + + return rv; +} + +/* Decode DER-encoded named-curve parameters into a newly allocated ECParams. + * + * A fresh arena is created to own the ECParams structure, a copy of + * encodedParams (stored in DEREncoding) and everything EC_FillParams + * allocates. On success *ecparams receives the structure and SECSuccess is + * returned; on any failure the arena is freed, *ecparams is left untouched + * and SECFailure is returned. + */ +SECStatus +EC_DecodeParams(const SECItem *encodedParams, ECParams **ecparams) +{ + PLArenaPool *arena; + ECParams *params; + SECStatus rv = SECFailure; + + /* Initialize an arena for the ECParams structure */ + if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE))) + return SECFailure; + + params = (ECParams *)PORT_ArenaZAlloc(arena, sizeof(ECParams)); + if (!params) { + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + + /* Copy the encoded params. SECITEM_AllocItem reports an allocation + * failure by returning NULL; DEREncoding.data must not be written then. + */ + if (SECITEM_AllocItem(arena, &(params->DEREncoding), +
encodedParams->len) == NULL) { + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + memcpy(params->DEREncoding.data, encodedParams->data, encodedParams->len); + + /* Fill out the rest of the ECParams structure based on + * the encoded params + */ + rv = EC_FillParams(arena, encodedParams, params); + if (rv != SECSuccess) { + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + + *ecparams = params; + return SECSuccess; +} + +/* Return the point size in bytes used for the curve described by params. + * + * Curves with an ecCurve_map entry use the table: scalarSize for Curve25519 + * and pointSize - 1 for every other curve. Curves without an entry fall back + * to a size computed from params->fieldID.size. + */ +int +EC_GetPointSize(const ECParams *params) +{ + ECCurveName name = params->name; + const ECCurveBytes *curveParams; + + if ((name < ECCurve_noName) || (name > ECCurve_pastLastCurve) || + ((curveParams = ecCurve_map[name]) == NULL)) { + /* unknown curve, calculate point size from params. assume standard curves with 2 coordinates + * and a point compression indicator byte */ + int sizeInBytes = (params->fieldID.size + 7) / 8; + return sizeInBytes * 2 + 1; + } + if (name == ECCurve25519) { + /* Only X here */ + return curveParams->scalarSize; + } + return curveParams->pointSize - 1; +} diff --git a/security/nss/lib/freebl/ecl/README b/security/nss/lib/freebl/ecl/README new file mode 100644 index 0000000000..2996822c88 --- /dev/null +++ b/security/nss/lib/freebl/ecl/README @@ -0,0 +1,163 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +The ECL exposes routines for constructing and converting curve +parameters for internal use. + + +HEADER FILES +============ + +ecl-exp.h - Exports data structures and curve names. For use by code +that does not have access to mp_ints. + +ecl-curve.h - Provides hex encodings (in the form of ECCurveParams +structs) of standardized elliptic curve domain parameters and mappings +from ECCurveName to ECCurveParams. For use by code that does not have +access to mp_ints. + +ecl.h - Interface to constructors for curve parameters and group object, +and point multiplication operations.
Used by higher level algorithms +(like ECDH and ECDSA) to actually perform elliptic curve cryptography. + +ecl-priv.h - Data structures and functions for internal use within the +library. + +ecp.h - Internal header file that contains all functions for point +arithmetic over prime fields. + +DATA STRUCTURES AND TYPES +========================= + +ECCurveName (from ecl-exp.h) - Opaque name for standardized elliptic +curve domain parameters. + +ECCurveParams (from ecl-exp.h) - Provides hexadecimal encoding +of elliptic curve domain parameters. Can be generated by a user +and passed to ECGroup_fromHex or can be generated from a name by +EC_GetNamedCurveParams. ecl-curve.h contains ECCurveParams structs for +the standardized curves defined by ECCurveName. + +ECGroup (from ecl.h and ecl-priv.h) - Opaque data structure that +represents a group of elliptic curve points for a particular set of +elliptic curve domain parameters. Contains all domain parameters (curve +a and b, field, base point) as well as pointers to the functions that +should be used for point arithmetic and the underlying field GFMethod. +Generated by either ECGroup_fromHex or ECGroup_fromName. + +GFMethod (from ecl-priv.h) - Represents a field underlying a set of +elliptic curve domain parameters. Contains the irreducible that defines +the field (either the prime or the binary polynomial) as well as +pointers to the functions that should be used for field arithmetic. + +ARITHMETIC FUNCTIONS +==================== + +Higher-level algorithms (like ECDH and ECDSA) should call ECPoint_mul +or ECPoints_mul (from ecl.h) to do point arithmetic. These functions +will choose which underlying algorithms to use, based on the ECGroup +structure. + +Point Multiplication +-------------------- + +ecl_mult.c provides the ECPoints_mul and ECPoint_mul wrappers. 
+It also provides two implementations for the pts_mul operation - +ec_pts_mul_basic (which computes kP, lQ, and then adds kP + lQ) and +ec_pts_mul_simul_w2 (which does a simultaneous point multiplication +using a table with window size 2*2). + +ec_naf.c provides an implementation of an algorithm to calculate a +non-adjacent form of a scalar, minimizing the number of point +additions that need to be done in a point multiplication. + +Point Arithmetic over Prime Fields +---------------------------------- + +ecp_aff.c provides point arithmetic using affine coordinates. + +ecp_jac.c provides point arithmetic using Jacobian projective +coordinates and mixed Jacobian-affine coordinates. (Jacobian projective +coordinates represent a point (x, y) as (X, Y, Z), where x=X/Z^2, +y=Y/Z^3). + +ecp_jm.c provides point arithmetic using Modified Jacobian +coordinates and mixed Modified_Jacobian-affine coordinates. +(Modified Jacobian coordinates represent a point (x, y) +as (X, Y, Z, a*Z^4), where x=X/Z^2, y=Y/Z^3, and a is +the linear coefficient in the curve defining equation). + +ecp_192.c and ecp_224.c provide optimized field arithmetic. + +Field Arithmetic +---------------- + +ecl_gf.c provides constructors for field objects (GFMethod) with the +functions GFMethod_cons*. It also provides wrappers around the basic +field operations. + +Prime Field Arithmetic +---------------------- + +The mpi library provides the basic prime field arithmetic. + +ecp_mont.c provides wrappers around the Montgomery multiplication +functions from the mpi library and adds encoding and decoding functions. +It also provides the function to construct a GFMethod object using +Montgomery multiplication. + +ecp_192.c and ecp_224.c provide optimized modular reduction for the +fields defined by nistp192 and nistp224 primes. + +ecl_gf.c provides wrappers around the basic field operations. + +Field Encoding +-------------- + +By default, field elements are encoded in their basic form. 
It is +possible to use an alternative encoding, however. For example, it is +possible to use the Montgomery representation of prime field elements and +take advantage of the fast modular multiplication that Montgomery +representation provides. The process of converting from basic form to +Montgomery representation is called field encoding, and the opposite +process would be field decoding. All internal point operations assume +that the operands are field encoded as appropriate. By rewiring the +underlying field arithmetic to perform operations on these encoded +values, the same overlying point arithmetic operations can be used +regardless of field representation. + +ALGORITHM WIRING +================ + +The EC library allows point and field arithmetic algorithms to be +substituted ("wired-in") on a fine-grained basis. This allows for +generic algorithms and algorithms that are optimized for a particular +curve, field, or architecture, to coexist and to be automatically +selected at runtime. + +Wiring Mechanism +---------------- + +The ECGroup and GFMethod structures contain pointers to the point and +field arithmetic functions, respectively, that are to be used in +operations. + +The selection of algorithms to use is handled in the function +ecgroup_fromNameAndHex in ecl.c. + +Default Wiring +-------------- + +Curves over prime fields by default use Montgomery field arithmetic, +point multiplication using 5-bit window non-adjacent-form with +Modified Jacobian coordinates, and 2*2-bit simultaneous point +multiplication using Jacobian coordinates. +(Wiring in function ECGroup_consGFp_mont in ecl.c.) + +Curves over prime fields that have optimized modular reduction (i.e., +secp160r1, nistp192, and nistp224) do not use Montgomery field +arithmetic. Instead, they use basic field arithmetic with their +optimized reduction (as in ecp_192.c and ecp_224.c). They +use the same point multiplication and simultaneous point multiplication +algorithms as other curves over prime fields.
diff --git a/security/nss/lib/freebl/ecl/curve25519_32.c b/security/nss/lib/freebl/ecl/curve25519_32.c new file mode 100644 index 0000000000..c282456809 --- /dev/null +++ b/security/nss/lib/freebl/ecl/curve25519_32.c @@ -0,0 +1,1213 @@ +// The MIT License (MIT) +// +// Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file). +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/* + * Derived from machine-generated code via Fiat-Crypto: + * https://github.com/mit-plv/fiat-crypto and https://github.com/briansmith/ring + * + * The below captures notable changes: + * + * 1. Convert custom integer types to stdint.h types + */ + +#ifdef FREEBL_NO_DEPEND +#include "../stubs.h" +#endif + +#include "ecl-priv.h" + +/* fe means field element. Here the field is \Z/(2^255-19). An element t, + * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 + * t[3]+2^102 t[4]+...+2^230 t[9]. 
+ * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc. + * Multiplication and carrying produce fe from fe_loose. + */ +typedef struct fe { + uint32_t v[10]; +} fe; + +/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc + * Addition and subtraction produce fe_loose from (fe, fe). + */ +typedef struct fe_loose { + uint32_t v[10]; +} fe_loose; + +#define assert_fe(f) \ + do { \ + for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \ + PORT_Assert(f[_assert_fe_i] <= \ + ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \ + } \ + } while (0) + +#define assert_fe_loose(f) \ + do { \ + for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \ + PORT_Assert(f[_assert_fe_i] <= \ + ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \ + } \ + } while (0) + +/* + * The function fiat_25519_subborrowx_u26 is a subtraction with borrow. + * Postconditions: + * out1 = (-arg1 + arg2 + -arg3) mod 2^26 + * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^26⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x3ffffff] + * arg3: [0x0 ~> 0x3ffffff] + * Output Bounds: + * out1: [0x0 ~> 0x3ffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_25519_subborrowx_u26(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3) +{ + int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3); + int8_t x2 = (int8_t)(x1 >> 26); + uint32_t x3 = (x1 & UINT32_C(0x3ffffff)); + *out1 = x3; + *out2 = (uint8_t)(0x0 - x2); +} + +/* + * The function fiat_25519_subborrowx_u25 is a subtraction with borrow. 
+ * Postconditions: + * out1 = (-arg1 + arg2 + -arg3) mod 2^25 + * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^25⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x1ffffff] + * arg3: [0x0 ~> 0x1ffffff] + * Output Bounds: + * out1: [0x0 ~> 0x1ffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_25519_subborrowx_u25(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3) +{ + int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3); + int8_t x2 = (int8_t)(x1 >> 25); + uint32_t x3 = (x1 & UINT32_C(0x1ffffff)); + *out1 = x3; + *out2 = (uint8_t)(0x0 - x2); +} + +/* + * The function fiat_25519_addcarryx_u26 is an addition with carry. + * Postconditions: + * out1 = (arg1 + arg2 + arg3) mod 2^26 + * out2 = ⌊(arg1 + arg2 + arg3) / 2^26⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x3ffffff] + * arg3: [0x0 ~> 0x3ffffff] + * Output Bounds: + * out1: [0x0 ~> 0x3ffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_25519_addcarryx_u26(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3) +{ + uint32_t x1 = ((arg1 + arg2) + arg3); + uint32_t x2 = (x1 & UINT32_C(0x3ffffff)); + uint8_t x3 = (uint8_t)(x1 >> 26); + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_25519_addcarryx_u25 is an addition with carry. + * Postconditions: + * out1 = (arg1 + arg2 + arg3) mod 2^25 + * out2 = ⌊(arg1 + arg2 + arg3) / 2^25⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x1ffffff] + * arg3: [0x0 ~> 0x1ffffff] + * Output Bounds: + * out1: [0x0 ~> 0x1ffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_25519_addcarryx_u25(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3) +{ + uint32_t x1 = ((arg1 + arg2) + arg3); + uint32_t x2 = (x1 & UINT32_C(0x1ffffff)); + uint8_t x3 = (uint8_t)(x1 >> 25); + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_25519_cmovznz_u32 is a single-word conditional move. 
+ * Postconditions: + * out1 = (if arg1 = 0 then arg2 else arg3) + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xffffffff] + * arg3: [0x0 ~> 0xffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffff] + */ +static void +fiat_25519_cmovznz_u32(uint32_t *out1, uint8_t arg1, uint32_t arg2, uint32_t arg3) +{ + uint8_t x1 = (!(!arg1)); + uint32_t x2 = ((int8_t)(0x0 - x1) & UINT32_C(0xffffffff)); + uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + *out1 = x3; +} + +/* + * The function fiat_25519_from_bytes deserializes a field element from bytes in little-endian order. + * Postconditions: + * eval out1 mod m = bytes_eval arg1 mod m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]] + * Output Bounds: + * out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] + */ +static void +fiat_25519_from_bytes(uint32_t out1[10], const uint8_t arg1[32]) +{ + uint32_t x1 = ((uint32_t)(arg1[31]) << 18); + uint32_t x2 = ((uint32_t)(arg1[30]) << 10); + uint32_t x3 = ((uint32_t)(arg1[29]) << 2); + uint32_t x4 = ((uint32_t)(arg1[28]) << 20); + uint32_t x5 = ((uint32_t)(arg1[27]) << 12); + uint32_t x6 = ((uint32_t)(arg1[26]) << 4); + uint32_t x7 = ((uint32_t)(arg1[25]) << 21); + uint32_t x8 = ((uint32_t)(arg1[24]) << 13); + uint32_t x9 = ((uint32_t)(arg1[23]) << 5); + uint32_t x10 = ((uint32_t)(arg1[22]) << 23); + uint32_t x11 = ((uint32_t)(arg1[21]) << 15); + uint32_t x12 
= ((uint32_t)(arg1[20]) << 7); + uint32_t x13 = ((uint32_t)(arg1[19]) << 24); + uint32_t x14 = ((uint32_t)(arg1[18]) << 16); + uint32_t x15 = ((uint32_t)(arg1[17]) << 8); + uint8_t x16 = (arg1[16]); + uint32_t x17 = ((uint32_t)(arg1[15]) << 18); + uint32_t x18 = ((uint32_t)(arg1[14]) << 10); + uint32_t x19 = ((uint32_t)(arg1[13]) << 2); + uint32_t x20 = ((uint32_t)(arg1[12]) << 19); + uint32_t x21 = ((uint32_t)(arg1[11]) << 11); + uint32_t x22 = ((uint32_t)(arg1[10]) << 3); + uint32_t x23 = ((uint32_t)(arg1[9]) << 21); + uint32_t x24 = ((uint32_t)(arg1[8]) << 13); + uint32_t x25 = ((uint32_t)(arg1[7]) << 5); + uint32_t x26 = ((uint32_t)(arg1[6]) << 22); + uint32_t x27 = ((uint32_t)(arg1[5]) << 14); + uint32_t x28 = ((uint32_t)(arg1[4]) << 6); + uint32_t x29 = ((uint32_t)(arg1[3]) << 24); + uint32_t x30 = ((uint32_t)(arg1[2]) << 16); + uint32_t x31 = ((uint32_t)(arg1[1]) << 8); + uint8_t x32 = (arg1[0]); + uint32_t x33 = (x32 + (x31 + (x30 + x29))); + uint8_t x34 = (uint8_t)(x33 >> 26); + uint32_t x35 = (x33 & UINT32_C(0x3ffffff)); + uint32_t x36 = (x3 + (x2 + x1)); + uint32_t x37 = (x6 + (x5 + x4)); + uint32_t x38 = (x9 + (x8 + x7)); + uint32_t x39 = (x12 + (x11 + x10)); + uint32_t x40 = (x16 + (x15 + (x14 + x13))); + uint32_t x41 = (x19 + (x18 + x17)); + uint32_t x42 = (x22 + (x21 + x20)); + uint32_t x43 = (x25 + (x24 + x23)); + uint32_t x44 = (x28 + (x27 + x26)); + uint32_t x45 = (x34 + x44); + uint8_t x46 = (uint8_t)(x45 >> 25); + uint32_t x47 = (x45 & UINT32_C(0x1ffffff)); + uint32_t x48 = (x46 + x43); + uint8_t x49 = (uint8_t)(x48 >> 26); + uint32_t x50 = (x48 & UINT32_C(0x3ffffff)); + uint32_t x51 = (x49 + x42); + uint8_t x52 = (uint8_t)(x51 >> 25); + uint32_t x53 = (x51 & UINT32_C(0x1ffffff)); + uint32_t x54 = (x52 + x41); + uint32_t x55 = (x54 & UINT32_C(0x3ffffff)); + uint8_t x56 = (uint8_t)(x40 >> 25); + uint32_t x57 = (x40 & UINT32_C(0x1ffffff)); + uint32_t x58 = (x56 + x39); + uint8_t x59 = (uint8_t)(x58 >> 26); + uint32_t x60 = (x58 & 
UINT32_C(0x3ffffff)); + uint32_t x61 = (x59 + x38); + uint8_t x62 = (uint8_t)(x61 >> 25); + uint32_t x63 = (x61 & UINT32_C(0x1ffffff)); + uint32_t x64 = (x62 + x37); + uint8_t x65 = (uint8_t)(x64 >> 26); + uint32_t x66 = (x64 & UINT32_C(0x3ffffff)); + uint32_t x67 = (x65 + x36); + out1[0] = x35; + out1[1] = x47; + out1[2] = x50; + out1[3] = x53; + out1[4] = x55; + out1[5] = x57; + out1[6] = x60; + out1[7] = x63; + out1[8] = x66; + out1[9] = x67; +} + +static void +fe_frombytes_strict(fe *h, const uint8_t s[32]) +{ + // |fiat_25519_from_bytes| requires the top-most bit be clear. + PORT_Assert((s[31] & 0x80) == 0); + fiat_25519_from_bytes(h->v, s); + assert_fe(h->v); +} + +static inline void +fe_frombytes(fe *h, const uint8_t *s) +{ + uint8_t s_copy[32]; + memcpy(s_copy, s, 32); + s_copy[31] &= 0x7f; + fe_frombytes_strict(h, s_copy); +} + +/* + * The function fiat_25519_to_bytes serializes a field element to bytes in little-endian order. + * Postconditions: + * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31] + * + * Input Bounds: + * arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] + * Output Bounds: + * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]] + */ +static void +fiat_25519_to_bytes(uint8_t out1[32], const uint32_t arg1[10]) +{ + uint32_t x1; + uint8_t x2; + fiat_25519_subborrowx_u26(&x1, &x2, 0x0, (arg1[0]), UINT32_C(0x3ffffed)); + uint32_t 
x3; + uint8_t x4; + fiat_25519_subborrowx_u25(&x3, &x4, x2, (arg1[1]), UINT32_C(0x1ffffff)); + uint32_t x5; + uint8_t x6; + fiat_25519_subborrowx_u26(&x5, &x6, x4, (arg1[2]), UINT32_C(0x3ffffff)); + uint32_t x7; + uint8_t x8; + fiat_25519_subborrowx_u25(&x7, &x8, x6, (arg1[3]), UINT32_C(0x1ffffff)); + uint32_t x9; + uint8_t x10; + fiat_25519_subborrowx_u26(&x9, &x10, x8, (arg1[4]), UINT32_C(0x3ffffff)); + uint32_t x11; + uint8_t x12; + fiat_25519_subborrowx_u25(&x11, &x12, x10, (arg1[5]), UINT32_C(0x1ffffff)); + uint32_t x13; + uint8_t x14; + fiat_25519_subborrowx_u26(&x13, &x14, x12, (arg1[6]), UINT32_C(0x3ffffff)); + uint32_t x15; + uint8_t x16; + fiat_25519_subborrowx_u25(&x15, &x16, x14, (arg1[7]), UINT32_C(0x1ffffff)); + uint32_t x17; + uint8_t x18; + fiat_25519_subborrowx_u26(&x17, &x18, x16, (arg1[8]), UINT32_C(0x3ffffff)); + uint32_t x19; + uint8_t x20; + fiat_25519_subborrowx_u25(&x19, &x20, x18, (arg1[9]), UINT32_C(0x1ffffff)); + uint32_t x21; + fiat_25519_cmovznz_u32(&x21, x20, 0x0, UINT32_C(0xffffffff)); + uint32_t x22; + uint8_t x23; + fiat_25519_addcarryx_u26(&x22, &x23, 0x0, x1, (x21 & UINT32_C(0x3ffffed))); + uint32_t x24; + uint8_t x25; + fiat_25519_addcarryx_u25(&x24, &x25, x23, x3, (x21 & UINT32_C(0x1ffffff))); + uint32_t x26; + uint8_t x27; + fiat_25519_addcarryx_u26(&x26, &x27, x25, x5, (x21 & UINT32_C(0x3ffffff))); + uint32_t x28; + uint8_t x29; + fiat_25519_addcarryx_u25(&x28, &x29, x27, x7, (x21 & UINT32_C(0x1ffffff))); + uint32_t x30; + uint8_t x31; + fiat_25519_addcarryx_u26(&x30, &x31, x29, x9, (x21 & UINT32_C(0x3ffffff))); + uint32_t x32; + uint8_t x33; + fiat_25519_addcarryx_u25(&x32, &x33, x31, x11, (x21 & UINT32_C(0x1ffffff))); + uint32_t x34; + uint8_t x35; + fiat_25519_addcarryx_u26(&x34, &x35, x33, x13, (x21 & UINT32_C(0x3ffffff))); + uint32_t x36; + uint8_t x37; + fiat_25519_addcarryx_u25(&x36, &x37, x35, x15, (x21 & UINT32_C(0x1ffffff))); + uint32_t x38; + uint8_t x39; + fiat_25519_addcarryx_u26(&x38, &x39, x37, x17, (x21 & 
UINT32_C(0x3ffffff))); + uint32_t x40; + uint8_t x41; + fiat_25519_addcarryx_u25(&x40, &x41, x39, x19, (x21 & UINT32_C(0x1ffffff))); + uint32_t x42 = (x40 << 6); + uint32_t x43 = (x38 << 4); + uint32_t x44 = (x36 << 3); + uint32_t x45 = (x34 * (uint32_t)0x2); + uint32_t x46 = (x30 << 6); + uint32_t x47 = (x28 << 5); + uint32_t x48 = (x26 << 3); + uint32_t x49 = (x24 << 2); + uint32_t x50 = (x22 >> 8); + uint8_t x51 = (uint8_t)(x22 & UINT8_C(0xff)); + uint32_t x52 = (x50 >> 8); + uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff)); + uint8_t x54 = (uint8_t)(x52 >> 8); + uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff)); + uint32_t x56 = (x54 + x49); + uint32_t x57 = (x56 >> 8); + uint8_t x58 = (uint8_t)(x56 & UINT8_C(0xff)); + uint32_t x59 = (x57 >> 8); + uint8_t x60 = (uint8_t)(x57 & UINT8_C(0xff)); + uint8_t x61 = (uint8_t)(x59 >> 8); + uint8_t x62 = (uint8_t)(x59 & UINT8_C(0xff)); + uint32_t x63 = (x61 + x48); + uint32_t x64 = (x63 >> 8); + uint8_t x65 = (uint8_t)(x63 & UINT8_C(0xff)); + uint32_t x66 = (x64 >> 8); + uint8_t x67 = (uint8_t)(x64 & UINT8_C(0xff)); + uint8_t x68 = (uint8_t)(x66 >> 8); + uint8_t x69 = (uint8_t)(x66 & UINT8_C(0xff)); + uint32_t x70 = (x68 + x47); + uint32_t x71 = (x70 >> 8); + uint8_t x72 = (uint8_t)(x70 & UINT8_C(0xff)); + uint32_t x73 = (x71 >> 8); + uint8_t x74 = (uint8_t)(x71 & UINT8_C(0xff)); + uint8_t x75 = (uint8_t)(x73 >> 8); + uint8_t x76 = (uint8_t)(x73 & UINT8_C(0xff)); + uint32_t x77 = (x75 + x46); + uint32_t x78 = (x77 >> 8); + uint8_t x79 = (uint8_t)(x77 & UINT8_C(0xff)); + uint32_t x80 = (x78 >> 8); + uint8_t x81 = (uint8_t)(x78 & UINT8_C(0xff)); + uint8_t x82 = (uint8_t)(x80 >> 8); + uint8_t x83 = (uint8_t)(x80 & UINT8_C(0xff)); + uint8_t x84 = (uint8_t)(x82 & UINT8_C(0xff)); + uint32_t x85 = (x32 >> 8); + uint8_t x86 = (uint8_t)(x32 & UINT8_C(0xff)); + uint32_t x87 = (x85 >> 8); + uint8_t x88 = (uint8_t)(x85 & UINT8_C(0xff)); + uint8_t x89 = (uint8_t)(x87 >> 8); + uint8_t x90 = (uint8_t)(x87 & UINT8_C(0xff)); + uint32_t x91 = (x89 
+ x45); + uint32_t x92 = (x91 >> 8); + uint8_t x93 = (uint8_t)(x91 & UINT8_C(0xff)); + uint32_t x94 = (x92 >> 8); + uint8_t x95 = (uint8_t)(x92 & UINT8_C(0xff)); + uint8_t x96 = (uint8_t)(x94 >> 8); + uint8_t x97 = (uint8_t)(x94 & UINT8_C(0xff)); + uint32_t x98 = (x96 + x44); + uint32_t x99 = (x98 >> 8); + uint8_t x100 = (uint8_t)(x98 & UINT8_C(0xff)); + uint32_t x101 = (x99 >> 8); + uint8_t x102 = (uint8_t)(x99 & UINT8_C(0xff)); + uint8_t x103 = (uint8_t)(x101 >> 8); + uint8_t x104 = (uint8_t)(x101 & UINT8_C(0xff)); + uint32_t x105 = (x103 + x43); + uint32_t x106 = (x105 >> 8); + uint8_t x107 = (uint8_t)(x105 & UINT8_C(0xff)); + uint32_t x108 = (x106 >> 8); + uint8_t x109 = (uint8_t)(x106 & UINT8_C(0xff)); + uint8_t x110 = (uint8_t)(x108 >> 8); + uint8_t x111 = (uint8_t)(x108 & UINT8_C(0xff)); + uint32_t x112 = (x110 + x42); + uint32_t x113 = (x112 >> 8); + uint8_t x114 = (uint8_t)(x112 & UINT8_C(0xff)); + uint32_t x115 = (x113 >> 8); + uint8_t x116 = (uint8_t)(x113 & UINT8_C(0xff)); + uint8_t x117 = (uint8_t)(x115 >> 8); + uint8_t x118 = (uint8_t)(x115 & UINT8_C(0xff)); + out1[0] = x51; + out1[1] = x53; + out1[2] = x55; + out1[3] = x58; + out1[4] = x60; + out1[5] = x62; + out1[6] = x65; + out1[7] = x67; + out1[8] = x69; + out1[9] = x72; + out1[10] = x74; + out1[11] = x76; + out1[12] = x79; + out1[13] = x81; + out1[14] = x83; + out1[15] = x84; + out1[16] = x86; + out1[17] = x88; + out1[18] = x90; + out1[19] = x93; + out1[20] = x95; + out1[21] = x97; + out1[22] = x100; + out1[23] = x102; + out1[24] = x104; + out1[25] = x107; + out1[26] = x109; + out1[27] = x111; + out1[28] = x114; + out1[29] = x116; + out1[30] = x118; + out1[31] = x117; +} +/* Serialize f into the 32 bytes of s (little-endian). f must satisfy the fe + * bounds (asserted). */ +static inline void +fe_tobytes(uint8_t s[32], const fe *f) +{ + assert_fe(f->v); + fiat_25519_to_bytes(s, f->v); +} + +/* h = f (copied with memmove, so h and f may be the same object) */ +static inline void +fe_copy(fe *h, const fe *f) +{ + memmove(h, f, sizeof(fe)); +} +/* h = f, storing a tightly bounded fe into an fe_loose. The byte-wise copy + * relies on both structs having the same size (asserted). */ +static inline void +fe_copy_lt(fe_loose *h, const fe *f) +{ + PORT_Assert(sizeof(fe) == sizeof(fe_loose)); +
memmove(h, f, sizeof(fe)); +} + +/* + * h = 0 + */ +static inline void +fe_0(fe *h) +{ + memset(h, 0, sizeof(fe)); +} + +/* + * h = 1 + */ +static inline void +fe_1(fe *h) +{ + memset(h, 0, sizeof(fe)); + h->v[0] = 1; +} +/* + * The function fiat_25519_add adds two field elements. + * Postconditions: + * eval out1 mod m = (eval arg1 + eval arg2) mod m + * + * Input Bounds: + * arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] + * arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] + * Output Bounds: + * out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]] + */ +static void +fiat_25519_add(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10]) +{ + uint32_t x1 = ((arg1[0]) + (arg2[0])); + uint32_t x2 = ((arg1[1]) + (arg2[1])); + uint32_t x3 = ((arg1[2]) + (arg2[2])); + uint32_t x4 = ((arg1[3]) + (arg2[3])); + uint32_t x5 = ((arg1[4]) + (arg2[4])); + uint32_t x6 = ((arg1[5]) + (arg2[5])); + uint32_t x7 = ((arg1[6]) + (arg2[6])); + uint32_t x8 = ((arg1[7]) + (arg2[7])); + uint32_t x9 = ((arg1[8]) + (arg2[8])); + uint32_t x10 = ((arg1[9]) + (arg2[9])); + out1[0] = x1; + out1[1] = x2; + out1[2] = x3; + out1[3] = x4; + out1[4] = x5; + out1[5] = x6; + out1[6] = x7; + out1[7] = x8; + out1[8] = x9; + out1[9] = x10; +} + +/* + * Add two field elements. 
+ * h = f + g
+ *
+ * Both inputs are checked with assert_fe and the result with
+ * assert_fe_loose. The sum is stored in an fe_loose because
+ * fiat_25519_add adds limb by limb and performs no carry propagation.
+ */
+static inline void
+fe_add(fe_loose *h, const fe *f, const fe *g)
+{
+    assert_fe(f->v);
+    assert_fe(g->v);
+    fiat_25519_add(h->v, f->v, g->v);
+    assert_fe_loose(h->v);
+}
+
+/*
+ * The function fiat_25519_sub subtracts two field elements.
+ * Postconditions:
+ *   eval out1 mod m = (eval arg1 - eval arg2) mod m
+ *
+ * Input Bounds:
+ *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
+ *   arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
+ *
+ * The constant added to each limb of arg1 before subtracting is
+ * 2 * (2^26 - 19) for limb 0, 2 * (2^25 - 1) for odd limbs and
+ * 2 * (2^26 - 1) for the remaining even limbs. Assuming m = 2^255 - 19 in
+ * the alternating 26/25-bit radix used by this file, that is 2 * m, so the
+ * value mod m is unchanged. Each constant exceeds the corresponding arg2
+ * input bound, so the unsigned subtraction cannot wrap for in-bounds inputs.
+ * Every limb is computed before out1 is written.
+ */
+static void
+fiat_25519_sub(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
+{
+    uint32_t x1 = ((UINT32_C(0x7ffffda) + (arg1[0])) - (arg2[0]));
+    uint32_t x2 = ((UINT32_C(0x3fffffe) + (arg1[1])) - (arg2[1]));
+    uint32_t x3 = ((UINT32_C(0x7fffffe) + (arg1[2])) - (arg2[2]));
+    uint32_t x4 = ((UINT32_C(0x3fffffe) + (arg1[3])) - (arg2[3]));
+    uint32_t x5 = ((UINT32_C(0x7fffffe) + (arg1[4])) - (arg2[4]));
+    uint32_t x6 = ((UINT32_C(0x3fffffe) + (arg1[5])) - (arg2[5]));
+    uint32_t x7 = ((UINT32_C(0x7fffffe) + (arg1[6])) - (arg2[6]));
+    uint32_t x8 = ((UINT32_C(0x3fffffe) + (arg1[7])) - (arg2[7]));
+    uint32_t x9 = ((UINT32_C(0x7fffffe) + (arg1[8])) - (arg2[8]));
+    uint32_t x10 = ((UINT32_C(0x3fffffe) + (arg1[9])) - (arg2[9]));
+    out1[0] = x1;
+    out1[1] = x2;
+    out1[2] = x3;
+    out1[3] = x4;
+    out1[4] = x5;
+    out1[5] = x6;
+    out1[6] = x7;
+    out1[7] = x8;
+    out1[8] = x9;
+    out1[9] = x10;
+}
+
+/*
+ * Subtract two field elements.
+ * h = f - g
+ *
+ * Both inputs are checked with assert_fe and the result with
+ * assert_fe_loose; the difference is produced by fiat_25519_sub.
+ */
+static void
+fe_sub(fe_loose *h, const fe *f, const fe *g)
+{
+    assert_fe(f->v);
+    assert_fe(g->v);
+    fiat_25519_sub(h->v, f->v, g->v);
+    assert_fe_loose(h->v);
+}
+
+/*
+ * The function fiat_25519_carry_mul multiplies two field elements and reduces the result.
+ * Postconditions:
+ *   eval out1 mod m = (eval arg1 * eval arg2) mod m
+ *
+ * Input Bounds:
+ *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
+ *   arg2: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
+ *
+ * Structure of the body: 100 partial products arg1[i] * arg2[j], ten column
+ * sums (one per output limb, column (i + j) mod 10), then a carry chain.
+ * The arg2 operand is pre-scaled in 32-bit arithmetic before the 64-bit
+ * multiply: by 0x13 (19) when i + j >= 10, and by an additional 2 when both
+ * i and j are odd.
+ */
+static void
+fiat_25519_carry_mul(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
+{
+    /* x1..x45: products with i + j >= 10 (scaled by 19, or 2 * 19). */
+    uint64_t x1 = ((uint64_t)(arg1[9]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x2 = ((uint64_t)(arg1[9]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x3 = ((uint64_t)(arg1[9]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x4 = ((uint64_t)(arg1[9]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x5 = ((uint64_t)(arg1[9]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x6 = ((uint64_t)(arg1[9]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x7 = ((uint64_t)(arg1[9]) * ((arg2[3]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x8 = ((uint64_t)(arg1[9]) * ((arg2[2]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x9 = ((uint64_t)(arg1[9]) * ((arg2[1]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x10 = ((uint64_t)(arg1[8]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x11 = ((uint64_t)(arg1[8]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x12 = ((uint64_t)(arg1[8]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x13 = ((uint64_t)(arg1[8]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x14 = ((uint64_t)(arg1[8]) * ((arg2[5]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x15 = ((uint64_t)(arg1[8]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x16 = ((uint64_t)(arg1[8]) * ((arg2[3]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x17 = ((uint64_t)(arg1[8]) * ((arg2[2]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x18 = ((uint64_t)(arg1[7]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x19 = ((uint64_t)(arg1[7]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x20 = ((uint64_t)(arg1[7]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x21 = ((uint64_t)(arg1[7]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x22 = ((uint64_t)(arg1[7]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x23 = ((uint64_t)(arg1[7]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x24 = ((uint64_t)(arg1[7]) * ((arg2[3]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x25 = ((uint64_t)(arg1[6]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x26 = ((uint64_t)(arg1[6]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x27 = ((uint64_t)(arg1[6]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x28 = ((uint64_t)(arg1[6]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x29 = ((uint64_t)(arg1[6]) * ((arg2[5]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x30 = ((uint64_t)(arg1[6]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x31 = ((uint64_t)(arg1[5]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x32 = ((uint64_t)(arg1[5]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x33 = ((uint64_t)(arg1[5]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x34 = ((uint64_t)(arg1[5]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x35 = ((uint64_t)(arg1[5]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x36 = ((uint64_t)(arg1[4]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x37 = ((uint64_t)(arg1[4]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x38 = ((uint64_t)(arg1[4]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x39 = ((uint64_t)(arg1[4]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x40 = ((uint64_t)(arg1[3]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x41 = ((uint64_t)(arg1[3]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x42 = ((uint64_t)(arg1[3]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    uint64_t x43 = ((uint64_t)(arg1[2]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x44 = ((uint64_t)(arg1[2]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+    uint64_t x45 = ((uint64_t)(arg1[1]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+    /* x46..x100: products with i + j <= 9 (scaled by 2 when i and j are both odd). */
+    uint64_t x46 = ((uint64_t)(arg1[9]) * (arg2[0]));
+    uint64_t x47 = ((uint64_t)(arg1[8]) * (arg2[1]));
+    uint64_t x48 = ((uint64_t)(arg1[8]) * (arg2[0]));
+    uint64_t x49 = ((uint64_t)(arg1[7]) * (arg2[2]));
+    uint64_t x50 = ((uint64_t)(arg1[7]) * ((arg2[1]) * (uint32_t)0x2));
+    uint64_t x51 = ((uint64_t)(arg1[7]) * (arg2[0]));
+    uint64_t x52 = ((uint64_t)(arg1[6]) * (arg2[3]));
+    uint64_t x53 = ((uint64_t)(arg1[6]) * (arg2[2]));
+    uint64_t x54 = ((uint64_t)(arg1[6]) * (arg2[1]));
+    uint64_t x55 = ((uint64_t)(arg1[6]) * (arg2[0]));
+    uint64_t x56 = ((uint64_t)(arg1[5]) * (arg2[4]));
+    uint64_t x57 = ((uint64_t)(arg1[5]) * ((arg2[3]) * (uint32_t)0x2));
+    uint64_t x58 = ((uint64_t)(arg1[5]) * (arg2[2]));
+    uint64_t x59 = ((uint64_t)(arg1[5]) * ((arg2[1]) * (uint32_t)0x2));
+    uint64_t x60 = ((uint64_t)(arg1[5]) * (arg2[0]));
+    uint64_t x61 = ((uint64_t)(arg1[4]) * (arg2[5]));
+    uint64_t x62 = ((uint64_t)(arg1[4]) * (arg2[4]));
+    uint64_t x63 = ((uint64_t)(arg1[4]) * (arg2[3]));
+    uint64_t x64 = ((uint64_t)(arg1[4]) * (arg2[2]));
+    uint64_t x65 = ((uint64_t)(arg1[4]) * (arg2[1]));
+    uint64_t x66 = ((uint64_t)(arg1[4]) * (arg2[0]));
+    uint64_t x67 = ((uint64_t)(arg1[3]) * (arg2[6]));
+    uint64_t x68 = ((uint64_t)(arg1[3]) * ((arg2[5]) * (uint32_t)0x2));
+    uint64_t x69 = ((uint64_t)(arg1[3]) * (arg2[4]));
+    uint64_t x70 = ((uint64_t)(arg1[3]) * ((arg2[3]) * (uint32_t)0x2));
+    uint64_t x71 = ((uint64_t)(arg1[3]) * (arg2[2]));
+    uint64_t x72 = ((uint64_t)(arg1[3]) * ((arg2[1]) * (uint32_t)0x2));
+    uint64_t x73 = ((uint64_t)(arg1[3]) * (arg2[0]));
+    uint64_t x74 = ((uint64_t)(arg1[2]) * (arg2[7]));
+    uint64_t x75 = ((uint64_t)(arg1[2]) * (arg2[6]));
+    uint64_t x76 = ((uint64_t)(arg1[2]) * (arg2[5]));
+    uint64_t x77 = ((uint64_t)(arg1[2]) * (arg2[4]));
+    uint64_t x78 = ((uint64_t)(arg1[2]) * (arg2[3]));
+    uint64_t x79 = ((uint64_t)(arg1[2]) * (arg2[2]));
+    uint64_t x80 = ((uint64_t)(arg1[2]) * (arg2[1]));
+    uint64_t x81 = ((uint64_t)(arg1[2]) * (arg2[0]));
+    uint64_t x82 = ((uint64_t)(arg1[1]) * (arg2[8]));
+    uint64_t x83 = ((uint64_t)(arg1[1]) * ((arg2[7]) * (uint32_t)0x2));
+    uint64_t x84 = ((uint64_t)(arg1[1]) * (arg2[6]));
+    uint64_t x85 = ((uint64_t)(arg1[1]) * ((arg2[5]) * (uint32_t)0x2));
+    uint64_t x86 = ((uint64_t)(arg1[1]) * (arg2[4]));
+    uint64_t x87 = ((uint64_t)(arg1[1]) * ((arg2[3]) * (uint32_t)0x2));
+    uint64_t x88 = ((uint64_t)(arg1[1]) * (arg2[2]));
+    uint64_t x89 = ((uint64_t)(arg1[1]) * ((arg2[1]) * (uint32_t)0x2));
+    uint64_t x90 = ((uint64_t)(arg1[1]) * (arg2[0]));
+    uint64_t x91 = ((uint64_t)(arg1[0]) * (arg2[9]));
+    uint64_t x92 = ((uint64_t)(arg1[0]) * (arg2[8]));
+    uint64_t x93 = ((uint64_t)(arg1[0]) * (arg2[7]));
+    uint64_t x94 = ((uint64_t)(arg1[0]) * (arg2[6]));
+    uint64_t x95 = ((uint64_t)(arg1[0]) * (arg2[5]));
+    uint64_t x96 = ((uint64_t)(arg1[0]) * (arg2[4]));
+    uint64_t x97 = ((uint64_t)(arg1[0]) * (arg2[3]));
+    uint64_t x98 = ((uint64_t)(arg1[0]) * (arg2[2]));
+    uint64_t x99 = ((uint64_t)(arg1[0]) * (arg2[1]));
+    uint64_t x100 = ((uint64_t)(arg1[0]) * (arg2[0]));
+    /* Column sums: x101 is limb 0; x104 down to x112 are limbs 9 down to 1. */
+    uint64_t x101 = (x100 + (x45 + (x44 + (x42 + (x39 + (x35 + (x30 + (x24 + (x17 + x9)))))))));
+    uint64_t x102 = (x101 >> 26);
+    uint32_t x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
+    uint64_t x104 = (x91 + (x82 + (x74 + (x67 + (x61 + (x56 + (x52 + (x49 + (x47 + x46)))))))));
+    uint64_t x105 = (x92 + (x83 + (x75 + (x68 + (x62 + (x57 + (x53 + (x50 + (x48 + x1)))))))));
+    uint64_t x106 = (x93 + (x84 + (x76 + (x69 + (x63 + (x58 + (x54 + (x51 + (x10 + x2)))))))));
+    uint64_t x107 = (x94 + (x85 + (x77 + (x70 + (x64 + (x59 + (x55 + (x18 + (x11 + x3)))))))));
+    uint64_t x108 = (x95 + (x86 + (x78 + (x71 + (x65 + (x60 + (x25 + (x19 + (x12 + x4)))))))));
+    uint64_t x109 = (x96 + (x87 + (x79 + (x72 + (x66 + (x31 + (x26 + (x20 + (x13 + x5)))))))));
+    uint64_t x110 = (x97 + (x88 + (x80 + (x73 + (x36 + (x32 + (x27 + (x21 + (x14 + x6)))))))));
+    uint64_t x111 = (x98 + (x89 + (x81 + (x40 + (x37 + (x33 + (x28 + (x22 + (x15 + x7)))))))));
+    uint64_t x112 = (x99 + (x90 + (x43 + (x41 + (x38 + (x34 + (x29 + (x23 + (x16 + x8)))))))));
+    /* Carry chain: even limbs keep 26 bits, odd limbs keep 25 bits. */
+    uint64_t x113 = (x102 + x112);
+    uint64_t x114 = (x113 >> 25);
+    uint32_t x115 = (uint32_t)(x113 & UINT32_C(0x1ffffff));
+    uint64_t x116 = (x114 + x111);
+    uint64_t x117 = (x116 >> 26);
+    uint32_t x118 = (uint32_t)(x116 & UINT32_C(0x3ffffff));
+    uint64_t x119 = (x117 + x110);
+    uint64_t x120 = (x119 >> 25);
+    uint32_t x121 = (uint32_t)(x119 & UINT32_C(0x1ffffff));
+    uint64_t x122 = (x120 + x109);
+    uint64_t x123 = (x122 >> 26);
+    uint32_t x124 = (uint32_t)(x122 & UINT32_C(0x3ffffff));
+    uint64_t x125 = (x123 + x108);
+    uint64_t x126 = (x125 >> 25);
+    uint32_t x127 = (uint32_t)(x125 & UINT32_C(0x1ffffff));
+    uint64_t x128 = (x126 + x107);
+    uint64_t x129 = (x128 >> 26);
+    uint32_t x130 = (uint32_t)(x128 & UINT32_C(0x3ffffff));
+    uint64_t x131 = (x129 + x106);
+    uint64_t x132 = (x131 >> 25);
+    uint32_t x133 = (uint32_t)(x131 & UINT32_C(0x1ffffff));
+    uint64_t x134 = (x132 + x105);
+    uint64_t x135 = (x134 >> 26);
+    uint32_t x136 = (uint32_t)(x134 & UINT32_C(0x3ffffff));
+    uint64_t x137 = (x135 + x104);
+    uint64_t x138 = (x137 >> 25);
+    uint32_t x139 = (uint32_t)(x137 & UINT32_C(0x1ffffff));
+    /* The carry out of limb 9 is multiplied by 19 and folded back into limb 0. */
+    uint64_t x140 = (x138 * (uint64_t)UINT8_C(0x13));
+    uint64_t x141 = (x103 + x140);
+    uint32_t x142 = (uint32_t)(x141 >> 26);
+    uint32_t x143 = (uint32_t)(x141 & UINT32_C(0x3ffffff));
+    uint32_t x144 = (x142 + x115);
+    uint32_t x145 = (x144 >> 25);
+    uint32_t x146 = (x144 & UINT32_C(0x1ffffff));
+    uint32_t x147 = (x145 + x118);
+    out1[0] = x143;
+    out1[1] = x146;
+    out1[2] = x147;
+    out1[3] = x121;
+    out1[4] = x124;
+    out1[5] = x127;
+    out1[6] = x130;
+    out1[7] = x133;
+    out1[8] = x136;
+    out1[9] = x139;
+}
+
+/* out1 = arg1 * arg2 via fiat_25519_carry_mul; inputs are checked with
+ * assert_fe_loose and the result with assert_fe. */
+static void
+fe_mul(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
+{
+    assert_fe_loose(arg1);
+    assert_fe_loose(arg2);
+    fiat_25519_carry_mul(out1, arg1, arg2);
+    assert_fe(out1);
+}
+
+/* Typed wrappers around fe_mul. The three suffix letters give the types of
+ * (h, f, g) in order: 't' for fe, 'l' for fe_loose. */
+static void
+fe_mul_ttt(fe *h, const fe *f, const fe *g)
+{
+    fe_mul(h->v, f->v, g->v);
+}
+
+static void
+fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
+{
+    fe_mul(h->v, f->v, g->v);
+}
+
+static void
+fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
+{
+    fe_mul(h->v, f->v, g->v);
+}
+
+/* out = in1 * in1, carried back to ten limbs.
+ *
+ * All ten input limbs are read into locals before out is written.
+ * x19..x37 are the 19 product columns (x19 = column 0 ... x37 = column 18).
+ * x38..x64 add 19 times each of columns 10..18 to columns 0..8, computing
+ * 19 * x as (x << 4) + (x << 1) + x. x65..x99 are the 26/25-bit carry chain.
+ * Unlike fe_mul, this function performs no bounds assertions itself.
+ */
+static void
+fe_sq(uint32_t out[10], const uint32_t in1[10])
+{
+    const uint32_t x17 = in1[9];
+    const uint32_t x18 = in1[8];
+    const uint32_t x16 = in1[7];
+    const uint32_t x14 = in1[6];
+    const uint32_t x12 = in1[5];
+    const uint32_t x10 = in1[4];
+    const uint32_t x8 = in1[3];
+    const uint32_t x6 = in1[2];
+    const uint32_t x4 = in1[1];
+    const uint32_t x2 = in1[0];
+    uint64_t x19 = ((uint64_t)x2 * x2);
+    uint64_t x20 = ((uint64_t)(0x2 * x2) * x4);
+    uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6)));
+    uint64_t x22 = (0x2 * (((uint64_t)x4 * x6) + ((uint64_t)x2 * x8)));
+    uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10));
+    uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12)));
+    uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12)));
+    uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16)));
+    uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12))))));
+    uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17)));
+    uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17)))));
+    uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17)));
+    uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17))))));
+    uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17)));
+    uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17)));
+    uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17)));
+    uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17));
+    uint64_t x36 = ((uint64_t)(0x2 * x18) * x17);
+    uint64_t x37 = ((uint64_t)(0x2 * x17) * x17);
+    uint64_t x38 = (x27 + (x37 << 0x4));
+    uint64_t x39 = (x38 + (x37 << 0x1));
+    uint64_t x40 = (x39 + x37);
+    uint64_t x41 = (x26 + (x36 << 0x4));
+    uint64_t x42 = (x41 + (x36 << 0x1));
+    uint64_t x43 = (x42 + x36);
+    uint64_t x44 = (x25 + (x35 << 0x4));
+    uint64_t x45 = (x44 + (x35 << 0x1));
+    uint64_t x46 = (x45 + x35);
+    uint64_t x47 = (x24 + (x34 << 0x4));
+    uint64_t x48 = (x47 + (x34 << 0x1));
+    uint64_t x49 = (x48 + x34);
+    uint64_t x50 = (x23 + (x33 << 0x4));
+    uint64_t x51 = (x50 + (x33 << 0x1));
+    uint64_t x52 = (x51 + x33);
+    uint64_t x53 = (x22 + (x32 << 0x4));
+    uint64_t x54 = (x53 + (x32 << 0x1));
+    uint64_t x55 = (x54 + x32);
+    uint64_t x56 = (x21 + (x31 << 0x4));
+    uint64_t x57 = (x56 + (x31 << 0x1));
+    uint64_t x58 = (x57 + x31);
+    uint64_t x59 = (x20 + (x30 << 0x4));
+    uint64_t x60 = (x59 + (x30 << 0x1));
+    uint64_t x61 = (x60 + x30);
+    uint64_t x62 = (x19 + (x29 << 0x4));
+    uint64_t x63 = (x62 + (x29 << 0x1));
+    uint64_t x64 = (x63 + x29);
+    uint64_t x65 = (x64 >> 0x1a);
+    uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
+    uint64_t x67 = (x65 + x61);
+    uint64_t x68 = (x67 >> 0x19);
+    uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
+    uint64_t x70 = (x68 + x58);
+    uint64_t x71 = (x70 >> 0x1a);
+    uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
+    uint64_t x73 = (x71 + x55);
+    uint64_t x74 = (x73 >> 0x19);
+    uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
+    uint64_t x76 = (x74 + x52);
+    uint64_t x77 = (x76 >> 0x1a);
+    uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
+    uint64_t x79 = (x77 + x49);
+    uint64_t x80 = (x79 >> 0x19);
+    uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
+    uint64_t x82 = (x80 + x46);
+    uint64_t x83 = (x82 >> 0x1a);
+    uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
+    uint64_t x85 = (x83 + x43);
+    uint64_t x86 = (x85 >> 0x19);
+    uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+    uint64_t x88 = (x86 + x40);
+    uint64_t x89 = (x88 >> 0x1a);
+    uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
+    uint64_t x91 = (x89 + x28);
+    uint64_t x92 = (x91 >> 0x19);
+    uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+    uint64_t x94 = (x66 + (0x13 * x92));
+    uint32_t x95 = (uint32_t)(x94 >> 0x1a);
+    uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
+    uint32_t x97 = (x95 + x69);
+    uint32_t x98 = (x97 >> 0x19);
+    uint32_t x99 = (x97 & 0x1ffffff);
+    out[0] = x96;
+    out[1] = x99;
+    out[2] = (x98 + x72);
+    out[3] = x75;
+    out[4] = x78;
+    out[5] = x81;
+    out[6] = x84;
+    out[7] = x87;
+    out[8] = x90;
+    out[9] = x93;
+}
+
+/* h = f^2 for an fe_loose input. */
+static void
+fe_sq_tl(fe *h, const fe_loose *f)
+{
+    fe_sq(h->v, f->v);
+}
+
+/* h = f^2 for an fe input. */
+static void
+fe_sq_tt(fe *h, const fe *f)
+{
+    fe_sq(h->v, f->v);
+}
+
+/* out = z^(2^255 - 21), computed with a fixed sequence of squarings and
+ * multiplications. If the field modulus is p = 2^255 - 19 this exponent is
+ * p - 2, i.e. the result is the inverse of z. The trailing comments give the
+ * exponent of z held by the destination after each step. */
+static inline void
+fe_loose_invert(fe *out, const fe_loose *z)
+{
+    fe t0, t1, t2, t3;
+    int i;
+
+    fe_sq_tl(&t0, z);   /* t0 = z^2 */
+    fe_sq_tt(&t1, &t0); /* t1 = z^4 */
+    for (i = 1; i < 2; ++i) {
+        fe_sq_tt(&t1, &t1); /* t1 = z^8 */
+    }
+    fe_mul_tlt(&t1, z, &t1);   /* t1 = z^9 */
+    fe_mul_ttt(&t0, &t0, &t1); /* t0 = z^11 */
+    fe_sq_tt(&t2, &t0);        /* t2 = z^22 */
+    fe_mul_ttt(&t1, &t1, &t2); /* t1 = z^(2^5 - 1) */
+    fe_sq_tt(&t2, &t1);
+    for (i = 1; i < 5; ++i) {
+        fe_sq_tt(&t2, &t2);
+    }
+    fe_mul_ttt(&t1, &t2, &t1); /* t1 = z^(2^10 - 1) */
+    fe_sq_tt(&t2, &t1);
+    for (i = 1; i < 10; ++i) {
+        fe_sq_tt(&t2, &t2);
+    }
+    fe_mul_ttt(&t2, &t2, &t1); /* t2 = z^(2^20 - 1) */
+    fe_sq_tt(&t3, &t2);
+    for (i = 1; i < 20; ++i) {
+        fe_sq_tt(&t3, &t3);
+    }
+    fe_mul_ttt(&t2, &t3, &t2); /* t2 = z^(2^40 - 1) */
+    fe_sq_tt(&t2, &t2);
+    for (i = 1; i < 10; ++i) {
+        fe_sq_tt(&t2, &t2);
+    }
+    fe_mul_ttt(&t1, &t2, &t1); /* t1 = z^(2^50 - 1) */
+    fe_sq_tt(&t2, &t1);
+    for (i = 1; i < 50; ++i) {
+        fe_sq_tt(&t2, &t2);
+    }
+    fe_mul_ttt(&t2, &t2, &t1); /* t2 = z^(2^100 - 1) */
+    fe_sq_tt(&t3, &t2);
+    for (i = 1; i < 100; ++i) {
+        fe_sq_tt(&t3, &t3);
+    }
+    fe_mul_ttt(&t2, &t3, &t2); /* t2 = z^(2^200 - 1) */
+    fe_sq_tt(&t2, &t2);
+    for (i = 1; i < 50; ++i) {
+        fe_sq_tt(&t2, &t2);
+    }
+    fe_mul_ttt(&t1, &t2, &t1); /* t1 = z^(2^250 - 1) */
+    fe_sq_tt(&t1, &t1);
+    for (i = 1; i < 5; ++i) {
+        fe_sq_tt(&t1, &t1);
+    }
+    /* t1 = z^(2^255 - 32); multiplying by t0 = z^11 gives z^(2^255 - 21). */
+    fe_mul_ttt(out, &t1, &t0);
+}
+
+/* Same as fe_loose_invert for an fe input: z is copied into a local
+ * fe_loose first. out may be the same object as z. */
+static inline void
+fe_invert(fe *out, const fe *z)
+{
+    fe_loose l;
+    fe_copy_lt(&l, z);
+    fe_loose_invert(out, &l);
+}
+
+/* Replace (f,g) with (g,f) if b == 1;
+ * replace (f,g) with (f,g) if b == 0.
+ *
+ * Preconditions: b in {0,1}
+ *
+ * b is expanded to an all-zeros or all-ones mask (0 - b), so the same
+ * XOR/AND sequence runs for either value of b.
+ */
+static inline void
+fe_cswap(fe *f, fe *g, unsigned int b)
+{
+    PORT_Assert(b < 2);
+    unsigned int i;
+    b = 0 - b;
+    for (i = 0; i < 10; i++) {
+        uint32_t x = f->v[i] ^ g->v[i];
+        x &= b;
+        f->v[i] ^= x;
+        g->v[i] ^= x;
+    }
+}
+
+/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
+/* out = in1 * 121666, carried back to ten limbs. x23 is the only non-zero
+ * limb of the constant operand; x25..x39 are its remaining (zero) limbs, so
+ * every product involving them contributes nothing. The generated
+ * expression structure is kept as is. */
+static inline void
+fe_mul_121666(uint32_t out[10], const uint32_t in1[10])
+{
+    const uint32_t x20 = in1[9];
+    const uint32_t x21 = in1[8];
+    const uint32_t x19 = in1[7];
+    const uint32_t x17 = in1[6];
+    const uint32_t x15 = in1[5];
+    const uint32_t x13 = in1[4];
+    const uint32_t x11 = in1[3];
+    const uint32_t x9 = in1[2];
+    const uint32_t x7 = in1[1];
+    const uint32_t x5 = in1[0];
+    const uint32_t x38 = 0;
+    const uint32_t x39 = 0;
+    const uint32_t x37 = 0;
+    const uint32_t x35 = 0;
+    const uint32_t x33 = 0;
+    const uint32_t x31 = 0;
+    const uint32_t x29 = 0;
+    const uint32_t x27 = 0;
+    const uint32_t x25 = 0;
+    const uint32_t x23 = 121666;
+    uint64_t x40 = ((uint64_t)x23 * x5);
+    uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5));
+    uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5));
+    uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5));
+    uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5));
+    uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5));
+    uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5));
+    uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5));
+    uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5));
+    uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5));
+    uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9));
+    uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9));
+    uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13));
+    uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13));
+    uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17));
+    uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17));
+    uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19))));
+    uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21));
+    uint64_t x58 = ((uint64_t)(0x2 * x38) * x20);
+    uint64_t x59 = (x48 + (x58 << 0x4));
+    uint64_t x60 = (x59 + (x58 << 0x1));
+    uint64_t x61 = (x60 + x58);
+    uint64_t x62 = (x47 + (x57 << 0x4));
+    uint64_t x63 = (x62 + (x57 << 0x1));
+    uint64_t x64 = (x63 + x57);
+    uint64_t x65 = (x46 + (x56 << 0x4));
+    uint64_t x66 = (x65 + (x56 << 0x1));
+    uint64_t x67 = (x66 + x56);
+    uint64_t x68 = (x45 + (x55 << 0x4));
+    uint64_t x69 = (x68 + (x55 << 0x1));
+    uint64_t x70 = (x69 + x55);
+    uint64_t x71 = (x44 + (x54 << 0x4));
+    uint64_t x72 = (x71 + (x54 << 0x1));
+    uint64_t x73 = (x72 + x54);
+    uint64_t x74 = (x43 + (x53 << 0x4));
+    uint64_t x75 = (x74 + (x53 << 0x1));
+    uint64_t x76 = (x75 + x53);
+    uint64_t x77 = (x42 + (x52 << 0x4));
+    uint64_t x78 = (x77 + (x52 << 0x1));
+    uint64_t x79 = (x78 + x52);
+    uint64_t x80 = (x41 + (x51 << 0x4));
+    uint64_t x81 = (x80 + (x51 << 0x1));
+    uint64_t x82 = (x81 + x51);
+    uint64_t x83 = (x40 + (x50 << 0x4));
+    uint64_t x84 = (x83 + (x50 << 0x1));
+    uint64_t x85 = (x84 + x50);
+    uint64_t x86 = (x85 >> 0x1a);
+    uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
+    uint64_t x88 = (x86 + x82);
+    uint64_t x89 = (x88 >> 0x19);
+    uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
+    uint64_t x91 = (x89 + x79);
+    uint64_t x92 = (x91 >> 0x1a);
+    uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
+    uint64_t x94 = (x92 + x76);
+    uint64_t x95 = (x94 >> 0x19);
+    uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
+    uint64_t x97 = (x95 + x73);
+    uint64_t x98 = (x97 >> 0x1a);
+    uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
+    uint64_t x100 = (x98 + x70);
+    uint64_t x101 = (x100 >> 0x19);
+    uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
+    uint64_t x103 = (x101 + x67);
+    uint64_t x104 = (x103 >> 0x1a);
+    uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
+    uint64_t x106 = (x104 + x64);
+    uint64_t x107 = (x106 >> 0x19);
+    uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
+    uint64_t x109 = (x107 + x61);
+    uint64_t x110 = (x109 >> 0x1a);
+    uint32_t x111 = ((uint32_t)x109 & 0x3ffffff);
+    uint64_t x112 = (x110 + x49);
+    uint64_t x113 = (x112 >> 0x19);
+    uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
+    uint64_t x115 = (x87 + (0x13 * x113));
+    uint32_t x116 = (uint32_t)(x115 >> 0x1a);
+    uint32_t x117 = ((uint32_t)x115 & 0x3ffffff);
+    uint32_t x118 = (x116 + x90);
+    uint32_t x119 = (x118 >> 0x19);
+    uint32_t x120 = (x118 & 0x1ffffff);
+    out[0] = x117;
+    out[1] = x120;
+    out[2] = (x119 + x93);
+    out[3] = x96;
+    out[4] = x99;
+    out[5] = x102;
+    out[6] = x105;
+    out[7] = x108;
+    out[8] = x111;
+    out[9] = x114;
+}
+
+/* h = f * 121666; f is checked with assert_fe_loose and h with assert_fe. */
+static void
+fe_mul_121666_tl(fe *h, const fe_loose *f)
+{
+    assert_fe_loose(f->v);
+    fe_mul_121666(h->v, f->v);
+    assert_fe(h->v);
+}
+
+/*
+ * Scalar multiplication on Curve25519 (x-coordinate only).
+ *
+ * out    - 32-byte result
+ * scalar - 32-byte scalar; a local copy is clamped (low three bits and the
+ *          top bit cleared, bit 254 set) before use
+ * point  - 32-byte encoded x-coordinate of the input point
+ *
+ * Bits 254..0 of the clamped scalar are processed from high to low; each
+ * iteration conditionally swaps the two working points with fe_cswap and
+ * then runs the same sequence of field operations regardless of the bit.
+ * Always returns 0.
+ */
+SECStatus
+ec_Curve25519_mul(PRUint8 *out, const PRUint8 *scalar, const PRUint8 *point)
+{
+    fe x1, x2, z2, x3, z3, tmp0, tmp1;
+    fe_loose x2l, z2l, x3l, tmp0l, tmp1l;
+    unsigned int swap = 0;
+    unsigned int b;
+    int pos;
+    uint8_t e[32];
+
+    memcpy(e, scalar, 32);
+    e[0] &= 0xF8;
+    e[31] &= 0x7F;
+    e[31] |= 0x40;
+
+    fe_frombytes(&x1, point);
+    fe_1(&x2);
+    fe_0(&z2);
+    fe_copy(&x3, &x1);
+    fe_1(&z3);
+
+    for (pos = 254; pos >= 0; --pos) {
+        b = e[pos / 8] >> (pos & 7);
+        b &= 1;
+        swap ^= b;
+        fe_cswap(&x2, &x3, swap);
+        fe_cswap(&z2, &z3, swap);
+        swap = b;
+        fe_sub(&tmp0l, &x3, &z3);
+        fe_sub(&tmp1l, &x2, &z2);
+        fe_add(&x2l, &x2, &z2);
+        fe_add(&z2l, &x3, &z3);
+        fe_mul_tll(&z3, &tmp0l, &x2l);
+        fe_mul_tll(&z2, &z2l, &tmp1l);
+        fe_sq_tl(&tmp0, &tmp1l);
+        fe_sq_tl(&tmp1, &x2l);
+        fe_add(&x3l, &z3, &z2);
+        fe_sub(&z2l, &z3, &z2);
+        fe_mul_ttt(&x2, &tmp1, &tmp0);
+        fe_sub(&tmp1l, &tmp1, &tmp0);
+        fe_sq_tl(&z2, &z2l);
+        fe_mul_121666_tl(&z3, &tmp1l);
+        fe_sq_tl(&x3, &x3l);
+        fe_add(&tmp0l, &tmp0, &z3);
+        fe_mul_ttt(&z3, &x1, &z2);
+        fe_mul_tll(&z2, &tmp1l, &tmp0l);
+    }
+
+    fe_cswap(&x2, &x3, swap);
+    fe_cswap(&z2, &z3, swap);
+
+    fe_invert(&z2, &z2);
+    fe_mul_ttt(&x2, &x2, &z2);
+    fe_tobytes(out, &x2);
+
+    /* Wipe every scalar-dependent temporary, including the tmp* values that
+     * hold ladder intermediates.
+     * NOTE(review): a plain memset on dead locals may be removed by the
+     * optimizer; consider the project's secure-zeroing helper if one exists. */
+    memset(x1.v, 0, sizeof(x1));
+    memset(x2.v, 0, sizeof(x2));
+    memset(z2.v, 0, sizeof(z2));
+    memset(x3.v, 0, sizeof(x3));
+    memset(z3.v, 0, sizeof(z3));
+    memset(tmp0.v, 0, sizeof(tmp0));
+    memset(tmp1.v, 0, sizeof(tmp1));
+    memset(x2l.v, 0, sizeof(x2l));
+    memset(z2l.v, 0, sizeof(z2l));
+    memset(x3l.v, 0, sizeof(x3l));
+    memset(tmp0l.v, 0, sizeof(tmp0l));
+    memset(tmp1l.v, 0, sizeof(tmp1l));
+    memset(e, 0, sizeof(e));
+    return 0;
+}
diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c
new file mode 100644
index 0000000000..e346bdb8b8
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/curve25519_64.c
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-priv.h"
+
+#if HACL_CAN_COMPILE_INLINE_ASM
+#include "../verified/Hacl_Curve25519_64.h"
+#else
+#include "../verified/Hacl_Curve25519_51.h"
+#endif
+
+SECStatus
+ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, const uint8_t *basepoint)
+{
+// Note: this cast is safe because HaCl* state has a post-condition that only "mypublic" changed.
+#if HACL_CAN_COMPILE_INLINE_ASM /* same test as the #include selection above */
+    Hacl_Curve25519_64_ecdh(mypublic, (uint8_t *)secret, (uint8_t *)basepoint);
+#else
+    Hacl_Curve25519_51_ecdh(mypublic, (uint8_t *)secret, (uint8_t *)basepoint);
+#endif
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/ecl/ec_naf.c b/security/nss/lib/freebl/ecl/ec_naf.c
new file mode 100644
index 0000000000..cad08cb278
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ec_naf.c
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-priv.h"
+
+/* Returns 2^e as an integer. This is meant to be used for small powers of
+ * two: the result must fit in an int, otherwise the repeated doubling
+ * overflows. For e <= 0 the loop does not run and 1 is returned. */
+int
+ec_twoTo(int e)
+{
+    int a = 1;
+    int i;
+
+    for (i = 0; i < e; i++) {
+        a *= 2;
+    }
+    return a;
+}
+
+/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should
+ * be an array of signed char's to output to with room for bitsize + 1
+ * entries (indices 0..bitsize are written), in is the original scalar,
+ * and w is the window size.
+ * NAF is discussed in the paper: D. Hankerson, J. 
Hernandez and A.
+ * Menezes, "Software implementation of elliptic curve cryptography over
+ * binary fields", Proc. CHES 2000.
+ *
+ * Digits are emitted least-significant first; entries of out after the
+ * last computed digit, up to and including index bitsize, are set to 0.
+ * Returns MP_OKAY on success, MP_BADARG if the expansion would need an
+ * index beyond bitsize, or the error reported by the failing mp_* call. */
+mp_err
+ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, int w)
+{
+    mp_int k;
+    mp_err res = MP_OKAY;
+    int i, twowm1, mask;
+
+    twowm1 = ec_twoTo(w - 1);
+    mask = 2 * twowm1 - 1;
+
+    MP_DIGITS(&k) = 0;
+    MP_CHECKOK(mp_init_copy(&k, in));
+
+    i = 0;
+    /* Compute wNAF form */
+    while (mp_cmp_z(&k) > 0) {
+        /* The zero-fill loop below treats 0..bitsize as the valid index
+         * range of out; refuse to write past it. */
+        if (i > bitsize) {
+            res = MP_BADARG;
+            goto CLEANUP;
+        }
+        if (mp_isodd(&k)) {
+            out[i] = MP_DIGIT(&k, 0) & mask;
+            if (out[i] >= twowm1)
+                out[i] -= 2 * twowm1;
+
+            /* Subtract off out[i].  Note mp_sub_d only works with
+             * unsigned digits */
+            if (out[i] >= 0) {
+                MP_CHECKOK(mp_sub_d(&k, out[i], &k));
+            } else {
+                MP_CHECKOK(mp_add_d(&k, -(out[i]), &k));
+            }
+        } else {
+            out[i] = 0;
+        }
+        MP_CHECKOK(mp_div_2(&k, &k));
+        i++;
+    }
+    /* Zero out the remaining elements of the out array. */
+    for (; i < bitsize + 1; i++) {
+        out[i] = 0;
+    }
+CLEANUP:
+    mp_clear(&k);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecl-curve.h b/security/nss/lib/freebl/ecl/ecl-curve.h
new file mode 100644
index 0000000000..dec3ce387d
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-curve.h
@@ -0,0 +1,245 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ecl-exp.h" +#include "eclt.h" +#include + +#ifndef __ecl_curve_h_ +#define __ecl_curve_h_ + +/* copied from certt.h */ +#define KU_DIGITAL_SIGNATURE (0x80) /* bit 0 */ +#define KU_KEY_AGREEMENT (0x08) /* bit 4 */ + +static const PRUint8 irr256[32] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static const PRUint8 a256[32] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC }; +static const PRUint8 b256[32] = { 0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, 0xB3, 0xEB, 0xBD, 0x55, + 0x76, 0x98, 0x86, 0xBC, 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, + 0x3B, 0xCE, 0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B }; +static const PRUint8 x256[32] = { 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5, + 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, + 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96 }; +static const PRUint8 y256[32] = { 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, + 0x7C, 0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, + 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5 }; +static const PRUint8 order256[32] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, + 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51 }; +static const PRUint8 base256[66] = { 0x04, 0x00, + 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5, + 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, + 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96, + 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, + 0x7C, 0x0F, 0x9E, 0x16, 0x2B, 0xCE, 
0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, + 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5 }; + +static const ECCurveBytes ecCurve_NIST_P256 = { + "NIST-P256", ECField_GFp, 256, + irr256, a256, b256, x256, y256, order256, base256, + 1, 128, 66, 32, + KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const PRUint8 irr384[48] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF }; +static const PRUint8 a384[48] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC }; +static const PRUint8 b384[48] = { 0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, 0x98, 0x8E, 0x05, 0x6B, + 0xE3, 0xF8, 0x2D, 0x19, 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12, + 0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, 0xC6, 0x56, 0x39, 0x8D, + 0x8A, 0x2E, 0xD1, 0x9D, 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF }; +static const PRUint8 x384[48] = { 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E, + 0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98, + 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D, + 0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7 }; +static const PRUint8 y384[48] = { 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF, + 0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C, + 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE, + 0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F }; +static const PRUint8 order384[48] = { 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, 0x58, 0x1A, 0x0D, 0xB2, + 0x48, 0xB0, 0xA7, 0x7A, 0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73 }; +static const PRUint8 base384[98] = { 0x04, 0x00, + 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E, + 0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98, + 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D, + 0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7, + 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF, + 0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C, + 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE, + 0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F }; + +static const ECCurveBytes ecCurve_NIST_P384 = { + "NIST-P384", ECField_GFp, 384, + irr384, a384, b384, x384, y384, order384, base384, + 1, 192, 98, 48, + KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const PRUint8 irr521[66] = { 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static const PRUint8 a521[66] = { 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFC }; +static const PRUint8 b521[66] = { 0x00, 0x51, 0x95, 0x3E, 0xB9, 0x61, 0x8E, 0x1C, 0x9A, 0x1F, 0x92, 0x9A, + 0x21, 0xA0, 0xB6, 0x85, 0x40, 0xEE, 0xA2, 0xDA, 0x72, 0x5B, 0x99, 0xB3, + 0x15, 0xF3, 0xB8, 0xB4, 0x89, 0x91, 0x8E, 0xF1, 0x09, 0xE1, 0x56, 0x19, + 0x39, 0x51, 0xEC, 0x7E, 0x93, 0x7B, 0x16, 0x52, 0xC0, 0xBD, 0x3B, 0xB1, + 0xBF, 0x07, 0x35, 0x73, 0xDF, 0x88, 0x3D, 0x2C, 0x34, 0xF1, 0xEF, 0x45, + 0x1F, 0xD4, 0x6B, 0x50, 0x3F, 0x00 }; +static const PRUint8 x521[66] = { 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9, 0xCD, 0x9E, 0x3E, + 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05, 0x3F, + 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1, 0x4B, + 0x5E, 0x77, 0xEF, 0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF, + 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9, 0x7E, + 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66 }; +static const PRUint8 y521[66] = { 0x01, 0x18, 0x39, 0x29, 0x6A, 0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, + 0x5F, 0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B, + 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, + 0x72, 0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD, + 0x07, 0x61, 0x35, 0x3C, 0x70, 0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, + 0x94, 0x76, 0x9F, 0xD1, 0x66, 0x50 }; +static const PRUint8 order521[66] = { 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFA, 0x51, 0x86, + 0x87, 0x83, 0xBF, 0x2F, 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09, + 0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, 0x47, 0xAE, 0xBB, 0x6F, + 0xB7, 0x1E, 0x91, 0x38, 0x64, 0x09 }; +static const PRUint8 base521[134] = { + 0x04, 0x00, + 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9, 0xCD, 0x9E, 0x3E, + 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05, 0x3F, + 
0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1, 0x4B, + 0x5E, 0x77, 0xEF, 0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF, + 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9, 0x7E, + 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66, + 0x01, 0x18, 0x39, 0x29, 0x6A, 0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, + 0x5F, 0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B, + 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, + 0x72, 0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD, + 0x07, 0x61, 0x35, 0x3C, 0x70, 0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, + 0x94, 0x76, 0x9F, 0xD1, 0x66, 0x50 +}; + +static const ECCurveBytes ecCurve_NIST_P521 = { + "NIST-P521", ECField_GFp, 521, + irr521, a521, b521, x521, y521, order521, base521, + 1, 256, 134, 66, + KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const PRUint8 irr25519[32] = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }; +static const PRUint8 a25519[32] = { 0x06, 0x6d, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; +static const PRUint8 b25519[32] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; +static const PRUint8 x25519[32] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09 }; +static const PRUint8 y25519[32] = { 0xd9, 0xd3, 0xce, 0x7e, 0xa2, 0xc5, 0xe9, 0x29, 0xb2, 0x61, 0x7c, 0x6d, + 0x7e, 0x4d, 0x3d, 0x92, 0x4c, 0xd1, 0x48, 0x77, 0x2c, 0xdd, 0x1e, 0xe0, + 
0xb4, 0x86, 0xa0, 0xb8, 0xa1, 0x19, 0xae, 0x20 }; +static const PRUint8 order25519[32] = { 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7, 0xa2, + 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 }; +static const PRUint8 base25519[66] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + 0xd9, 0xd3, 0xce, 0x7e, 0xa2, 0xc5, 0xe9, 0x29, 0xb2, 0x61, 0x7c, 0x6d, + 0x7e, 0x4d, 0x3d, 0x92, 0x4c, 0xd1, 0x48, 0x77, 0x2c, 0xdd, 0x1e, 0xe0, + 0xb4, 0x86, 0xa0, 0xb8, 0xa1, 0x19, 0xae, 0x20, 0x00, 0x04 }; + +static const ECCurveBytes ecCurve_25519 = { + "Curve25519", ECField_GFp, 255, + irr25519, a25519, b25519, x25519, y25519, order25519, base25519, + 8, 128, 66, 32, + KU_KEY_AGREEMENT +}; + +/* mapping between ECCurveName enum and pointers to ECCurveParams */ +static const ECCurveBytes *ecCurve_map[] = { + NULL, /* ECCurve_noName */ + NULL, /* ECCurve_NIST_P192 */ + NULL, /* ECCurve_NIST_P224 */ + &ecCurve_NIST_P256, /* ECCurve_NIST_P256 */ + &ecCurve_NIST_P384, /* ECCurve_NIST_P384 */ + &ecCurve_NIST_P521, /* ECCurve_NIST_P521 */ + NULL, /* ECCurve_NIST_K163 */ + NULL, /* ECCurve_NIST_B163 */ + NULL, /* ECCurve_NIST_K233 */ + NULL, /* ECCurve_NIST_B233 */ + NULL, /* ECCurve_NIST_K283 */ + NULL, /* ECCurve_NIST_B283 */ + NULL, /* ECCurve_NIST_K409 */ + NULL, /* ECCurve_NIST_B409 */ + NULL, /* ECCurve_NIST_K571 */ + NULL, /* ECCurve_NIST_B571 */ + NULL, /* ECCurve_X9_62_PRIME_192V2 */ + NULL, /* ECCurve_X9_62_PRIME_192V3 */ + NULL, /* ECCurve_X9_62_PRIME_239V1 */ + NULL, /* ECCurve_X9_62_PRIME_239V2 */ + NULL, /* ECCurve_X9_62_PRIME_239V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V2 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB176V1 */ + NULL, /* 
ECCurve_X9_62_CHAR2_TNB191V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V2 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB208W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V2 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB272W1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB304W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB359V1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB368W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB431R1 */ + NULL, /* ECCurve_SECG_PRIME_112R1 */ + NULL, /* ECCurve_SECG_PRIME_112R2 */ + NULL, /* ECCurve_SECG_PRIME_128R1 */ + NULL, /* ECCurve_SECG_PRIME_128R2 */ + NULL, /* ECCurve_SECG_PRIME_160K1 */ + NULL, /* ECCurve_SECG_PRIME_160R1 */ + NULL, /* ECCurve_SECG_PRIME_160R2 */ + NULL, /* ECCurve_SECG_PRIME_192K1 */ + NULL, /* ECCurve_SECG_PRIME_224K1 */ + NULL, /* ECCurve_SECG_PRIME_256K1 */ + NULL, /* ECCurve_SECG_CHAR2_113R1 */ + NULL, /* ECCurve_SECG_CHAR2_113R2 */ + NULL, /* ECCurve_SECG_CHAR2_131R1 */ + NULL, /* ECCurve_SECG_CHAR2_131R2 */ + NULL, /* ECCurve_SECG_CHAR2_163R1 */ + NULL, /* ECCurve_SECG_CHAR2_193R1 */ + NULL, /* ECCurve_SECG_CHAR2_193R2 */ + NULL, /* ECCurve_SECG_CHAR2_239K1 */ + NULL, /* ECCurve_WTLS_1 */ + NULL, /* ECCurve_WTLS_8 */ + NULL, /* ECCurve_WTLS_9 */ + &ecCurve_25519, /* ECCurve25519 */ + NULL /* ECCurve_pastLastCurve */ +}; + +#endif diff --git a/security/nss/lib/freebl/ecl/ecl-exp.h b/security/nss/lib/freebl/ecl/ecl-exp.h new file mode 100644 index 0000000000..44adb8a1cd --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl-exp.h @@ -0,0 +1,167 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef __ecl_exp_h_ +#define __ecl_exp_h_ + +/* Curve field type */ +typedef enum { + ECField_GFp, + ECField_GF2m +} ECField; + +/* Hexadecimal encoding of curve parameters */ +struct ECCurveParamsStr { + char *text; + ECField field; + unsigned int size; + char *irr; + char *curvea; + char *curveb; + char *genx; + char *geny; + char *order; + int cofactor; + int security; + int pointSize; + unsigned int usage; +}; +typedef struct ECCurveParamsStr ECCurveParams; + +/* Named curve parameters */ +typedef enum { + + ECCurve_noName = 0, + + /* NIST prime curves */ + ECCurve_NIST_P192, /* not supported */ + ECCurve_NIST_P224, /* not supported */ + ECCurve_NIST_P256, + ECCurve_NIST_P384, + ECCurve_NIST_P521, + + /* NIST binary curves */ + ECCurve_NIST_K163, /* not supported */ + ECCurve_NIST_B163, /* not supported */ + ECCurve_NIST_K233, /* not supported */ + ECCurve_NIST_B233, /* not supported */ + ECCurve_NIST_K283, /* not supported */ + ECCurve_NIST_B283, /* not supported */ + ECCurve_NIST_K409, /* not supported */ + ECCurve_NIST_B409, /* not supported */ + ECCurve_NIST_K571, /* not supported */ + ECCurve_NIST_B571, /* not supported */ + + /* ANSI X9.62 prime curves */ + /* ECCurve_X9_62_PRIME_192V1 == ECCurve_NIST_P192 */ + ECCurve_X9_62_PRIME_192V2, /* not supported */ + ECCurve_X9_62_PRIME_192V3, /* not supported */ + ECCurve_X9_62_PRIME_239V1, /* not supported */ + ECCurve_X9_62_PRIME_239V2, /* not supported */ + ECCurve_X9_62_PRIME_239V3, /* not supported */ + /* ECCurve_X9_62_PRIME_256V1 == ECCurve_NIST_P256 */ + + /* ANSI X9.62 binary curves */ + ECCurve_X9_62_CHAR2_PNB163V1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB163V2, /* not supported */ + ECCurve_X9_62_CHAR2_PNB163V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB176V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V2, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB208W1, /* not supported */ + 
ECCurve_X9_62_CHAR2_TNB239V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V2, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB272W1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB304W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB359V1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB368W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB431R1, /* not supported */ + + /* SEC2 prime curves */ + ECCurve_SECG_PRIME_112R1, /* not supported */ + ECCurve_SECG_PRIME_112R2, /* not supported */ + ECCurve_SECG_PRIME_128R1, /* not supported */ + ECCurve_SECG_PRIME_128R2, /* not supported */ + ECCurve_SECG_PRIME_160K1, /* not supported */ + ECCurve_SECG_PRIME_160R1, /* not supported */ + ECCurve_SECG_PRIME_160R2, /* not supported */ + ECCurve_SECG_PRIME_192K1, /* not supported */ + /* ECCurve_SECG_PRIME_192R1 == ECCurve_NIST_P192 */ + ECCurve_SECG_PRIME_224K1, /* not supported */ + /* ECCurve_SECG_PRIME_224R1 == ECCurve_NIST_P224 */ + ECCurve_SECG_PRIME_256K1, /* not supported */ + /* ECCurve_SECG_PRIME_256R1 == ECCurve_NIST_P256 */ + /* ECCurve_SECG_PRIME_384R1 == ECCurve_NIST_P384 */ + /* ECCurve_SECG_PRIME_521R1 == ECCurve_NIST_P521 */ + + /* SEC2 binary curves */ + ECCurve_SECG_CHAR2_113R1, /* not supported */ + ECCurve_SECG_CHAR2_113R2, /* not supported */ + ECCurve_SECG_CHAR2_131R1, /* not supported */ + ECCurve_SECG_CHAR2_131R2, /* not supported */ + /* ECCurve_SECG_CHAR2_163K1 == ECCurve_NIST_K163 */ + ECCurve_SECG_CHAR2_163R1, /* not supported */ + /* ECCurve_SECG_CHAR2_163R2 == ECCurve_NIST_B163 */ + ECCurve_SECG_CHAR2_193R1, /* not supported */ + ECCurve_SECG_CHAR2_193R2, /* not supported */ + /* ECCurve_SECG_CHAR2_233K1 == ECCurve_NIST_K233 */ + /* ECCurve_SECG_CHAR2_233R1 == ECCurve_NIST_B233 */ + ECCurve_SECG_CHAR2_239K1, /* not supported */ + /* ECCurve_SECG_CHAR2_283K1 == ECCurve_NIST_K283 */ + /* ECCurve_SECG_CHAR2_283R1 == ECCurve_NIST_B283 */ + /* ECCurve_SECG_CHAR2_409K1 == ECCurve_NIST_K409 */ + /* 
ECCurve_SECG_CHAR2_409R1 == ECCurve_NIST_B409 */ + /* ECCurve_SECG_CHAR2_571K1 == ECCurve_NIST_K571 */ + /* ECCurve_SECG_CHAR2_571R1 == ECCurve_NIST_B571 */ + + /* WTLS curves */ + ECCurve_WTLS_1, /* not supported */ + /* there is no WTLS 2 curve */ + /* ECCurve_WTLS_3 == ECCurve_NIST_K163 */ + /* ECCurve_WTLS_4 == ECCurve_SECG_CHAR2_113R1 */ + /* ECCurve_WTLS_5 == ECCurve_X9_62_CHAR2_PNB163V1 */ + /* ECCurve_WTLS_6 == ECCurve_SECG_PRIME_112R1 */ + /* ECCurve_WTLS_7 == ECCurve_SECG_PRIME_160R1 */ + ECCurve_WTLS_8, /* not supported */ + ECCurve_WTLS_9, /* not supported */ + /* ECCurve_WTLS_10 == ECCurve_NIST_K233 */ + /* ECCurve_WTLS_11 == ECCurve_NIST_B233 */ + /* ECCurve_WTLS_12 == ECCurve_NIST_P224 */ + + ECCurve25519, + + ECCurve_pastLastCurve +} ECCurveName; + +/* Aliased named curves */ + +#define ECCurve_X9_62_PRIME_192V1 ECCurve_NIST_P192 /* not supported */ +#define ECCurve_X9_62_PRIME_256V1 ECCurve_NIST_P256 +#define ECCurve_SECG_PRIME_192R1 ECCurve_NIST_P192 /* not supported */ +#define ECCurve_SECG_PRIME_224R1 ECCurve_NIST_P224 /* not supported */ +#define ECCurve_SECG_PRIME_256R1 ECCurve_NIST_P256 +#define ECCurve_SECG_PRIME_384R1 ECCurve_NIST_P384 +#define ECCurve_SECG_PRIME_521R1 ECCurve_NIST_P521 +#define ECCurve_SECG_CHAR2_163K1 ECCurve_NIST_K163 /* not supported */ +#define ECCurve_SECG_CHAR2_163R2 ECCurve_NIST_B163 /* not supported */ +#define ECCurve_SECG_CHAR2_233K1 ECCurve_NIST_K233 /* not supported */ +#define ECCurve_SECG_CHAR2_233R1 ECCurve_NIST_B233 /* not supported */ +#define ECCurve_SECG_CHAR2_283K1 ECCurve_NIST_K283 /* not supported */ +#define ECCurve_SECG_CHAR2_283R1 ECCurve_NIST_B283 /* not supported */ +#define ECCurve_SECG_CHAR2_409K1 ECCurve_NIST_K409 /* not supported */ +#define ECCurve_SECG_CHAR2_409R1 ECCurve_NIST_B409 /* not supported */ +#define ECCurve_SECG_CHAR2_571K1 ECCurve_NIST_K571 /* not supported */ +#define ECCurve_SECG_CHAR2_571R1 ECCurve_NIST_B571 /* not supported */ +#define ECCurve_WTLS_3 ECCurve_NIST_K163 /* not 
supported */ +#define ECCurve_WTLS_4 ECCurve_SECG_CHAR2_113R1 /* not supported */ +#define ECCurve_WTLS_5 ECCurve_X9_62_CHAR2_PNB163V1 /* not supported */ +#define ECCurve_WTLS_6 ECCurve_SECG_PRIME_112R1 /* not supported */ +#define ECCurve_WTLS_7 ECCurve_SECG_PRIME_160R1 /* not supported */ +#define ECCurve_WTLS_10 ECCurve_NIST_K233 /* not supported */ +#define ECCurve_WTLS_11 ECCurve_NIST_B233 /* not supported */ +#define ECCurve_WTLS_12 ECCurve_NIST_P224 /* not supported */ + +#endif /* __ecl_exp_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl-priv.h b/security/nss/lib/freebl/ecl/ecl-priv.h new file mode 100644 index 0000000000..c1e0e856b3 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl-priv.h @@ -0,0 +1,252 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __ecl_priv_h_ +#define __ecl_priv_h_ + +#include "ecl.h" +#include "mpi.h" +#include "mplogic.h" +#include "../blapii.h" + +/* MAX_FIELD_SIZE_DIGITS is the maximum size of field element supported */ +/* the following needs to go away... */ +#if defined(MP_USE_LONG_LONG_DIGIT) || defined(MP_USE_LONG_DIGIT) +#define ECL_SIXTY_FOUR_BIT +#else +#define ECL_THIRTY_TWO_BIT +#endif + +#define ECL_CURVE_DIGITS(curve_size_in_bits) \ + (((curve_size_in_bits) + (sizeof(mp_digit) * 8 - 1)) / (sizeof(mp_digit) * 8)) +#define ECL_BITS (sizeof(mp_digit) * 8) +#define ECL_MAX_FIELD_SIZE_DIGITS (80 / sizeof(mp_digit)) + +/* Gets the i'th bit in the binary representation of a. If i >= length(a), + * then return 0. (The above behaviour differs from mpl_get_bit, which + * causes an error if i >= length(a).) */ +#define MP_GET_BIT(a, i) \ + ((i) >= mpl_significant_bits((a))) ? 
0 : mpl_get_bit((a), (i)) + +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) +#define MP_ADD_CARRY(a1, a2, s, carry) \ + { \ + mp_word w; \ + w = ((mp_word)carry) + (a1) + (a2); \ + s = ACCUM(w); \ + carry = CARRYOUT(w); \ + } + +#define MP_SUB_BORROW(a1, a2, s, borrow) \ + { \ + mp_word w; \ + w = ((mp_word)(a1)) - (a2)-borrow; \ + s = ACCUM(w); \ + borrow = (w >> MP_DIGIT_BIT) & 1; \ + } + +#else +/* NOTE, + * carry and borrow are both read and written. + * a1 or a2 and s could be the same variable. + * don't trash those outputs until their respective inputs have + * been read. */ +#define MP_ADD_CARRY(a1, a2, s, carry) \ + { \ + mp_digit tmp, sum; \ + tmp = (a1); \ + sum = tmp + (a2); \ + tmp = (sum < tmp); /* detect overflow */ \ + s = sum += carry; \ + carry = tmp + (sum < carry); \ + } + +#define MP_SUB_BORROW(a1, a2, s, borrow) \ + { \ + mp_digit tmp; \ + tmp = (a1); \ + s = tmp - (a2); \ + tmp = (s > tmp); /* detect borrow */ \ + if (borrow && !s--) \ + tmp++; \ + borrow = tmp; \ + } +#endif + +struct GFMethodStr; +typedef struct GFMethodStr GFMethod; +struct GFMethodStr { + /* Indicates whether the structure was constructed from dynamic memory + * or statically created. */ + int constructed; + /* Irreducible that defines the field. For prime fields, this is the + * prime p. For binary polynomial fields, this is the bitstring + * representation of the irreducible polynomial. */ + mp_int irr; + /* For prime fields, the value irr_arr[0] is the number of bits in the + * field. For binary polynomial fields, the irreducible polynomial + * f(t) is represented as an array of unsigned int[], where f(t) is + * of the form: f(t) = t^p[0] + t^p[1] + ... + t^p[4] where m = p[0] + * > p[1] > ... > p[4] = 0. */ + unsigned int irr_arr[5]; + /* Field arithmetic methods. All methods (except field_enc and + * field_dec) are assumed to take field-encoded parameters and return + * field-encoded values. 
All methods (except field_enc and field_dec) + * are required to be implemented. */ + mp_err (*field_add)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_neg)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_sub)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_mod)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_mul)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_sqr)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_div)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_enc)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_dec)(const mp_int *a, mp_int *r, const GFMethod *meth); + /* Extra storage for implementation-specific data. Any memory + * allocated to these extra fields will be cleared by extra_free. */ + void *extra1; + void *extra2; + void (*extra_free)(GFMethod *meth); +}; + +/* Construct generic GFMethods. */ +GFMethod *GFMethod_consGFp(const mp_int *irr); +GFMethod *GFMethod_consGFp_mont(const mp_int *irr); + +/* Free the memory allocated (if any) to a GFMethod object. */ +void GFMethod_free(GFMethod *meth); + +struct ECGroupStr { + /* Indicates whether the structure was constructed from dynamic memory + * or statically created. */ + int constructed; + /* Field definition and arithmetic. */ + GFMethod *meth; + /* Textual representation of curve name, if any. */ + char *text; + /* Curve parameters, field-encoded. */ + mp_int curvea, curveb; + /* x and y coordinates of the base point, field-encoded. */ + mp_int genx, geny; + /* Order and cofactor of the base point. */ + mp_int order; + int cofactor; + /* Point arithmetic methods. All methods are assumed to take + * field-encoded parameters and return field-encoded values. All + * methods (except base_point_mul and points_mul) are required to be + * implemented. 
*/ + mp_err (*point_add)(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_sub)(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_dbl)(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_mul)(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); + mp_err (*base_point_mul)(const mp_int *n, mp_int *rx, mp_int *ry, + const ECGroup *group); + mp_err (*points_mul)(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*validate_point)(const mp_int *px, const mp_int *py, const ECGroup *group); + /* Extra storage for implementation-specific data. Any memory + * allocated to these extra fields will be cleared by extra_free. */ + void *extra1; + void *extra2; + void (*extra_free)(ECGroup *group); +}; + +/* Wrapper functions for generic prime field arithmetic. */ +mp_err ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +/* fixed length in-line adds. 
Count is in words */ +mp_err ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +mp_err ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +/* Wrapper functions for generic binary polynomial field arithmetic. */ +mp_err ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +/* Montgomery prime field arithmetic. 
*/ +mp_err ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +void ec_GFp_extra_free_mont(GFMethod *meth); + +/* point multiplication */ +mp_err ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); +mp_err ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should + * be an array of signed char's to output to, bitsize should be the number + * of bits of out, in is the original scalar, and w is the window size. + * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A. + * Menezes, "Software implementation of elliptic curve cryptography over + * binary fields", Proc. CHES 2000. 
*/ +mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, + int w); + +/* Optimized field arithmetic */ +mp_err ec_group_set_gfp192(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp224(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp256(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp384(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp521(ECGroup *group, ECCurveName); +mp_err ec_group_set_gf2m163(ECGroup *group, ECCurveName name); +mp_err ec_group_set_gf2m193(ECGroup *group, ECCurveName name); +mp_err ec_group_set_gf2m233(ECGroup *group, ECCurveName name); + +/* Optimized point multiplication */ +mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name); +mp_err ec_group_set_secp384r1(ECGroup *group, ECCurveName name); +mp_err ec_group_set_secp521r1(ECGroup *group, ECCurveName name); + +SECStatus ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p); +#endif /* __ecl_priv_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl.c b/security/nss/lib/freebl/ecl/ecl.c new file mode 100644 index 0000000000..e34a73c661 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl.c @@ -0,0 +1,329 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "../stubs.h" +#endif + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecl-priv.h" +#include "ecp.h" +#include "ecl-curve.h" +#include +#include + +/* Allocate memory for a new ECGroup object. 
*/
+ECGroup *
+ECGroup_new()
+{
+    mp_err res = MP_OKAY;
+    ECGroup *group;
+    group = (ECGroup *)malloc(sizeof(ECGroup)); /* contents are indeterminate until assigned below */
+    if (group == NULL)
+        return NULL;
+    group->constructed = MP_YES; /* records that the struct came from dynamic memory */
+    group->meth = NULL;
+    group->text = NULL;
+    MP_DIGITS(&group->curvea) = 0; /* NOTE(review): digits are zeroed first, presumably so ECGroup_free is safe if a later mp_init fails - confirm */
+    MP_DIGITS(&group->curveb) = 0;
+    MP_DIGITS(&group->genx) = 0;
+    MP_DIGITS(&group->geny) = 0;
+    MP_DIGITS(&group->order) = 0;
+    group->base_point_mul = NULL;
+    group->points_mul = NULL;
+    group->validate_point = NULL;
+    group->extra1 = NULL;
+    group->extra2 = NULL;
+    group->extra_free = NULL; /* NOTE(review): cofactor, point_add, point_sub, point_dbl and point_mul are not set here; the cons* functions assign them */
+    MP_CHECKOK(mp_init(&group->curvea));
+    MP_CHECKOK(mp_init(&group->curveb));
+    MP_CHECKOK(mp_init(&group->genx));
+    MP_CHECKOK(mp_init(&group->geny));
+    MP_CHECKOK(mp_init(&group->order));
+
+CLEANUP: /* reached by fall-through on success; presumably also the MP_CHECKOK failure target */
+    if (res != MP_OKAY) {
+        ECGroup_free(group); /* a partially initialized group is released; the caller only ever sees NULL */
+        return NULL;
+    }
+    return group;
+}
+
+/* Construct a generic ECGroup for elliptic curves over prime fields. Returns NULL on any failure. */
+ECGroup *
+ECGroup_consGFp(const mp_int *irr, const mp_int *curvea,
+                const mp_int *curveb, const mp_int *genx,
+                const mp_int *geny, const mp_int *order, int cofactor)
+{
+    mp_err res = MP_OKAY;
+    ECGroup *group = NULL;
+
+    group = ECGroup_new();
+    if (group == NULL)
+        return NULL;
+
+    group->meth = GFMethod_consGFp(irr);
+    if (group->meth == NULL) {
+        res = MP_MEM; /* the constructor reports failure only as NULL; recorded here as MP_MEM */
+        goto CLEANUP;
+    }
+    MP_CHECKOK(mp_copy(curvea, &group->curvea)); /* parameters are copied into the group, not referenced */
+    MP_CHECKOK(mp_copy(curveb, &group->curveb));
+    MP_CHECKOK(mp_copy(genx, &group->genx));
+    MP_CHECKOK(mp_copy(geny, &group->geny));
+    MP_CHECKOK(mp_copy(order, &group->order));
+    group->cofactor = cofactor;
+    group->point_add = &ec_GFp_pt_add_aff;
+    group->point_sub = &ec_GFp_pt_sub_aff;
+    group->point_dbl = &ec_GFp_pt_dbl_aff;
+    group->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+    group->base_point_mul = NULL; /* no dedicated base-point routine for the generic group */
+    group->points_mul = &ec_GFp_pts_mul_jac;
+    group->validate_point = &ec_GFp_validate_point;
+
+CLEANUP: /* reached by fall-through on success as well as by the error jumps above */
+    if (res != MP_OKAY) {
+        ECGroup_free(group);
+        return NULL;
+    }
+    return group;
+}
+
+/* Construct a generic ECGroup for 
elliptic curves over prime fields with + * field arithmetic implemented in Montgomery coordinates. */ +ECGroup * +ECGroup_consGFp_mont(const mp_int *irr, const mp_int *curvea, + const mp_int *curveb, const mp_int *genx, + const mp_int *geny, const mp_int *order, int cofactor) +{ + mp_err res = MP_OKAY; + ECGroup *group = NULL; + + group = ECGroup_new(); + if (group == NULL) + return NULL; + + group->meth = GFMethod_consGFp_mont(irr); + if (group->meth == NULL) { + res = MP_MEM; + goto CLEANUP; + } + MP_CHECKOK(group->meth->field_enc(curvea, &group->curvea, group->meth)); + MP_CHECKOK(group->meth->field_enc(curveb, &group->curveb, group->meth)); + MP_CHECKOK(group->meth->field_enc(genx, &group->genx, group->meth)); + MP_CHECKOK(group->meth->field_enc(geny, &group->geny, group->meth)); + MP_CHECKOK(mp_copy(order, &group->order)); + group->cofactor = cofactor; + group->point_add = &ec_GFp_pt_add_aff; + group->point_sub = &ec_GFp_pt_sub_aff; + group->point_dbl = &ec_GFp_pt_dbl_aff; + group->point_mul = &ec_GFp_pt_mul_jm_wNAF; + group->base_point_mul = NULL; + group->points_mul = &ec_GFp_pts_mul_jac; + group->validate_point = &ec_GFp_validate_point; + +CLEANUP: + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct an ECGroup. 
*/ +ECGroup * +construct_ecgroup(const ECCurveName name, mp_int irr, mp_int curvea, + mp_int curveb, mp_int genx, mp_int geny, mp_int order, + int cofactor, ECField field, const char *text) +{ + int bits; + ECGroup *group = NULL; + mp_err res = MP_OKAY; + + /* determine number of bits */ + bits = mpl_significant_bits(&irr) - 1; + if (bits < MP_OKAY) { + res = bits; + goto CLEANUP; + } + + /* determine which optimizations (if any) to use */ + if (field == ECField_GFp) { + switch (name) { + case ECCurve_SECG_PRIME_256R1: + group = + ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny, + &order, cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + MP_CHECKOK(ec_group_set_gfp256(group, name)); + MP_CHECKOK(ec_group_set_gfp256_32(group, name)); + break; + case ECCurve_SECG_PRIME_384R1: + group = + ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny, + &order, cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + MP_CHECKOK(ec_group_set_secp384r1(group, name)); + break; + case ECCurve_SECG_PRIME_521R1: + group = + ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny, + &order, cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + MP_CHECKOK(ec_group_set_gfp521(group, name)); + MP_CHECKOK(ec_group_set_secp521r1(group, name)); + break; + default: + /* use generic arithmetic */ + group = + ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny, + &order, cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + } + } else { + res = MP_UNDEF; + goto CLEANUP; + } + + /* set name, if any */ + if ((group != NULL) && (text != NULL)) { + group->text = strdup(text); + if (group->text == NULL) { + res = MP_MEM; + } + } + +CLEANUP: + if (group && res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct ECGroup from parameters and name, if any. 
*/ +ECGroup * +ecgroup_fromName(const ECCurveName name, + const ECCurveBytes *params) +{ + mp_int irr, curvea, curveb, genx, geny, order; + ECGroup *group = NULL; + mp_err res = MP_OKAY; + + /* initialize values */ + MP_DIGITS(&irr) = 0; + MP_DIGITS(&curvea) = 0; + MP_DIGITS(&curveb) = 0; + MP_DIGITS(&genx) = 0; + MP_DIGITS(&geny) = 0; + MP_DIGITS(&order) = 0; + MP_CHECKOK(mp_init(&irr)); + MP_CHECKOK(mp_init(&curvea)); + MP_CHECKOK(mp_init(&curveb)); + MP_CHECKOK(mp_init(&genx)); + MP_CHECKOK(mp_init(&geny)); + MP_CHECKOK(mp_init(&order)); + MP_CHECKOK(mp_read_unsigned_octets(&irr, params->irr, params->scalarSize)); + MP_CHECKOK(mp_read_unsigned_octets(&curvea, params->curvea, params->scalarSize)); + MP_CHECKOK(mp_read_unsigned_octets(&curveb, params->curveb, params->scalarSize)); + MP_CHECKOK(mp_read_unsigned_octets(&genx, params->genx, params->scalarSize)); + MP_CHECKOK(mp_read_unsigned_octets(&geny, params->geny, params->scalarSize)); + MP_CHECKOK(mp_read_unsigned_octets(&order, params->order, params->scalarSize)); + + group = construct_ecgroup(name, irr, curvea, curveb, genx, geny, order, + params->cofactor, params->field, params->text); + +CLEANUP: + mp_clear(&irr); + mp_clear(&curvea); + mp_clear(&curveb); + mp_clear(&genx); + mp_clear(&geny); + mp_clear(&order); + if (group && res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct ECCurveBytes from an ECCurveName */ +const ECCurveBytes * +ec_GetNamedCurveParams(const ECCurveName name) +{ + if ((name <= ECCurve_noName) || (ECCurve_pastLastCurve <= name) || + (ecCurve_map[name] == NULL)) { + return NULL; + } else { + return ecCurve_map[name]; + } +} + +/* Construct ECGroup from named parameters. */ +ECGroup * +ECGroup_fromName(const ECCurveName name) +{ + const ECCurveBytes *params = NULL; + + /* This doesn't work with Curve25519 but it's not necessary to. 
*/ + PORT_Assert(name != ECCurve25519); + + params = ec_GetNamedCurveParams(name); + if (params == NULL) { + return NULL; + } + + /* construct actual group */ + return ecgroup_fromName(name, params); +} + +/* Validates an EC public key as described in Section 5.2.2 of X9.62. */ +mp_err +ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py) +{ + /* 1: Verify that publicValue is not the point at infinity */ + /* 2: Verify that the coordinates of publicValue are elements + * of the field. + */ + /* 3: Verify that publicValue is on the curve. */ + /* 4: Verify that the order of the curve times the publicValue + * is the point at infinity. + */ + return group->validate_point(px, py, group); +} + +/* Free the memory allocated (if any) to an ECGroup object. */ +void +ECGroup_free(ECGroup *group) +{ + if (group == NULL) + return; + GFMethod_free(group->meth); + if (group->constructed == MP_NO) + return; + mp_clear(&group->curvea); + mp_clear(&group->curveb); + mp_clear(&group->genx); + mp_clear(&group->geny); + mp_clear(&group->order); + if (group->text != NULL) + free(group->text); + if (group->extra_free != NULL) + group->extra_free(group); + free(group); +} diff --git a/security/nss/lib/freebl/ecl/ecl.h b/security/nss/lib/freebl/ecl/ecl.h new file mode 100644 index 0000000000..f6d5bc4eaf --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl.h @@ -0,0 +1,49 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Although this is not an exported header file, code which uses elliptic + * curve point operations will need to include it. */ + +#ifndef __ecl_h_ +#define __ecl_h_ + +#include "blapi.h" +#include "ecl-exp.h" +#include "mpi.h" +#include "eclt.h" + +struct ECGroupStr; +typedef struct ECGroupStr ECGroup; + +/* Construct ECGroup from named parameters. 
*/ +ECGroup *ECGroup_fromName(const ECCurveName name); + +/* Free an allocated ECGroup. */ +void ECGroup_free(ECGroup *group); + +/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k * P(x, + * y). If x, y = NULL, then P is assumed to be the generator (base point) + * of the group of points on the elliptic curve. Input and output values + * are assumed to be NOT field-encoded. */ +mp_err ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px, + const mp_int *py, mp_int *qx, mp_int *qy); + +/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Input and output values are assumed to + * be NOT field-encoded. */ +mp_err ECPoints_mul(const ECGroup *group, const mp_int *k1, + const mp_int *k2, const mp_int *px, const mp_int *py, + mp_int *qx, mp_int *qy); + +/* Validates an EC public key as described in Section 5.2.2 of X9.62. + * Returns MP_YES if the public key is valid, MP_NO if the public key + * is invalid, or an error code if the validation could not be + * performed. */ +mp_err ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py); + +SECStatus ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P); +SECStatus ec_Curve25519_pt_validate(const SECItem *px); + +#endif /* __ecl_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl_gf.c b/security/nss/lib/freebl/ecl/ecl_gf.c new file mode 100644 index 0000000000..81b0077055 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_gf.c @@ -0,0 +1,958 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mp_gf2m.h" +#include "ecl-priv.h" +#include "mpi-priv.h" +#include <stdlib.h> + +/* Allocate memory for a new GFMethod object.
+ * Only the bookkeeping members (constructed, irr, extra_free) are set up
+ * here.  Returns NULL if the allocation or mp_init() fails. */
+GFMethod *
+GFMethod_new()
+{
+    GFMethod *gf = (GFMethod *)malloc(sizeof(GFMethod));
+
+    if (gf == NULL) {
+        return NULL;
+    }
+
+    gf->constructed = MP_YES;
+    gf->extra_free = NULL;
+    MP_DIGITS(&gf->irr) = 0;
+    if (mp_init(&gf->irr) != MP_OKAY) {
+        GFMethod_free(gf);
+        return NULL;
+    }
+    return gf;
+}
+
+/* Construct a generic GFMethod for arithmetic over prime fields with
+ * irreducible irr.  Addition and subtraction are specialized by the number
+ * of digits irr occupies (3 to 6), falling back to the generic routines
+ * for any other size.  No field encoding is installed. */
+GFMethod *
+GFMethod_consGFp(const mp_int *irr)
+{
+    int i;
+    GFMethod *gf = GFMethod_new();
+
+    if (gf == NULL) {
+        return NULL;
+    }
+    if (mp_copy(irr, &gf->irr) != MP_OKAY) {
+        GFMethod_free(gf);
+        return NULL;
+    }
+
+    /* irr_arr[0] holds the bit length of irr; the other slots are zero */
+    gf->irr_arr[0] = mpl_significant_bits(irr);
+    for (i = 1; i <= 4; i++) {
+        gf->irr_arr[i] = 0;
+    }
+
+    /* operations that do not depend on the size of the modulus */
+    gf->field_neg = &ec_GFp_neg;
+    gf->field_mod = &ec_GFp_mod;
+    gf->field_mul = &ec_GFp_mul;
+    gf->field_sqr = &ec_GFp_sqr;
+    gf->field_div = &ec_GFp_div;
+    gf->field_enc = NULL;
+    gf->field_dec = NULL;
+    gf->extra1 = NULL;
+    gf->extra2 = NULL;
+    gf->extra_free = NULL;
+
+    /* add/sub: pick the unrolled variant matching the modulus size */
+    switch (MP_USED(&gf->irr)) {
+        /* maybe we need 1 and 2 words here as well?*/
+        case 3:
+            gf->field_add = &ec_GFp_add_3;
+            gf->field_sub = &ec_GFp_sub_3;
+            break;
+        case 4:
+            gf->field_add = &ec_GFp_add_4;
+            gf->field_sub = &ec_GFp_sub_4;
+            break;
+        case 5:
+            gf->field_add = &ec_GFp_add_5;
+            gf->field_sub = &ec_GFp_sub_5;
+            break;
+        case 6:
+            gf->field_add = &ec_GFp_add_6;
+            gf->field_sub = &ec_GFp_sub_6;
+            break;
+        default:
+            gf->field_add = &ec_GFp_add;
+            gf->field_sub = &ec_GFp_sub;
+            break;
+    }
+
+    return gf;
+}
+
+/* Free the memory allocated (if any) to a GFMethod object.
*/ +void +GFMethod_free(GFMethod *meth) +{ + if (meth == NULL) + return; + if (meth->constructed == MP_NO) + return; + mp_clear(&meth->irr); + if (meth->extra_free != NULL) + meth->extra_free(meth); + free(meth); +} + +/* Wrapper functions for generic prime field arithmetic. */ + +/* Add two field elements. Assumes that 0 <= a, b < meth->irr */ +mp_err +ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */ + mp_err res; + + if ((res = mp_add(a, b, r)) != MP_OKAY) { + return res; + } + if (mp_cmp(r, &meth->irr) >= 0) { + return mp_sub(r, &meth->irr, r); + } + return res; +} + +/* Negates a field element. Assumes that 0 <= a < meth->irr */ +mp_err +ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + /* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */ + + if (mp_cmp_z(a) == 0) { + mp_zero(r); + return MP_OKAY; + } + return mp_sub(&meth->irr, a, r); +} + +/* Subtracts two field elements. Assumes that 0 <= a, b < meth->irr */ +mp_err +ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */ + res = mp_sub(a, b, r); + if (res == MP_RANGE) { + MP_CHECKOK(mp_sub(b, a, r)); + if (mp_cmp_z(r) < 0) { + MP_CHECKOK(mp_add(r, &meth->irr, r)); + } + MP_CHECKOK(ec_GFp_neg(r, r, meth)); + } + if (mp_cmp_z(r) < 0) { + MP_CHECKOK(mp_add(r, &meth->irr, r)); + } +CLEANUP: + return res; +} +/* + * Inline adds for small curve lengths. 
+ */
+/* 3 words
+ *
+ * Adds two field elements, r = a + b reduced by at most one subtraction of
+ * meth->irr, with the digit arithmetic unrolled over three mp_digits.
+ * Only digits 0..2 of a and b are read: the switches below fall through on
+ * purpose from the highest used digit down to digit 0, and an operand whose
+ * MP_USED() is 0 or greater than 3 matches no case and contributes zero.
+ * NOTE(review): presumably callers guarantee 0 <= a, b < meth->irr, as the
+ * generic ec_GFp_add() states -- confirm.
+ * Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 3:
+            a2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            a1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 3:
+            r2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+#else
+    __asm__(
+        "xorq %3,%3 \n\t"
+        "addq %4,%0 \n\t"
+        "adcq %5,%1 \n\t"
+        "adcq %6,%2 \n\t"
+        "adcq $0,%3 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 3));
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).
+     * The sum needs reducing when the addition carried out of the top
+     * digit or when r >= irr; the top digits are compared first and the
+     * full mp_cmp() is only run when they are equal. */
+    a2 = MP_DIGIT(&meth->irr, 2);
+    if (carry || r2 > a2 ||
+        ((r2 == a2) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+#else
+        __asm__(
+            "subq %3,%0 \n\t"
+            "sbbq %4,%1 \n\t"
+            "sbbq %5,%2 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2)
+            : "r"(a0), "r"(a1), "r"(a2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words
+ *
+ * Same algorithm as ec_GFp_add_3(), unrolled over four mp_digits: only
+ * digits 0..3 of a and b are read (the switches fall through on purpose),
+ * and the sum is reduced by at most one subtraction of meth->irr.
+ * Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 4:
+            a3 = MP_DIGIT(a, 3); /* fall through */
+        case 3:
+            a2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            a1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 4:
+            r3 = MP_DIGIT(b, 3); /* fall through */
+        case 3:
+            r2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+#else
+    __asm__(
+        "xorq %4,%4 \n\t"
+        "addq %5,%0 \n\t"
+        "adcq %6,%1 \n\t"
+        "adcq %7,%2 \n\t"
+        "adcq %8,%3 \n\t"
+        "adcq $0,%4 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).
+     * Reduce when the addition carried out of the top digit or r >= irr. */
+    a3 = MP_DIGIT(&meth->irr, 3);
+    if (carry || r3 > a3 ||
+        ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+#else
+        __asm__(
+            "subq %4,%0 \n\t"
+            "sbbq %5,%1 \n\t"
+            "sbbq %6,%2 \n\t"
+            "sbbq %7,%3 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+            : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words
+ *
+ * Same algorithm as ec_GFp_add_3(), unrolled over five mp_digits.  This
+ * variant has no MPI_AMD64_ADD assembly path; it always uses the
+ * MP_ADD_CARRY / MP_SUB_BORROW macros.
+ * Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 5:
+            a4 = MP_DIGIT(a, 4); /* fall through */
+        case 4:
+            a3 = MP_DIGIT(a, 3); /* fall through */
+        case 3:
+            a2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            a1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 5:
+            r4 = MP_DIGIT(b, 4); /* fall through */
+        case 4:
+            r3 = MP_DIGIT(b, 3); /* fall through */
+        case 3:
+            r2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+    MP_ADD_CARRY(a4, r4, r4, carry);
+
+    MP_CHECKOK(s_mp_pad(r, 5));
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 5;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).
+     * Reduce when the addition carried out of the top digit or r >= irr. */
+    a4 = MP_DIGIT(&meth->irr, 4);
+    if (carry || r4 > a4 ||
+        ((r4 == a4) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a3 = MP_DIGIT(&meth->irr, 3);
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+        MP_SUB_BORROW(r4, a4, r4, carry);
+        MP_DIGIT(r, 4) = r4;
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 6 words
+ *
+ * Same algorithm as ec_GFp_add_3(), unrolled over six mp_digits.  Like the
+ * five-digit variant it has no MPI_AMD64_ADD assembly path.
+ * Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) {
+        case 6:
+            a5 = MP_DIGIT(a, 5); /* fall through */
+        case 5:
+            a4 = MP_DIGIT(a, 4); /* fall through */
+        case 4:
+            a3 = MP_DIGIT(a, 3); /* fall through */
+        case 3:
+            a2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            a1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 6:
+            r5 = MP_DIGIT(b, 5); /* fall through */
+        case 5:
+            r4 = MP_DIGIT(b, 4); /* fall through */
+        case 4:
+            r3 = MP_DIGIT(b, 3); /* fall through */
+        case 3:
+            r2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+    MP_ADD_CARRY(a4, r4, r4, carry);
+    MP_ADD_CARRY(a5, r5, r5, carry);
+
+    MP_CHECKOK(s_mp_pad(r, 6));
+    MP_DIGIT(r, 5) = r5;
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 6;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field).
+     * Reduce when the addition carried out of the top digit or r >= irr. */
+    a5 = MP_DIGIT(&meth->irr, 5);
+    if (carry || r5 > a5 ||
+        ((r5 == a5) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a4 = MP_DIGIT(&meth->irr, 4);
+        a3 = MP_DIGIT(&meth->irr, 3);
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+        MP_SUB_BORROW(r4, a4, r4, carry);
+        MP_SUB_BORROW(r5, a5, r5, carry);
+        MP_DIGIT(r, 5) = r5;
+        MP_DIGIT(r, 4) = r4;
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/*
+ * The following subtraction functions do in-line subtractions based
+ * on our curve size.
+ *
+ * ...
3 words
+ *
+ * Subtracts two field elements with the digit arithmetic unrolled over
+ * three mp_digits: r = a - b, and if that borrows out of the top digit,
+ * meth->irr is added back once.  Only digits 0..2 of a and b are read; the
+ * switches below fall through on purpose from the highest used digit down
+ * to digit 0.
+ * NOTE(review): presumably callers guarantee 0 <= a, b < meth->irr, as the
+ * generic ec_GFp_sub() states -- confirm.
+ * Returns MP_OKAY, or the error from s_mp_pad().
+ */
+mp_err
+ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 3:
+            r2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 3:
+            b2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+#else
+    __asm__(
+        "xorq %3,%3 \n\t"
+        "subq %4,%0 \n\t"
+        "sbbq %5,%1 \n\t"
+        "sbbq %6,%2 \n\t"
+        "adcq $0,%3 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).
+     * From here on b0..b2 hold the digits of meth->irr, not of b. */
+    if (borrow) {
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+#else
+        __asm__(
+            "addq %3,%0 \n\t"
+            "adcq %4,%1 \n\t"
+            "adcq %5,%2 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2)
+            : "r"(b0), "r"(b1), "r"(b2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+    }
+
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout?
+     * NOTE(review): the extra pad below looks redundant with the
+     * unconditional s_mp_pad() that follows; its purpose cannot be
+     * determined from this file -- confirm before touching it. */
+    if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 3));
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words
+ *
+ * Same algorithm as ec_GFp_sub_3(), unrolled over four mp_digits: r = a - b,
+ * adding meth->irr back once if the subtraction borrowed.
+ * Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 4:
+            r3 = MP_DIGIT(a, 3); /* fall through */
+        case 3:
+            r2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 4:
+            b3 = MP_DIGIT(b, 3); /* fall through */
+        case 3:
+            b2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+#else
+    __asm__(
+        "xorq %4,%4 \n\t"
+        "subq %5,%0 \n\t"
+        "sbbq %6,%1 \n\t"
+        "sbbq %7,%2 \n\t"
+        "sbbq %8,%3 \n\t"
+        "adcq $0,%4 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).
+     * From here on b0..b3 hold the digits of meth->irr, not of b. */
+    if (borrow) {
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+#else
+        __asm__(
+            "addq %4,%0 \n\t"
+            "adcq %5,%1 \n\t"
+            "adcq %6,%2 \n\t"
+            "adcq %7,%3 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+            : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+    }
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout?
+     * NOTE(review): as in ec_GFp_sub_3() the purpose of this block is
+     * unclear; note that r2 is not part of the comparison. */
+    if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words
+ *
+ * Same algorithm as ec_GFp_sub_3(), unrolled over five mp_digits.  This
+ * variant has no MPI_AMD64_ADD assembly path.
+ * Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 5:
+            r4 = MP_DIGIT(a, 4); /* fall through */
+        case 4:
+            r3 = MP_DIGIT(a, 3); /* fall through */
+        case 3:
+            r2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 5:
+            b4 = MP_DIGIT(b, 4); /* fall through */
+        case 4:
+            b3 = MP_DIGIT(b, 3); /* fall through */
+        case 3:
+            b2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+    MP_SUB_BORROW(r4, b4, r4, borrow);
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).
+     * From here on b0..b4 hold the digits of meth->irr, not of b. */
+    if (borrow) {
+        b4 = MP_DIGIT(&meth->irr, 4);
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+        MP_ADD_CARRY(b4, r4, r4, borrow);
+    }
+    MP_CHECKOK(s_mp_pad(r, 5));
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 5;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 6 words
+ *
+ * Same algorithm as ec_GFp_sub_3(), unrolled over six mp_digits.  This
+ * variant has no MPI_AMD64_ADD assembly path.
+ * Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) {
+        case 6:
+            r5 = MP_DIGIT(a, 5); /* fall through */
+        case 5:
+            r4 = MP_DIGIT(a, 4); /* fall through */
+        case 4:
+            r3 = MP_DIGIT(a, 3); /* fall through */
+        case 3:
+            r2 = MP_DIGIT(a, 2); /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1); /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 6:
+            b5 = MP_DIGIT(b, 5); /* fall through */
+        case 5:
+            b4 = MP_DIGIT(b, 4); /* fall through */
+        case 4:
+            b3 = MP_DIGIT(b, 3); /* fall through */
+        case 3:
+            b2 = MP_DIGIT(b, 2); /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1); /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+    MP_SUB_BORROW(r4, b4, r4, borrow);
+    MP_SUB_BORROW(r5, b5, r5, borrow);
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field).
+     * From here on b0..b5 hold the digits of meth->irr, not of b. */
+    if (borrow) {
+        b5 = MP_DIGIT(&meth->irr, 5);
+        b4 = MP_DIGIT(&meth->irr, 4);
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+        MP_ADD_CARRY(b4, r4, r4, borrow);
+        MP_ADD_CARRY(b5, r5, r5, borrow);
+    }
+
+    MP_CHECKOK(s_mp_pad(r, 6));
+    MP_DIGIT(r, 5) = r5;
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 6;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* Reduces an integer to a field element: r = a mod meth->irr, computed
+ * with mp_mod(). */
+mp_err
+ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    return mp_mod(a, &meth->irr, r);
+}
+
+/* Multiplies two field elements.
*/ +mp_err +ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_mulmod(a, b, &meth->irr, r); +} + +/* Squares a field element. */ +mp_err +ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_sqrmod(a, &meth->irr, r); +} + +/* Divides two field elements. If a is NULL, then returns the inverse of + * b. */ +mp_err +ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + return mp_invmod(b, &meth->irr, r); + } else { + /* MPI doesn't support divmod, so we implement it using invmod and + * mulmod. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); + MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r)); + CLEANUP: + mp_clear(&t); + return res; + } +} + +/* Wrapper functions for generic binary polynomial field arithmetic. */ + +/* Adds two field elements. */ +mp_err +ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_badd(a, b, r); +} + +/* Negates a field element. Note that for binary polynomial fields, the + * negation of a field element is the field element itself. */ +mp_err +ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + if (a == r) { + return MP_OKAY; + } else { + return mp_copy(a, r); + } +} + +/* Reduces a binary polynomial to a field element. */ +mp_err +ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_bmod(a, meth->irr_arr, r); +} + +/* Multiplies two field elements. */ +mp_err +ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_bmulmod(a, b, meth->irr_arr, r); +} + +/* Squares a field element. */ +mp_err +ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_bsqrmod(a, meth->irr_arr, r); +} + +/* Divides two field elements. 
If a is NULL, then returns the inverse of + * b. */ +mp_err +ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + /* The GF(2^m) portion of MPI doesn't support invmod, so we + * compute 1/b. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_set_int(&t, 1)); + MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r)); + CLEANUP: + mp_clear(&t); + return res; + } else { + return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r); + } +} diff --git a/security/nss/lib/freebl/ecl/ecl_mult.c b/security/nss/lib/freebl/ecl/ecl_mult.c new file mode 100644 index 0000000000..ffbcbf1d98 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_mult.c @@ -0,0 +1,305 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecl-priv.h" +#include + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k * P(x, + * y). If x, y = NULL, then P is assumed to be the generator (base point) + * of the group of points on the elliptic curve. Input and output values + * are assumed to be NOT field-encoded. 
+ * A scalar larger than the group order is first reduced modulo the order.
+ * Returns MP_OKAY, or the error of the first failing step. */
+mp_err
+ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+            const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int scalar;
+
+    ARGCHK((k != NULL) && (group != NULL), MP_BADARG);
+    MP_DIGITS(&scalar) = 0;
+
+    /* want scalar to be less than or equal to group order */
+    if (mp_cmp(k, &group->order) <= 0) {
+        /* k is usable as is: alias its digit storage instead of copying */
+        MP_SIGN(&scalar) = MP_ZPOS;
+        MP_USED(&scalar) = MP_USED(k);
+        MP_ALLOC(&scalar) = MP_ALLOC(k);
+        MP_DIGITS(&scalar) = MP_DIGITS(k);
+    } else {
+        /* reduce k modulo the order into storage owned by scalar */
+        MP_CHECKOK(mp_init(&scalar));
+        MP_CHECKOK(mp_mod(k, &group->order, &scalar));
+    }
+
+    if ((px != NULL) && (py != NULL)) {
+        /* caller-supplied point: field-encode it first if the method
+         * works on encoded values */
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(px, rx, group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, ry, group->meth));
+            MP_CHECKOK(group->point_mul(&scalar, rx, ry, rx, ry, group));
+        } else {
+            MP_CHECKOK(group->point_mul(&scalar, px, py, rx, ry, group));
+        }
+    } else if (group->base_point_mul) {
+        /* generator, using the dedicated base point routine */
+        MP_CHECKOK(group->base_point_mul(&scalar, rx, ry, group));
+    } else {
+        /* generator, using the generic routine on the stored base point */
+        MP_CHECKOK(group->point_mul(&scalar, &group->genx, &group->geny,
+                                    rx, ry, group));
+    }
+
+    /* hand the result back in non-encoded form */
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    /* only clear scalar when it does not borrow k's digits */
+    if (MP_DIGITS(&scalar) != MP_DIGITS(k)) {
+        mp_clear(&scalar);
+    }
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Input and output values are assumed to be NOT field-encoded.
*/ +mp_err +ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int sx, sy; /* S = k1 * G, kept separate from the result R */ + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG); + + /* if some arguments are not defined use ECPoint_mul */ + if (k1 == NULL) { + return ECPoint_mul(group, k2, px, py, rx, ry); /* only k2 * P requested */ + } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) { + return ECPoint_mul(group, k1, NULL, NULL, rx, ry); /* only k1 * G requested */ + } + + MP_DIGITS(&sx) = 0; + MP_DIGITS(&sy) = 0; + MP_CHECKOK(mp_init(&sx)); + MP_CHECKOK(mp_init(&sy)); + + MP_CHECKOK(ECPoint_mul(group, k1, NULL, NULL, &sx, &sy)); /* S = k1 * G */ + MP_CHECKOK(ECPoint_mul(group, k2, px, py, rx, ry)); /* R = k2 * P */ + + if (group->meth->field_enc) { /* both partial results are encoded again before point_add */ + MP_CHECKOK(group->meth->field_enc(&sx, &sx, group->meth)); + MP_CHECKOK(group->meth->field_enc(&sy, &sy, group->meth)); + MP_CHECKOK(group->meth->field_enc(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_enc(ry, ry, group->meth)); + } + + MP_CHECKOK(group->point_add(&sx, &sy, rx, ry, rx, ry, group)); /* R = S + R */ + + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + mp_clear(&sx); + mp_clear(&sy); + return res; +} + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Input and output values are assumed to be NOT field-encoded. Uses + * algorithm 15 (simultaneous multiple point multiplication) from Brown, + * Hankerson, Lopez, Menezes. Software Implementation of the NIST + * Elliptic Curves over Prime Fields. 
*/ +mp_err +ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[4][4][2]; /* precomp[i][j] = (x, y) of i * A + j * B; A pairs with scalar a, B with b */ + const mp_int *a, *b; + unsigned int i, j; + int ai, bi, d; + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG); + + /* if some arguments are not defined use ECPoint_mul */ + if (k1 == NULL) { + return ECPoint_mul(group, k2, px, py, rx, ry); + } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) { + return ECPoint_mul(group, k1, NULL, NULL, rx, ry); + } + + /* initialize precomputation table: first null every digit pointer (presumably so CLEANUP can mp_clear entries that were never allocated), then allocate */ + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_DIGITS(&precomp[i][j][0]) = 0; + MP_DIGITS(&precomp[i][j][1]) = 0; + } + } + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_CHECKOK(mp_init_size(&precomp[i][j][0], + ECL_MAX_FIELD_SIZE_DIGITS)); + MP_CHECKOK(mp_init_size(&precomp[i][j][1], + ECL_MAX_FIELD_SIZE_DIGITS)); + } + } + + /* fill precomputation table */ + /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */ + if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) { /* a = k2 pairs with P, b = k1 pairs with G */ + a = k2; + b = k1; + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][0][1])); + } + MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0])); /* NOTE(review): genx/geny are copied without field_enc - presumably stored already encoded; confirm */ + MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1])); + } else { /* a = k1 pairs with G, b = k2 pairs with P */ + a = k1; + b = k2; + MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1])); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[0][1][0])); + 
MP_CHECKOK(mp_copy(py, &precomp[0][1][1])); + } + } + /* precompute [*][0][*]: 0, A, 2A, 3A */ + mp_zero(&precomp[0][0][0]); + mp_zero(&precomp[0][0][1]); /* (0, 0), the same encoding used for "R = inf" below */ + MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], group)); + MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], + &precomp[3][0][0], &precomp[3][0][1], group)); + /* precompute [*][1][*]: B + i * A */ + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][1][0], &precomp[i][1][1], group)); + } + /* precompute [*][2][*]: 2B + i * A */ + MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][2][0], &precomp[i][2][1], group)); + } + /* precompute [*][3][*]: 3B + i * A */ + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], + &precomp[0][3][0], &precomp[0][3][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][3][0], &precomp[i][3][1], group)); + } + + d = (mpl_significant_bits(a) + 1) / 2; /* number of 2-bit windows in the longer scalar a */ + + /* R = inf */ + mp_zero(rx); + mp_zero(ry); + + for (i = d; i-- > 0;) { /* windows are consumed from the most significant end */ + ai = MP_GET_BIT(a, 2 * i + 1); + ai <<= 1; + ai |= MP_GET_BIT(a, 2 * i); + bi = MP_GET_BIT(b, 2 * i + 1); + bi <<= 1; + bi |= MP_GET_BIT(b, 2 * i); + /* R = 2^2 * R */ + MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group)); + MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group)); + /* R = R + (ai * A + bi * B) */ + MP_CHECKOK(group->point_add(rx, ry, &precomp[ai][bi][0], + &precomp[ai][bi][1], rx, ry, group)); + } + + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + 
MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + for (i = 0; i < 4; i++) { /* releases all 32 table entries on success and on failure alike */ + for (j = 0; j < 4; j++) { + mp_clear(&precomp[i][j][0]); + mp_clear(&precomp[i][j][1]); + } + } + return res; +} + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Input and output values are assumed to be NOT field-encoded. */ +mp_err +ECPoints_mul(const ECGroup *group, const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, mp_int *ry) +{ + mp_err res = MP_OKAY; + mp_int k1t, k2t; /* hold the reduced scalars, only initialized when a reduction is needed */ + const mp_int *k1p, *k2p; /* scalars actually forwarded: the caller's, or the reduced copies */ + + MP_DIGITS(&k1t) = 0; + MP_DIGITS(&k2t) = 0; + + ARGCHK(group != NULL, MP_BADARG); + + /* want scalar to be strictly less than group order (a scalar >= order is reduced) */ + if (k1 != NULL) { + if (mp_cmp(k1, &group->order) >= 0) { + MP_CHECKOK(mp_init(&k1t)); + MP_CHECKOK(mp_mod(k1, &group->order, &k1t)); + k1p = &k1t; + } else { + k1p = k1; + } + } else { + k1p = k1; /* NULL is forwarded unchanged */ + } + if (k2 != NULL) { + if (mp_cmp(k2, &group->order) >= 0) { + MP_CHECKOK(mp_init(&k2t)); + MP_CHECKOK(mp_mod(k2, &group->order, &k2t)); + k2p = &k2t; + } else { + k2p = k2; + } + } else { + k2p = k2; /* NULL is forwarded unchanged */ + } + + /* if points_mul is defined, then use it */ + if (group->points_mul) { + res = group->points_mul(k1p, k2p, px, py, rx, ry, group); + } else { + res = ec_pts_mul_simul_w2(k1p, k2p, px, py, rx, ry, group); + } + +CLEANUP: + mp_clear(&k1t); /* called even when k1t/k2t were never initialized (digits pointer still 0) */ + mp_clear(&k2t); + return res; +} diff --git a/security/nss/lib/freebl/ecl/eclt.h b/security/nss/lib/freebl/ecl/eclt.h new file mode 100644 index 0000000000..e763706f26 --- /dev/null +++ b/security/nss/lib/freebl/ecl/eclt.h @@ -0,0 +1,30 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* This header holds ECC types and must not be exported publicly. */ + +#ifndef __eclt_h_ +#define __eclt_h_ + +/* byte encoding of curve parameters; NOTE(review): member meanings are not documented here - presumably irr/curvea/curveb/genx/geny/order are byte strings of the matching ECGroup values; confirm against the curve tables */ +struct ECCurveBytesStr { + char *text; + ECField field; + size_t size; + const PRUint8 *irr; + const PRUint8 *curvea; + const PRUint8 *curveb; + const PRUint8 *genx; + const PRUint8 *geny; + const PRUint8 *order; + const PRUint8 *base; + int cofactor; + int security; + size_t pointSize; + size_t scalarSize; + unsigned int usage; +}; +typedef struct ECCurveBytesStr ECCurveBytes; + +#endif /* __eclt_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecp.h b/security/nss/lib/freebl/ecl/ecp.h new file mode 100644 index 0000000000..7e54e4e072 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp.h @@ -0,0 +1,106 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __ecp_h_ +#define __ecp_h_ + +#include "ecl-priv.h" + +/* Checks if point P(px, py) is at infinity. Uses affine coordinates. */ +mp_err ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py); + +/* Sets P(px, py) to be the point at infinity. Uses affine coordinates. */ +mp_err ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py); + +/* Computes R = P + Q where R is (rx, ry), P is (px, py) and Q is (qx, + * qy). Uses affine coordinates. */ +mp_err ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Computes R = P - Q. Uses affine coordinates. */ +mp_err ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Computes R = 2P. Uses affine coordinates. */ +mp_err ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Validates a point on a GFp curve. 
*/ +mp_err ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group); + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF +/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters + * a, b and p are the elliptic curve coefficients and the prime that + * determines the field GFp. Uses affine coordinates. */ +mp_err ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); +#endif + +/* Converts a point P(px, py) from affine coordinates to Jacobian + * projective coordinates R(rx, ry, rz). */ +mp_err ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, mp_int *rz, const ECGroup *group); + +/* Converts a point P(px, py, pz) from Jacobian projective coordinates to + * affine coordinates R(rx, ry). */ +mp_err ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, + const mp_int *pz, mp_int *rx, mp_int *ry, + const ECGroup *group); + +/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian + * coordinates. */ +mp_err ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, + const mp_int *pz); + +/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian + * coordinates. */ +mp_err ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz); + +/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is + * (qx, qy). Uses Jacobian coordinates for P and R, affine coordinates for Q. */ +mp_err ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, + const mp_int *pz, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, + mp_int *rz, const ECGroup *group); + +/* Computes R = 2P. Uses Jacobian coordinates. */ +mp_err ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, + const mp_int *pz, mp_int *rx, mp_int *ry, + mp_int *rz, const ECGroup *group); + +#ifdef ECL_ENABLE_GFP_PT_MUL_JAC +/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters + * a, b and p are the elliptic curve coefficients and the prime that + * determines the field GFp. 
Uses Jacobian coordinates. */ +mp_err ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); +#endif + +/* Computes R(x, y) = k1 * G + k2 * P(x, y), where G is the generator + * (base point) of the group of points on the elliptic curve. Allows k1 = + * NULL or { k2, P } = NULL. Implemented using mixed Jacobian-affine + * coordinates. Input and output values are assumed to be NOT + * field-encoded and are in affine form. */ +mp_err +ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); + +/* Computes R = nP where R is (rx, ry) and P is the base point. Elliptic + * curve points P and R can be identical. Uses mixed Modified-Jacobian + * co-ordinates for doubling and Chudnovsky Jacobian coordinates for + * additions. Assumes input is already field-encoded using field_enc, and + * returns output that is still field-encoded. Uses 5-bit window NAF + * method (algorithm 11) for scalar-point multiplication from Brown, + * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic + * Curves Over Prime Fields. */ +mp_err +ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group); + +#endif /* __ecp_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecp_25519.c b/security/nss/lib/freebl/ecl/ecp_25519.c new file mode 100644 index 0000000000..ceecd2aced --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_25519.c @@ -0,0 +1,126 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* curve 25519 https://www.rfc-editor.org/rfc/rfc7748.txt */ + +#ifdef FREEBL_NO_DEPEND +#include "../stubs.h" +#endif + +#include "ecl-priv.h" +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" +#include "secmpi.h" +#include "secitem.h" +#include "secport.h" +#include +#include + +/* + * point validation is not necessary in general. But this checks a point (px) + * against some known bad values. Returns SECFailure when px->len is not 32 or px equals one of them, SECSuccess otherwise. + */ +SECStatus +ec_Curve25519_pt_validate(const SECItem *px) +{ + PRUint8 *p; + PRUint64 i; + PRUint8 forbiddenValues[12][32] = { /* twelve rejected 32-byte encodings, one per row */ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, + { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, + { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 
0xcd, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, + { 0x4c, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, + { 0xd9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + }; + + if (px->len == 32) { /* any other length is rejected outright */ + p = px->data; + } else { + return SECFailure; + } + + for (i = 0; i < PR_ARRAY_SIZE(forbiddenValues); ++i) { /* every row is compared; the first match rejects the point */ + if (NSS_SecureMemcmp(p, forbiddenValues[i], px->len) == 0) { + return SECFailure; + } + } + + return SECSuccess; +} + +/* + * Scalar multiplication for Curve25519. + * If P == NULL, the base point is used. 
+ * Computes X = k*P. Returns SECFailure when P (if given) or k is not 32 bytes long, or when the X->len bytes of X->data are all zero afterwards; otherwise returns the status of ec_Curve25519_mul. + */ +SECStatus +ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P) +{ + PRUint8 *px; + PRUint8 basePoint[32] = { 9 }; /* first byte 9, remaining 31 bytes zero-initialized */ + + if (!P) { + px = basePoint; + } else { + PORT_Assert(P->len == 32); + if (P->len != 32) { + return SECFailure; + } + px = P->data; + } + if (k->len != 32) { + return SECFailure; + } + + SECStatus rv = ec_Curve25519_mul(X->data, k->data, px); /* NOTE(review): X->len is not checked before this call - presumably callers size X to 32 bytes; confirm */ + if (NSS_SecureMemcmpZero(X->data, X->len) == 0) { /* an all-zero output is reported as failure regardless of rv */ + return SECFailure; + } + return rv; +} diff --git a/security/nss/lib/freebl/ecl/ecp_256.c b/security/nss/lib/freebl/ecl/ecp_256.c new file mode 100644 index 0000000000..ad4e630c17 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_256.c @@ -0,0 +1,401 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" + +/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1. a can be r. + * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to + * Elliptic Curve Cryptography. */ +static mp_err +ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_size a_used = MP_USED(a); + int a_bits = mpl_significant_bits(a); + mp_digit carry; + +#ifdef ECL_THIRTY_TWO_BIT + mp_digit a8 = 0, a9 = 0, a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0, a15 = 0; + mp_digit r0, r1, r2, r3, r4, r5, r6, r7; + int r8; /* must be a signed value ! */ +#else + mp_digit a4 = 0, a5 = 0, a6 = 0, a7 = 0; + mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l; + mp_digit r0, r1, r2, r3; + int r4; /* must be a signed value ! 
*/ +#endif + /* inputs with fewer than 256 significant bits are returned as-is; inputs larger than twice the field size + * use regular reduction */ + if (a_bits < 256) { + if (a == r) + return MP_OKAY; + return mp_copy(a, r); + } + if (a_bits > 512) { + MP_CHECKOK(mp_mod(a, &meth->irr, r)); + } else { + +#ifdef ECL_THIRTY_TWO_BIT + switch (a_used) { /* no break statements: each case falls through so every present high digit is loaded */ + case 16: + a15 = MP_DIGIT(a, 15); + case 15: + a14 = MP_DIGIT(a, 14); + case 14: + a13 = MP_DIGIT(a, 13); + case 13: + a12 = MP_DIGIT(a, 12); + case 12: + a11 = MP_DIGIT(a, 11); + case 11: + a10 = MP_DIGIT(a, 10); + case 10: + a9 = MP_DIGIT(a, 9); + case 9: + a8 = MP_DIGIT(a, 8); + } + + r0 = MP_DIGIT(a, 0); + r1 = MP_DIGIT(a, 1); + r2 = MP_DIGIT(a, 2); + r3 = MP_DIGIT(a, 3); + r4 = MP_DIGIT(a, 4); + r5 = MP_DIGIT(a, 5); + r6 = MP_DIGIT(a, 6); + r7 = MP_DIGIT(a, 7); + + /* sum 1 (added twice) */ + carry = 0; + MP_ADD_CARRY(r3, a11, r3, carry); + MP_ADD_CARRY(r4, a12, r4, carry); + MP_ADD_CARRY(r5, a13, r5, carry); + MP_ADD_CARRY(r6, a14, r6, carry); + MP_ADD_CARRY(r7, a15, r7, carry); + r8 = carry; /* r8 accumulates the signed overflow beyond the eight result digits */ + carry = 0; + MP_ADD_CARRY(r3, a11, r3, carry); + MP_ADD_CARRY(r4, a12, r4, carry); + MP_ADD_CARRY(r5, a13, r5, carry); + MP_ADD_CARRY(r6, a14, r6, carry); + MP_ADD_CARRY(r7, a15, r7, carry); + r8 += carry; + carry = 0; + /* sum 2 */ + MP_ADD_CARRY(r3, a12, r3, carry); + MP_ADD_CARRY(r4, a13, r4, carry); + MP_ADD_CARRY(r5, a14, r5, carry); + MP_ADD_CARRY(r6, a15, r6, carry); + MP_ADD_CARRY(r7, 0, r7, carry); + r8 += carry; + carry = 0; + /* combine last bottom of sum 3 with second sum 2 */ + MP_ADD_CARRY(r0, a8, r0, carry); + MP_ADD_CARRY(r1, a9, r1, carry); + MP_ADD_CARRY(r2, a10, r2, carry); + MP_ADD_CARRY(r3, a12, r3, carry); + MP_ADD_CARRY(r4, a13, r4, carry); + MP_ADD_CARRY(r5, a14, r5, carry); + MP_ADD_CARRY(r6, a15, r6, carry); + MP_ADD_CARRY(r7, a15, r7, carry); /* from sum 3 */ + r8 += carry; + carry = 0; + /* sum 3 (rest of it)*/ + MP_ADD_CARRY(r6, a14, r6, carry); + MP_ADD_CARRY(r7, 0, r7, carry); + r8 += carry; + carry = 0; + /* sum 4 (rest of it)*/ + MP_ADD_CARRY(r0, a9, r0, 
carry); + MP_ADD_CARRY(r1, a10, r1, carry); + MP_ADD_CARRY(r2, a11, r2, carry); + MP_ADD_CARRY(r3, a13, r3, carry); + MP_ADD_CARRY(r4, a14, r4, carry); + MP_ADD_CARRY(r5, a15, r5, carry); + MP_ADD_CARRY(r6, a13, r6, carry); + MP_ADD_CARRY(r7, a8, r7, carry); + r8 += carry; + carry = 0; + /* diff 5 */ + MP_SUB_BORROW(r0, a11, r0, carry); + MP_SUB_BORROW(r1, a12, r1, carry); + MP_SUB_BORROW(r2, a13, r2, carry); + MP_SUB_BORROW(r3, 0, r3, carry); + MP_SUB_BORROW(r4, 0, r4, carry); + MP_SUB_BORROW(r5, 0, r5, carry); + MP_SUB_BORROW(r6, a8, r6, carry); + MP_SUB_BORROW(r7, a10, r7, carry); + r8 -= carry; + carry = 0; + /* diff 6 */ + MP_SUB_BORROW(r0, a12, r0, carry); + MP_SUB_BORROW(r1, a13, r1, carry); + MP_SUB_BORROW(r2, a14, r2, carry); + MP_SUB_BORROW(r3, a15, r3, carry); + MP_SUB_BORROW(r4, 0, r4, carry); + MP_SUB_BORROW(r5, 0, r5, carry); + MP_SUB_BORROW(r6, a9, r6, carry); + MP_SUB_BORROW(r7, a11, r7, carry); + r8 -= carry; + carry = 0; + /* diff 7 */ + MP_SUB_BORROW(r0, a13, r0, carry); + MP_SUB_BORROW(r1, a14, r1, carry); + MP_SUB_BORROW(r2, a15, r2, carry); + MP_SUB_BORROW(r3, a8, r3, carry); + MP_SUB_BORROW(r4, a9, r4, carry); + MP_SUB_BORROW(r5, a10, r5, carry); + MP_SUB_BORROW(r6, 0, r6, carry); + MP_SUB_BORROW(r7, a12, r7, carry); + r8 -= carry; + carry = 0; + /* diff 8 */ + MP_SUB_BORROW(r0, a14, r0, carry); + MP_SUB_BORROW(r1, a15, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, a9, r3, carry); + MP_SUB_BORROW(r4, a10, r4, carry); + MP_SUB_BORROW(r5, a11, r5, carry); + MP_SUB_BORROW(r6, 0, r6, carry); + MP_SUB_BORROW(r7, a13, r7, carry); + r8 -= carry; + + /* reduce the overflows */ + while (r8 > 0) { /* fold the positive excess r8 back into r0..r7 until no carry remains */ + mp_digit r8_d = r8; + carry = 0; + MP_ADD_CARRY(r0, r8_d, r0, carry); + MP_ADD_CARRY(r1, 0, r1, carry); + MP_ADD_CARRY(r2, 0, r2, carry); + MP_ADD_CARRY(r3, 0 - r8_d, r3, carry); + MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry); + MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry); + MP_ADD_CARRY(r6, 0 - (r8_d + 1), r6, carry); + MP_ADD_CARRY(r7, 
(r8_d - 1), r7, carry); + r8 = carry; + } + + /* reduce the underflows */ + while (r8 < 0) { /* mirror of the overflow loop for a negative r8 */ + mp_digit r8_d = -r8; + carry = 0; + MP_SUB_BORROW(r0, r8_d, r0, carry); + MP_SUB_BORROW(r1, 0, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, 0 - r8_d, r3, carry); + MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry); + MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry); + MP_SUB_BORROW(r6, 0 - (r8_d + 1), r6, carry); + MP_SUB_BORROW(r7, (r8_d - 1), r7, carry); + r8 = 0 - carry; + } + if (a != r) { /* r is only padded when it is a distinct object from a */ + MP_CHECKOK(s_mp_pad(r, 8)); + } + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 8; + + MP_DIGIT(r, 7) = r7; + MP_DIGIT(r, 6) = r6; + MP_DIGIT(r, 5) = r5; + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + + /* final reduction if necessary */ + if ((r7 == MP_DIGIT_MAX) && + ((r6 > 1) || ((r6 == 1) && + (r5 || r4 || r3 || + ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) && (r0 == MP_DIGIT_MAX)))))) { + MP_CHECKOK(mp_sub(r, &meth->irr, r)); + } + + s_mp_clamp(r); +#else /* non-ECL_THIRTY_TWO_BIT build: four result digits, split into 32-bit halves with shifts by 32 (presumably 64-bit mp_digit) */ + switch (a_used) { /* no break statements: each case falls through so every present high digit is loaded */ + case 8: + a7 = MP_DIGIT(a, 7); + case 7: + a6 = MP_DIGIT(a, 6); + case 6: + a5 = MP_DIGIT(a, 5); + case 5: + a4 = MP_DIGIT(a, 4); + } + a7l = a7 << 32; + a7h = a7 >> 32; + a6l = a6 << 32; + a6h = a6 >> 32; + a5l = a5 << 32; + a5h = a5 >> 32; + a4l = a4 << 32; + a4h = a4 >> 32; + r3 = MP_DIGIT(a, 3); + r2 = MP_DIGIT(a, 2); + r1 = MP_DIGIT(a, 1); + r0 = MP_DIGIT(a, 0); + + /* sum 1 (added twice) */ + carry = 0; + MP_ADD_CARRY(r1, a5h << 32, r1, carry); + MP_ADD_CARRY(r2, a6, r2, carry); + MP_ADD_CARRY(r3, a7, r3, carry); + r4 = carry; /* r4 accumulates the signed overflow beyond the four result digits */ + carry = 0; + MP_ADD_CARRY(r1, a5h << 32, r1, carry); + MP_ADD_CARRY(r2, a6, r2, carry); + MP_ADD_CARRY(r3, a7, r3, carry); + r4 += carry; + /* sum 2 (added twice) */ + carry = 0; + MP_ADD_CARRY(r1, a6l, r1, carry); + MP_ADD_CARRY(r2, a6h | a7l, r2, carry); + MP_ADD_CARRY(r3, a7h, r3, carry); + r4 += carry; + carry = 0; + MP_ADD_CARRY(r1, a6l, r1, carry); + MP_ADD_CARRY(r2, a6h | a7l, r2, carry); + MP_ADD_CARRY(r3, a7h, r3, 
carry); + r4 += carry; + + /* sum 3 */ + carry = 0; + MP_ADD_CARRY(r0, a4, r0, carry); + MP_ADD_CARRY(r1, a5l >> 32, r1, carry); + MP_ADD_CARRY(r2, 0, r2, carry); + MP_ADD_CARRY(r3, a7, r3, carry); + r4 += carry; + /* sum 4 */ + carry = 0; + MP_ADD_CARRY(r0, a4h | a5l, r0, carry); + MP_ADD_CARRY(r1, a5h | (a6h << 32), r1, carry); + MP_ADD_CARRY(r2, a7, r2, carry); + MP_ADD_CARRY(r3, a6h | a4l, r3, carry); + r4 += carry; + /* diff 5 */ + carry = 0; + MP_SUB_BORROW(r0, a5h | a6l, r0, carry); + MP_SUB_BORROW(r1, a6h, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, (a4l >> 32) | a5l, r3, carry); + r4 -= carry; + /* diff 6 */ + carry = 0; + MP_SUB_BORROW(r0, a6, r0, carry); + MP_SUB_BORROW(r1, a7, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, a4h | (a5h << 32), r3, carry); + r4 -= carry; + /* diff 7 */ + carry = 0; + MP_SUB_BORROW(r0, a6h | a7l, r0, carry); + MP_SUB_BORROW(r1, a7h | a4l, r1, carry); + MP_SUB_BORROW(r2, a4h | a5l, r2, carry); + MP_SUB_BORROW(r3, a6l, r3, carry); + r4 -= carry; + /* diff 8 */ + carry = 0; + MP_SUB_BORROW(r0, a7, r0, carry); + MP_SUB_BORROW(r1, a4h << 32, r1, carry); + MP_SUB_BORROW(r2, a5, r2, carry); + MP_SUB_BORROW(r3, a6h << 32, r3, carry); + r4 -= carry; + + /* reduce the overflows */ + while (r4 > 0) { /* fold the positive excess r4 back into r0..r3 until no carry remains */ + mp_digit r4_long = r4; + mp_digit r4l = (r4_long << 32); + carry = 0; + MP_ADD_CARRY(r0, r4_long, r0, carry); + MP_ADD_CARRY(r1, 0 - r4l, r1, carry); + MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry); + MP_ADD_CARRY(r3, r4l - r4_long - 1, r3, carry); + r4 = carry; + } + + /* reduce the underflows */ + while (r4 < 0) { /* mirror of the overflow loop for a negative r4 */ + mp_digit r4_long = -r4; + mp_digit r4l = (r4_long << 32); + carry = 0; + MP_SUB_BORROW(r0, r4_long, r0, carry); + MP_SUB_BORROW(r1, 0 - r4l, r1, carry); + MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry); + MP_SUB_BORROW(r3, r4l - r4_long - 1, r3, carry); + r4 = 0 - carry; + } + + if (a != r) { /* r is only padded when it is a distinct object from a */ + MP_CHECKOK(s_mp_pad(r, 4)); + } + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 4; + + MP_DIGIT(r, 3) = 
r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + + /* final reduction if necessary */ + if ((r3 > 0xFFFFFFFF00000001ULL) || + ((r3 == 0xFFFFFFFF00000001ULL) && + (r2 || (r1 >> 32) || + (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) { + /* very rare, just use mp_sub */ + MP_CHECKOK(mp_sub(r, &meth->irr, r)); + } + + s_mp_clamp(r); +#endif + } + +CLEANUP: + return res; +} + +/* Compute the square of polynomial a, reduce modulo p256. Store the + * result in r. r could be a. Uses optimized modular reduction for p256. + */ +static mp_err +ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_sqr(a, r)); + MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); /* reduce the full-width square in place */ +CLEANUP: + return res; +} + +/* Compute the product of two polynomials a and b, reduce modulo p256. + * Store the result in r. r could be a or b; a could be b. Uses + * optimized modular reduction for p256. */ +static mp_err +ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); /* reduce the full-width product in place */ +CLEANUP: + return res; +} + +/* Wire in fast field arithmetic and precomputation of base point for + * named curves. */ +mp_err +ec_group_set_gfp256(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P256) { /* any other name leaves group->meth untouched; MP_OKAY is returned either way */ + group->meth->field_mod = &ec_GFp_nistp256_mod; + group->meth->field_mul = &ec_GFp_nistp256_mul; + group->meth->field_sqr = &ec_GFp_nistp256_sqr; + } + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/ecp_256_32.c b/security/nss/lib/freebl/ecl/ecp_256_32.c new file mode 100644 index 0000000000..879396ad42 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_256_32.c @@ -0,0 +1,1535 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* A 32-bit implementation of the NIST P-256 elliptic curve. */ + +#include + +#include "prtypes.h" +#include "mpi.h" +#include "mpi-priv.h" +#include "ecp.h" + +typedef PRUint8 u8; +typedef PRUint32 u32; +typedef PRUint64 u64; + +/* Our field elements are represented as nine, unsigned 32-bit words. Freebl's + * MPI library calls them digits, but here they are called limbs, which is + * GMP's terminology. + * + * The value of an felem (field element) is: + * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) + * + * That is, each limb is alternately 29 or 28-bits wide in little-endian + * order. + * + * This means that an felem hits 2**257, rather than 2**256 as we would like. A + * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems + * when multiplying as terms end up one bit short of a limb which would require + * much bit-shifting to correct. + * + * Finally, the values stored in an felem are in Montgomery form. So the value + * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257. + */ +typedef u32 limb; +#define NLIMBS 9 +typedef limb felem[NLIMBS]; + +static const limb kBottom28Bits = 0xfffffff; +static const limb kBottom29Bits = 0x1fffffff; + +/* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and + * 28-bit words. + */ +static const felem kOne = { + 2, 0, 0, 0xffff800, + 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, + 0 +}; +static const felem kZero = { 0 }; +static const felem kP = { + 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, + 0, 0, 0x200000, 0xf000000, + 0xfffffff +}; +static const felem k2P = { + 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, + 0, 0, 0x400000, 0xe000000, + 0x1fffffff +}; + +/* kPrecomputed contains precomputed values to aid the calculation of scalar + * multiples of the base point, G. It's actually two, equal length, tables + * concatenated. + * + * The first table contains (x,y) felem pairs for 16 multiples of the base + * point, G. 
+ * + * Index | Index (binary) | Value + * 0 | 0000 | 0G (all zeros, omitted) + * 1 | 0001 | G + * 2 | 0010 | 2**64G + * 3 | 0011 | 2**64G + G + * 4 | 0100 | 2**128G + * 5 | 0101 | 2**128G + G + * 6 | 0110 | 2**128G + 2**64G + * 7 | 0111 | 2**128G + 2**64G + G + * 8 | 1000 | 2**192G + * 9 | 1001 | 2**192G + G + * 10 | 1010 | 2**192G + 2**64G + * 11 | 1011 | 2**192G + 2**64G + G + * 12 | 1100 | 2**192G + 2**128G + * 13 | 1101 | 2**192G + 2**128G + G + * 14 | 1110 | 2**192G + 2**128G + 2**64G + * 15 | 1111 | 2**192G + 2**128G + 2**64G + G + * + * The second table follows the same style, but the terms are 2**32G, + * 2**96G, 2**160G, 2**224G. + * + * This is ~2KB of data. + */ +static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = { + 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee, + 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3, + 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c, + 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22, + 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050, + 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b, + 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa, + 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2, + 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609, + 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581, + 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca, + 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33, + 0x15cb5439, 0x6a38e11, 
0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6, + 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd, + 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0, + 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881, + 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a, + 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26, + 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b, + 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023, + 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133, + 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa, + 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29, + 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc, + 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8, + 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59, + 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39, + 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689, + 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa, + 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3, + 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1, + 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 
0xe3d6511, 0xc7d76f, + 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72, + 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d, + 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b, + 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a, + 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a, + 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f, + 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb, + 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc, + 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9, + 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce, + 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2, + 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca, + 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229, + 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57, + 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c, + 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa, + 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651, + 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec, + 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7, + 0xa3656c3, 0x523a425, 0xfcd0692, 
0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c, + 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927, + 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298, + 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8, + 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2, + 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d, + 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4, + 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8, + 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78 +}; + +/* Field element operations: + */ + +/* NON_ZERO_TO_ALL_ONES returns: + * 0xffffffff for 0 < x <= 2**31 + * 0 for x == 0 or x > 2**31. + * + * x must be a u32 or an equivalent type such as limb. + */ +#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x)-1) >> 31) - 1) + +/* felem_reduce_carry adds a multiple of p in order to cancel |carry|, + * which is a term at 2**257. + * + * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. + * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. + */ +static void +felem_reduce_carry(felem inout, limb carry) +{ + const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); + + inout[0] += carry << 1; + inout[3] += 0x10000000 & carry_mask; + /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the + * previous line therefore this doesn't underflow. + */ + inout[3] -= carry << 11; + inout[4] += (0x20000000 - 1) & carry_mask; + inout[5] += (0x10000000 - 1) & carry_mask; + inout[6] += (0x20000000 - 1) & carry_mask; + inout[6] -= carry << 22; + /* This may underflow if carry is non-zero but, if so, we'll fix it in the + * next line. 
+ */ + inout[7] -= 1 & carry_mask; + inout[7] += carry << 25; +} + +/* felem_sum sets out = in+in2. + * + * On entry, in[i]+in2[i] must not overflow a 32-bit word. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 + */ +static void +felem_sum(felem out, const felem in, const felem in2) +{ + limb carry = 0; + unsigned int i; + for (i = 0;; i++) { + out[i] = in[i] + in2[i]; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] = in[i] + in2[i]; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +#define two31m3 (((limb)1) << 31) - (((limb)1) << 3) +#define two30m2 (((limb)1) << 30) - (((limb)1) << 2) +#define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2) +#define two31m2 (((limb)1) << 31) - (((limb)1) << 2) +#define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2) +#define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2) + +/* zero31 is 0 mod p. + */ +static const felem zero31 = { + two31m3, two30m2, two31m2, two30p13m2, + two31m2, two30m2, two31p24m2, two30m27m2, + two31m2 +}; + +/* felem_diff sets out = in-in2. + * + * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and + * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_diff(felem out, const felem in, const felem in2) +{ + limb carry = 0; + unsigned int i; + + for (i = 0;; i++) { + out[i] = in[i] - in2[i]; + out[i] += zero31[i]; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] = in[i] - in2[i]; + out[i] += zero31[i]; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words + * with the same 29,28,... bit positions as an felem. 
+ * + * The values in felems are in Montgomery form: x*R mod p where R = 2**257. + * Since we just multiplied two Montgomery values together, the result is + * x*y*R*R mod p. We wish to divide by R in order for the result also to be + * in Montgomery form. + * + * On entry: tmp[i] < 2**64 + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 + */ +static void +felem_reduce_degree(felem out, u64 tmp[17]) +{ + /* The following table may be helpful when reading this code: + * + * Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10... + * Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29 + * Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285 + * (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285 + */ + limb tmp2[18], carry, x, xMask; + unsigned int i; + + /* tmp contains 64-bit words with the same 29,28,29-bit positions as an + * felem. So the top of an element of tmp might overlap with another + * element two positions down. The following loop eliminates this + * overlap. + */ + tmp2[0] = tmp[0] & kBottom29Bits; + + /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try + * and hint to the compiler that it can do a single-word shift by selecting + * the right register rather than doing a double-word shift and truncating + * afterwards. 
+ */ + tmp2[1] = ((limb)tmp[0]) >> 29; + tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits; + tmp2[1] += ((limb)tmp[1]) & kBottom28Bits; + carry = tmp2[1] >> 28; + tmp2[1] &= kBottom28Bits; + + for (i = 2; i < 17; i++) { + tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25; + tmp2[i] += ((limb)(tmp[i - 1])) >> 28; + tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits; + tmp2[i] += ((limb)tmp[i]) & kBottom29Bits; + tmp2[i] += carry; + carry = tmp2[i] >> 29; + tmp2[i] &= kBottom29Bits; + + i++; + if (i == 17) + break; + tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25; + tmp2[i] += ((limb)(tmp[i - 1])) >> 29; + tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits; + tmp2[i] += ((limb)tmp[i]) & kBottom28Bits; + tmp2[i] += carry; + carry = tmp2[i] >> 28; + tmp2[i] &= kBottom28Bits; + } + + tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25; + tmp2[17] += ((limb)(tmp[16])) >> 29; + tmp2[17] += (((limb)(tmp[16] >> 32)) << 3); + tmp2[17] += carry; + + /* Montgomery elimination of terms: + * + * Since R is 2**257, we can divide by R with a bitwise shift if we can + * ensure that the right-most 257 bits are all zero. We can make that true + * by adding multiplies of p without affecting the value. + * + * So we eliminate limbs from right to left. Since the bottom 29 bits of p + * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0. + * We can do that for 8 further limbs and then right shift to eliminate the + * extra factor of R. + */ + for (i = 0;; i += 2) { + tmp2[i + 1] += tmp2[i] >> 29; + x = tmp2[i] & kBottom29Bits; + xMask = NON_ZERO_TO_ALL_ONES(x); + tmp2[i] = 0; + + /* The bounds calculations for this loop are tricky. Each iteration of + * the loop eliminates two words by adding values to words to their + * right. + * + * The following table contains the amounts added to each word (as an + * offset from the value of i at the top of the loop). 
The amounts are + * accounted for from the first and second half of the loop separately + * and are written as, for example, 28 to mean a value <2**28. + * + * Word: 3 4 5 6 7 8 9 10 + * Added in top half: 28 11 29 21 29 28 + * 28 29 + * 29 + * Added in bottom half: 29 10 28 21 28 28 + * 29 + * + * The value that is currently offset 7 will be offset 5 for the next + * iteration and then offset 3 for the iteration after that. Therefore + * the total value added will be the values added at 7, 5 and 3. + * + * The following table accumulates these values. The sums at the bottom + * are written as, for example, 29+28, to mean a value < 2**29+2**28. + * + * Word: 3 4 5 6 7 8 9 10 11 12 13 + * 28 11 10 29 21 29 28 28 28 28 28 + * 29 28 11 28 29 28 29 28 29 28 + * 29 28 21 21 29 21 29 21 + * 10 29 28 21 28 21 28 + * 28 29 28 29 28 29 28 + * 11 10 29 10 29 10 + * 29 28 11 28 11 + * 29 29 + * -------------------------------------------- + * 30+ 31+ 30+ 31+ 30+ + * 28+ 29+ 28+ 29+ 21+ + * 21+ 28+ 21+ 28+ 10 + * 10 21+ 10 21+ + * 11 11 + * + * So the greatest amount is added to tmp2[10] and tmp2[12]. If + * tmp2[10/12] has an initial value of <2**29, then the maximum value + * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32, + * as required. + */ + tmp2[i + 3] += (x << 10) & kBottom28Bits; + tmp2[i + 4] += (x >> 18); + + tmp2[i + 6] += (x << 21) & kBottom29Bits; + tmp2[i + 7] += x >> 8; + + /* At position 200, which is the starting bit position for word 7, we + * have a factor of 0xf000000 = 2**28 - 2**24. + */ + tmp2[i + 7] += 0x10000000 & xMask; + /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. 
*/ + tmp2[i + 8] += (x - 1) & xMask; + tmp2[i + 7] -= (x << 24) & kBottom28Bits; + tmp2[i + 8] -= x >> 4; + + tmp2[i + 8] += 0x20000000 & xMask; + tmp2[i + 8] -= x; + tmp2[i + 8] += (x << 28) & kBottom29Bits; + tmp2[i + 9] += ((x >> 1) - 1) & xMask; + + if (i + 1 == NLIMBS) + break; + tmp2[i + 2] += tmp2[i + 1] >> 28; + x = tmp2[i + 1] & kBottom28Bits; + xMask = NON_ZERO_TO_ALL_ONES(x); + tmp2[i + 1] = 0; + + tmp2[i + 4] += (x << 11) & kBottom29Bits; + tmp2[i + 5] += (x >> 18); + + tmp2[i + 7] += (x << 21) & kBottom28Bits; + tmp2[i + 8] += x >> 7; + + /* At position 199, which is the starting bit of the 8th word when + * dealing with a context starting on an odd word, we have a factor of + * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th + * word from i+1 is i+8. + */ + tmp2[i + 8] += 0x20000000 & xMask; + tmp2[i + 9] += (x - 1) & xMask; + tmp2[i + 8] -= (x << 25) & kBottom29Bits; + tmp2[i + 9] -= x >> 4; + + tmp2[i + 9] += 0x10000000 & xMask; + tmp2[i + 9] -= x; + tmp2[i + 10] += (x - 1) & xMask; + } + + /* We merge the right shift with a carry chain. The words above 2**257 have + * widths of 28,29,... which we need to correct when copying them down. + */ + carry = 0; + for (i = 0; i < 8; i++) { + /* The maximum value of tmp2[i + 9] occurs on the first iteration and + * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is + * therefore safe. + */ + out[i] = tmp2[i + 9]; + out[i] += carry; + out[i] += (tmp2[i + 10] << 28) & kBottom29Bits; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + out[i] = tmp2[i + 9] >> 1; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + out[8] = tmp2[17]; + out[8] += carry; + carry = out[8] >> 29; + out[8] &= kBottom29Bits; + + felem_reduce_carry(out, carry); +} + +/* felem_square sets out=in*in. + * + * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 
+ */ +static void +felem_square(felem out, const felem in) +{ + u64 tmp[17]; + + tmp[0] = ((u64)in[0]) * in[0]; + tmp[1] = ((u64)in[0]) * (in[1] << 1); + tmp[2] = ((u64)in[0]) * (in[2] << 1) + + ((u64)in[1]) * (in[1] << 1); + tmp[3] = ((u64)in[0]) * (in[3] << 1) + + ((u64)in[1]) * (in[2] << 1); + tmp[4] = ((u64)in[0]) * (in[4] << 1) + + ((u64)in[1]) * (in[3] << 2) + + ((u64)in[2]) * in[2]; + tmp[5] = ((u64)in[0]) * (in[5] << 1) + + ((u64)in[1]) * (in[4] << 1) + + ((u64)in[2]) * (in[3] << 1); + tmp[6] = ((u64)in[0]) * (in[6] << 1) + + ((u64)in[1]) * (in[5] << 2) + + ((u64)in[2]) * (in[4] << 1) + + ((u64)in[3]) * (in[3] << 1); + tmp[7] = ((u64)in[0]) * (in[7] << 1) + + ((u64)in[1]) * (in[6] << 1) + + ((u64)in[2]) * (in[5] << 1) + + ((u64)in[3]) * (in[4] << 1); + /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60, + * which is < 2**64 as required. + */ + tmp[8] = ((u64)in[0]) * (in[8] << 1) + + ((u64)in[1]) * (in[7] << 2) + + ((u64)in[2]) * (in[6] << 1) + + ((u64)in[3]) * (in[5] << 2) + + ((u64)in[4]) * in[4]; + tmp[9] = ((u64)in[1]) * (in[8] << 1) + + ((u64)in[2]) * (in[7] << 1) + + ((u64)in[3]) * (in[6] << 1) + + ((u64)in[4]) * (in[5] << 1); + tmp[10] = ((u64)in[2]) * (in[8] << 1) + + ((u64)in[3]) * (in[7] << 2) + + ((u64)in[4]) * (in[6] << 1) + + ((u64)in[5]) * (in[5] << 1); + tmp[11] = ((u64)in[3]) * (in[8] << 1) + + ((u64)in[4]) * (in[7] << 1) + + ((u64)in[5]) * (in[6] << 1); + tmp[12] = ((u64)in[4]) * (in[8] << 1) + + ((u64)in[5]) * (in[7] << 2) + + ((u64)in[6]) * in[6]; + tmp[13] = ((u64)in[5]) * (in[8] << 1) + + ((u64)in[6]) * (in[7] << 1); + tmp[14] = ((u64)in[6]) * (in[8] << 1) + + ((u64)in[7]) * (in[7] << 1); + tmp[15] = ((u64)in[7]) * (in[8] << 1); + tmp[16] = ((u64)in[8]) * in[8]; + + felem_reduce_degree(out, tmp); +} + +/* felem_mul sets out=in*in2. + * + * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and + * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 
+ */ +static void +felem_mul(felem out, const felem in, const felem in2) +{ + u64 tmp[17]; + + tmp[0] = ((u64)in[0]) * in2[0]; + tmp[1] = ((u64)in[0]) * (in2[1] << 0) + + ((u64)in[1]) * (in2[0] << 0); + tmp[2] = ((u64)in[0]) * (in2[2] << 0) + + ((u64)in[1]) * (in2[1] << 1) + + ((u64)in[2]) * (in2[0] << 0); + tmp[3] = ((u64)in[0]) * (in2[3] << 0) + + ((u64)in[1]) * (in2[2] << 0) + + ((u64)in[2]) * (in2[1] << 0) + + ((u64)in[3]) * (in2[0] << 0); + tmp[4] = ((u64)in[0]) * (in2[4] << 0) + + ((u64)in[1]) * (in2[3] << 1) + + ((u64)in[2]) * (in2[2] << 0) + + ((u64)in[3]) * (in2[1] << 1) + + ((u64)in[4]) * (in2[0] << 0); + tmp[5] = ((u64)in[0]) * (in2[5] << 0) + + ((u64)in[1]) * (in2[4] << 0) + + ((u64)in[2]) * (in2[3] << 0) + + ((u64)in[3]) * (in2[2] << 0) + + ((u64)in[4]) * (in2[1] << 0) + + ((u64)in[5]) * (in2[0] << 0); + tmp[6] = ((u64)in[0]) * (in2[6] << 0) + + ((u64)in[1]) * (in2[5] << 1) + + ((u64)in[2]) * (in2[4] << 0) + + ((u64)in[3]) * (in2[3] << 1) + + ((u64)in[4]) * (in2[2] << 0) + + ((u64)in[5]) * (in2[1] << 1) + + ((u64)in[6]) * (in2[0] << 0); + tmp[7] = ((u64)in[0]) * (in2[7] << 0) + + ((u64)in[1]) * (in2[6] << 0) + + ((u64)in[2]) * (in2[5] << 0) + + ((u64)in[3]) * (in2[4] << 0) + + ((u64)in[4]) * (in2[3] << 0) + + ((u64)in[5]) * (in2[2] << 0) + + ((u64)in[6]) * (in2[1] << 0) + + ((u64)in[7]) * (in2[0] << 0); + /* tmp[8] has the greatest value but doesn't overflow. See logic in + * felem_square. 
+ */ + tmp[8] = ((u64)in[0]) * (in2[8] << 0) + + ((u64)in[1]) * (in2[7] << 1) + + ((u64)in[2]) * (in2[6] << 0) + + ((u64)in[3]) * (in2[5] << 1) + + ((u64)in[4]) * (in2[4] << 0) + + ((u64)in[5]) * (in2[3] << 1) + + ((u64)in[6]) * (in2[2] << 0) + + ((u64)in[7]) * (in2[1] << 1) + + ((u64)in[8]) * (in2[0] << 0); + tmp[9] = ((u64)in[1]) * (in2[8] << 0) + + ((u64)in[2]) * (in2[7] << 0) + + ((u64)in[3]) * (in2[6] << 0) + + ((u64)in[4]) * (in2[5] << 0) + + ((u64)in[5]) * (in2[4] << 0) + + ((u64)in[6]) * (in2[3] << 0) + + ((u64)in[7]) * (in2[2] << 0) + + ((u64)in[8]) * (in2[1] << 0); + tmp[10] = ((u64)in[2]) * (in2[8] << 0) + + ((u64)in[3]) * (in2[7] << 1) + + ((u64)in[4]) * (in2[6] << 0) + + ((u64)in[5]) * (in2[5] << 1) + + ((u64)in[6]) * (in2[4] << 0) + + ((u64)in[7]) * (in2[3] << 1) + + ((u64)in[8]) * (in2[2] << 0); + tmp[11] = ((u64)in[3]) * (in2[8] << 0) + + ((u64)in[4]) * (in2[7] << 0) + + ((u64)in[5]) * (in2[6] << 0) + + ((u64)in[6]) * (in2[5] << 0) + + ((u64)in[7]) * (in2[4] << 0) + + ((u64)in[8]) * (in2[3] << 0); + tmp[12] = ((u64)in[4]) * (in2[8] << 0) + + ((u64)in[5]) * (in2[7] << 1) + + ((u64)in[6]) * (in2[6] << 0) + + ((u64)in[7]) * (in2[5] << 1) + + ((u64)in[8]) * (in2[4] << 0); + tmp[13] = ((u64)in[5]) * (in2[8] << 0) + + ((u64)in[6]) * (in2[7] << 0) + + ((u64)in[7]) * (in2[6] << 0) + + ((u64)in[8]) * (in2[5] << 0); + tmp[14] = ((u64)in[6]) * (in2[8] << 0) + + ((u64)in[7]) * (in2[7] << 1) + + ((u64)in[8]) * (in2[6] << 0); + tmp[15] = ((u64)in[7]) * (in2[8] << 0) + + ((u64)in[8]) * (in2[7] << 0); + tmp[16] = ((u64)in[8]) * (in2[8] << 0); + + felem_reduce_degree(out, tmp); +} + +static void +felem_assign(felem out, const felem in) +{ + memcpy(out, in, sizeof(felem)); +} + +/* felem_inv calculates |out| = |in|^{-1} + * + * Based on Fermat's Little Theorem: + * a^p = a (mod p) + * a^{p-1} = 1 (mod p) + * a^{p-2} = a^{-1} (mod p) + */ +static void +felem_inv(felem out, const felem in) +{ + felem ftmp, ftmp2; + /* each e_I will hold |in|^{2^I - 1} */ + felem e2, 
e4, e8, e16, e32, e64; + unsigned int i; + + felem_square(ftmp, in); /* 2^1 */ + felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */ + felem_assign(e2, ftmp); + felem_square(ftmp, ftmp); /* 2^3 - 2^1 */ + felem_square(ftmp, ftmp); /* 2^4 - 2^2 */ + felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */ + felem_assign(e4, ftmp); + felem_square(ftmp, ftmp); /* 2^5 - 2^1 */ + felem_square(ftmp, ftmp); /* 2^6 - 2^2 */ + felem_square(ftmp, ftmp); /* 2^7 - 2^3 */ + felem_square(ftmp, ftmp); /* 2^8 - 2^4 */ + felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */ + felem_assign(e8, ftmp); + for (i = 0; i < 8; i++) { + felem_square(ftmp, ftmp); + } /* 2^16 - 2^8 */ + felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */ + felem_assign(e16, ftmp); + for (i = 0; i < 16; i++) { + felem_square(ftmp, ftmp); + } /* 2^32 - 2^16 */ + felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */ + felem_assign(e32, ftmp); + for (i = 0; i < 32; i++) { + felem_square(ftmp, ftmp); + } /* 2^64 - 2^32 */ + felem_assign(e64, ftmp); + felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */ + for (i = 0; i < 192; i++) { + felem_square(ftmp, ftmp); + } /* 2^256 - 2^224 + 2^192 */ + + felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */ + for (i = 0; i < 16; i++) { + felem_square(ftmp2, ftmp2); + } /* 2^80 - 2^16 */ + felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */ + for (i = 0; i < 8; i++) { + felem_square(ftmp2, ftmp2); + } /* 2^88 - 2^8 */ + felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */ + for (i = 0; i < 4; i++) { + felem_square(ftmp2, ftmp2); + } /* 2^92 - 2^4 */ + felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */ + felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */ + felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */ + felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */ + felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */ + felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */ + felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */ + + felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */ +} + +/* felem_scalar_3 sets out=3*out. + * + * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_scalar_3(felem out) +{ + limb carry = 0; + unsigned int i; + + for (i = 0;; i++) { + out[i] *= 3; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] *= 3; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_scalar_4 sets out=4*out. + * + * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_scalar_4(felem out) +{ + limb carry = 0, next_carry; + unsigned int i; + + for (i = 0;; i++) { + next_carry = out[i] >> 27; + out[i] <<= 2; + out[i] &= kBottom29Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 29); + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + next_carry = out[i] >> 26; + out[i] <<= 2; + out[i] &= kBottom28Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 28); + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_scalar_8 sets out=8*out. + * + * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_scalar_8(felem out) +{ + limb carry = 0, next_carry; + unsigned int i; + + for (i = 0;; i++) { + next_carry = out[i] >> 26; + out[i] <<= 3; + out[i] &= kBottom29Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 29); + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + next_carry = out[i] >> 25; + out[i] <<= 3; + out[i] &= kBottom28Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 28); + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_is_zero_vartime returns 1 iff |in| == 0. It takes a variable amount of + * time depending on the value of |in|. 
+ */ +static char +felem_is_zero_vartime(const felem in) +{ + limb carry; + int i; + limb tmp[NLIMBS]; + felem_assign(tmp, in); + + /* First, reduce tmp to a minimal form. + */ + do { + carry = 0; + for (i = 0;; i++) { + tmp[i] += carry; + carry = tmp[i] >> 29; + tmp[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + tmp[i] += carry; + carry = tmp[i] >> 28; + tmp[i] &= kBottom28Bits; + } + + felem_reduce_carry(tmp, carry); + } while (carry); + + /* tmp < 2**257, so the only possible zero values are 0, p and 2p. + */ + return memcmp(tmp, kZero, sizeof(tmp)) == 0 || + memcmp(tmp, kP, sizeof(tmp)) == 0 || + memcmp(tmp, k2P, sizeof(tmp)) == 0; +} + +/* Group operations: + * + * Elements of the elliptic curve group are represented in Jacobian + * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in + * Jacobian form. + */ + +/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}. + * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l + */ +static void +point_double(felem x_out, felem y_out, felem z_out, + const felem x, const felem y, const felem z) +{ + felem delta, gamma, alpha, beta, tmp, tmp2; + + felem_square(delta, z); + felem_square(gamma, y); + felem_mul(beta, x, gamma); + + felem_sum(tmp, x, delta); + felem_diff(tmp2, x, delta); + felem_mul(alpha, tmp, tmp2); + felem_scalar_3(alpha); + + felem_sum(tmp, y, z); + felem_square(tmp, tmp); + felem_diff(tmp, tmp, gamma); + felem_diff(z_out, tmp, delta); + + felem_scalar_4(beta); + felem_square(x_out, alpha); + felem_diff(x_out, x_out, beta); + felem_diff(x_out, x_out, beta); + + felem_diff(tmp, beta, x_out); + felem_mul(tmp, alpha, tmp); + felem_square(tmp2, gamma); + felem_scalar_8(tmp2); + felem_diff(y_out, tmp, tmp2); +} + +/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}. + * (i.e. the second point is affine.) 
+ * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl + * + * Note that this function does not handle P+P, infinity+P nor P+infinity + * correctly. + */ +static void +point_add_mixed(felem x_out, felem y_out, felem z_out, + const felem x1, const felem y1, const felem z1, + const felem x2, const felem y2) +{ + felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp; + + felem_square(z1z1, z1); + felem_sum(tmp, z1, z1); + + felem_mul(u2, x2, z1z1); + felem_mul(z1z1z1, z1, z1z1); + felem_mul(s2, y2, z1z1z1); + felem_diff(h, u2, x1); + felem_sum(i, h, h); + felem_square(i, i); + felem_mul(j, h, i); + felem_diff(r, s2, y1); + felem_sum(r, r, r); + felem_mul(v, x1, i); + + felem_mul(z_out, tmp, h); + felem_square(rr, r); + felem_diff(x_out, rr, j); + felem_diff(x_out, x_out, v); + felem_diff(x_out, x_out, v); + + felem_diff(tmp, v, x_out); + felem_mul(y_out, tmp, r); + felem_mul(tmp, y1, j); + felem_diff(y_out, y_out, tmp); + felem_diff(y_out, y_out, tmp); +} + +/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}. + * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl + * + * Note that this function does not handle P+P, infinity+P nor P+infinity + * correctly. 
+ */ +static void +point_add(felem x_out, felem y_out, felem z_out, + const felem x1, const felem y1, const felem z1, + const felem x2, const felem y2, const felem z2) +{ + felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp; + + felem_square(z1z1, z1); + felem_square(z2z2, z2); + felem_mul(u1, x1, z2z2); + + felem_sum(tmp, z1, z2); + felem_square(tmp, tmp); + felem_diff(tmp, tmp, z1z1); + felem_diff(tmp, tmp, z2z2); + + felem_mul(z2z2z2, z2, z2z2); + felem_mul(s1, y1, z2z2z2); + + felem_mul(u2, x2, z1z1); + felem_mul(z1z1z1, z1, z1z1); + felem_mul(s2, y2, z1z1z1); + felem_diff(h, u2, u1); + felem_sum(i, h, h); + felem_square(i, i); + felem_mul(j, h, i); + felem_diff(r, s2, s1); + felem_sum(r, r, r); + felem_mul(v, u1, i); + + felem_mul(z_out, tmp, h); + felem_square(rr, r); + felem_diff(x_out, rr, j); + felem_diff(x_out, x_out, v); + felem_diff(x_out, x_out, v); + + felem_diff(tmp, v, x_out); + felem_mul(y_out, tmp, r); + felem_mul(tmp, s1, j); + felem_diff(y_out, y_out, tmp); + felem_diff(y_out, y_out, tmp); +} + +/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} + + * {x2,y2,z2}. + * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl + * + * This function handles the case where {x1,y1,z1}={x2,y2,z2}. 
+ */ +static void +point_add_or_double_vartime( + felem x_out, felem y_out, felem z_out, + const felem x1, const felem y1, const felem z1, + const felem x2, const felem y2, const felem z2) +{ + felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp; + char x_equal, y_equal; + + felem_square(z1z1, z1); + felem_square(z2z2, z2); + felem_mul(u1, x1, z2z2); + + felem_sum(tmp, z1, z2); + felem_square(tmp, tmp); + felem_diff(tmp, tmp, z1z1); + felem_diff(tmp, tmp, z2z2); + + felem_mul(z2z2z2, z2, z2z2); + felem_mul(s1, y1, z2z2z2); + + felem_mul(u2, x2, z1z1); + felem_mul(z1z1z1, z1, z1z1); + felem_mul(s2, y2, z1z1z1); + felem_diff(h, u2, u1); + x_equal = felem_is_zero_vartime(h); + felem_sum(i, h, h); + felem_square(i, i); + felem_mul(j, h, i); + felem_diff(r, s2, s1); + y_equal = felem_is_zero_vartime(r); + if (x_equal && y_equal) { + point_double(x_out, y_out, z_out, x1, y1, z1); + return; + } + felem_sum(r, r, r); + felem_mul(v, u1, i); + + felem_mul(z_out, tmp, h); + felem_square(rr, r); + felem_diff(x_out, rr, j); + felem_diff(x_out, x_out, v); + felem_diff(x_out, x_out, v); + + felem_diff(tmp, v, x_out); + felem_mul(y_out, tmp, r); + felem_mul(tmp, s1, j); + felem_diff(y_out, y_out, tmp); + felem_diff(y_out, y_out, tmp); +} + +/* copy_conditional sets out=in if mask = 0xffffffff in constant time. + * + * On entry: mask is either 0 or 0xffffffff. + */ +static void +copy_conditional(felem out, const felem in, limb mask) +{ + int i; + + for (i = 0; i < NLIMBS; i++) { + const limb tmp = mask & (in[i] ^ out[i]); + out[i] ^= tmp; + } +} + +/* select_affine_point sets {out_x,out_y} to the index'th entry of table. + * On entry: index < 16, table[0] must be zero. 
+ */ +static void +select_affine_point(felem out_x, felem out_y, + const limb *table, limb index) +{ + limb i, j; + + memset(out_x, 0, sizeof(felem)); + memset(out_y, 0, sizeof(felem)); + + for (i = 1; i < 16; i++) { + limb mask = i ^ index; + mask |= mask >> 2; + mask |= mask >> 1; + mask &= 1; + mask--; + for (j = 0; j < NLIMBS; j++, table++) { + out_x[j] |= *table & mask; + } + for (j = 0; j < NLIMBS; j++, table++) { + out_y[j] |= *table & mask; + } + } +} + +/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of + * table. On entry: index < 16, table[0] must be zero. + */ +static void +select_jacobian_point(felem out_x, felem out_y, felem out_z, + const limb *table, limb index) +{ + limb i, j; + + memset(out_x, 0, sizeof(felem)); + memset(out_y, 0, sizeof(felem)); + memset(out_z, 0, sizeof(felem)); + + /* The implicit value at index 0 is all zero. We don't need to perform that + * iteration of the loop because we already set out_* to zero. + */ + table += 3 * NLIMBS; + + for (i = 1; i < 16; i++) { + limb mask = i ^ index; + mask |= mask >> 2; + mask |= mask >> 1; + mask &= 1; + mask--; + for (j = 0; j < NLIMBS; j++, table++) { + out_x[j] |= *table & mask; + } + for (j = 0; j < NLIMBS; j++, table++) { + out_y[j] |= *table & mask; + } + for (j = 0; j < NLIMBS; j++, table++) { + out_z[j] |= *table & mask; + } + } +} + +/* get_bit returns the bit'th bit of scalar. */ +static char +get_bit(const u8 scalar[32], int bit) +{ + return ((scalar[bit >> 3]) >> (bit & 7)) & 1; +} + +/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian + * number. Note that the value of scalar must be less than the order of the + * group. 
+ */
+static void
+scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])
+{
+    int i, j;
+    limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;
+    u32 table_offset;
+
+    felem px, py;
+    felem tx, ty, tz;
+
+    memset(nx, 0, sizeof(felem));
+    memset(ny, 0, sizeof(felem));
+    memset(nz, 0, sizeof(felem));
+
+    /* The loop adds bits at positions 0, 64, 128 and 192, followed by
+     * positions 32,96,160 and 224 and does this 32 times.
+     */
+    for (i = 0; i < 32; i++) {
+        if (i) {
+            point_double(nx, ny, nz, nx, ny, nz);
+        }
+        table_offset = 0;
+        /* j == 0 uses the first precomputed sub-table, j == 32 the second. */
+        for (j = 0; j <= 32; j += 32) {
+            char bit0 = get_bit(scalar, 31 - i + j);
+            char bit1 = get_bit(scalar, 95 - i + j);
+            char bit2 = get_bit(scalar, 159 - i + j);
+            char bit3 = get_bit(scalar, 223 - i + j);
+            limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
+
+            select_affine_point(px, py, kPrecomputed + table_offset, index);
+            /* select_affine_point walks 15 entries of 2 * NLIMBS limbs, so
+             * the next sub-table starts 30 * NLIMBS limbs further on.
+             */
+            table_offset += 30 * NLIMBS;
+
+            /* Since scalar is less than the order of the group, we know that
+             * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
+             * below.
+             */
+            point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
+            /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
+             * (a.k.a. the point at infinity). We handle that situation by
+             * copying the point from the table.
+             */
+            copy_conditional(nx, px, n_is_infinity_mask);
+            copy_conditional(ny, py, n_is_infinity_mask);
+            copy_conditional(nz, kOne, n_is_infinity_mask);
+
+            /* Equally, the result is also wrong if the point from the table is
+             * zero, which happens when the index is zero. We handle that by
+             * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.
+             */
+            p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+            mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
+            copy_conditional(nx, tx, mask);
+            copy_conditional(ny, ty, mask);
+            copy_conditional(nz, tz, mask);
+            /* If p was not zero, then n is now non-zero. */
+            n_is_infinity_mask &= ~p_is_noninfinite_mask;
+        }
+    }
+}
+
+/* point_to_affine converts a Jacobian point to an affine point. If the input
+ * is the point at infinity then it returns (0, 0) in constant time.
+ *
+ * Computes x_out = nx / nz^2 and y_out = ny / nz^3 using a single field
+ * inversion of nz.
+ */
+static void
+point_to_affine(felem x_out, felem y_out,
+                const felem nx, const felem ny, const felem nz)
+{
+    felem z_inv, z_inv_sq;
+    felem_inv(z_inv, nz);
+    felem_square(z_inv_sq, z_inv);
+    felem_mul(x_out, nx, z_inv_sq);
+    /* z_inv is reused to hold nz^-3. */
+    felem_mul(z_inv, z_inv, z_inv_sq);
+    felem_mul(y_out, ny, z_inv);
+}
+
+/* scalar_mult sets {nx,ny,nz} = scalar*{x,y}.
+ *
+ * Uses a fixed 4-bit window: a table of 0..15 times {x,y} is built first and
+ * the scalar is then consumed one nibble at a time, most significant nibble
+ * first (scalar is little-endian, so that is the high nibble of scalar[31]).
+ */
+static void
+scalar_mult(felem nx, felem ny, felem nz,
+            const felem x, const felem y, const u8 scalar[32])
+{
+    int i;
+    felem px, py, pz, tx, ty, tz;
+    felem precomp[16][3];
+    limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;
+
+    /* We precompute 0,1,2,... times {x,y}. */
+    memset(precomp, 0, sizeof(felem) * 3);
+    memcpy(&precomp[1][0], x, sizeof(felem));
+    memcpy(&precomp[1][1], y, sizeof(felem));
+    memcpy(&precomp[1][2], kOne, sizeof(felem));
+
+    /* Even entries are the double of entry i / 2; odd entries add {x,y} to
+     * the even entry just computed.
+     */
+    for (i = 2; i < 16; i += 2) {
+        point_double(precomp[i][0], precomp[i][1], precomp[i][2],
+                     precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);
+
+        point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],
+                        precomp[i][0], precomp[i][1], precomp[i][2], x, y);
+    }
+
+    memset(nx, 0, sizeof(felem));
+    memset(ny, 0, sizeof(felem));
+    memset(nz, 0, sizeof(felem));
+    n_is_infinity_mask = -1;
+
+    /* We add in a window of four bits each iteration and do this 64 times. */
+    for (i = 0; i < 64; i++) {
+        if (i) {
+            point_double(nx, ny, nz, nx, ny, nz);
+            point_double(nx, ny, nz, nx, ny, nz);
+            point_double(nx, ny, nz, nx, ny, nz);
+            point_double(nx, ny, nz, nx, ny, nz);
+        }
+
+        /* Even i takes the high nibble of the byte, odd i the low nibble. */
+        index = scalar[31 - i / 2];
+        if ((i & 1) == 1) {
+            index &= 15;
+        } else {
+            index >>= 4;
+        }
+
+        /* See the comments in scalar_base_mult about handling infinities. */
+        select_jacobian_point(px, py, pz, precomp[0][0], index);
+        point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
+        copy_conditional(nx, px, n_is_infinity_mask);
+        copy_conditional(ny, py, n_is_infinity_mask);
+        copy_conditional(nz, pz, n_is_infinity_mask);
+
+        p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+        mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
+        copy_conditional(nx, tx, mask);
+        copy_conditional(ny, ty, mask);
+        copy_conditional(nz, tz, mask);
+        n_is_infinity_mask &= ~p_is_noninfinite_mask;
+    }
+}
+
+/* Interface with Freebl: */
+
+/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
+ * little-endian order. It is only defined on big-endian builds and picks
+ * the 32- or 64-bit swap according to MP_USE_UINT_DIGIT.
+ */
+#ifdef IS_BIG_ENDIAN
+#ifdef __APPLE__
+/* NOTE(review): the header name after the #include below is missing in this
+ * copy (it looks stripped by whatever flattened the patch); presumably it is
+ * the header that declares OSSwapInt32/OSSwapInt64 - confirm upstream. */
+#include
+#define BYTESWAP32(x) OSSwapInt32(x)
+#define BYTESWAP64(x) OSSwapInt64(x)
+#else
+#define BYTESWAP32(x) \
+    (((x) >> 24) | (((x) >> 8) & 0xff00) | (((x)&0xff00) << 8) | ((x) << 24))
+#define BYTESWAP64(x) \
+    (((x) >> 56) | (((x) >> 40) & 0xff00) | \
+     (((x) >> 24) & 0xff0000) | (((x) >> 8) & 0xff000000) | \
+     (((x)&0xff000000) << 8) | (((x)&0xff0000) << 24) | \
+     (((x)&0xff00) << 40) | ((x) << 56))
+#endif
+
+#ifdef MP_USE_UINT_DIGIT
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)
+#else
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)
+#endif
+#endif /* IS_BIG_ENDIAN */
+
+/* Digits of the constant that from_montgomery multiplies by to undo the
+ * Montgomery factor R, least significant digit first, for 32-bit and 64-bit
+ * mp_digit respectively. */
+#ifdef MP_USE_UINT_DIGIT
+static const mp_digit kRInvDigits[8] = {
+    0x80000000, 1, 0xffffffff, 0,
+    0x80000001, 0xfffffffe, 1, 0x7fffffff
+};
+#else
+static const mp_digit kRInvDigits[4] = {
+    PR_UINT64(0x180000000), 0xffffffff,
+    PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
+};
+#endif
+/* Number of mp_digits needed to hold 256 bits (32 bytes). */
+#define MP_DIGITS_IN_256_BITS (32 / sizeof(mp_digit))
+/* Read-only mp_int view over kRInvDigits; the cast drops const, so it must
+ * never be passed as an output argument. */
+static const mp_int kRInv = {
+    MP_ZPOS,
+    MP_DIGITS_IN_256_BITS,
+    MP_DIGITS_IN_256_BITS,
+    (mp_digit *)kRInvDigits
+};
+
+/* 2^28 and 2^29: the radices of the alternating 29/28-bit limbs. */
+static const limb kTwo28 = 0x10000000;
+static const limb kTwo29 = 0x20000000;
+
+/* to_montgomery sets out = R*in.
*/ +static mp_err +to_montgomery(felem out, const mp_int *in, const ECGroup *group) +{ + /* There are no MPI functions for bitshift operations and we wish to shift + * in 257 bits left so we move the digits 256-bits left and then multiply + * by two. + */ + mp_int in_shifted; + int i; + mp_err res; + + MP_CHECKOK(mp_init(&in_shifted)); + MP_CHECKOK(s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS)); + memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS), + MP_DIGITS(in), + MP_USED(in) * sizeof(mp_digit)); + MP_CHECKOK(mp_mul_2(&in_shifted, &in_shifted)); + MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth)); + + for (i = 0;; i++) { + out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits; + MP_CHECKOK(mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL)); + + i++; + if (i == NLIMBS) + break; + out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits; + MP_CHECKOK(mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL)); + } + +CLEANUP: + mp_clear(&in_shifted); + return res; +} + +/* from_montgomery sets out=in/R. */ +static mp_err +from_montgomery(mp_int *out, const felem in, + const ECGroup *group) +{ + mp_int result, tmp; + mp_err res; + int i; + + MP_CHECKOK(mp_init(&result)); + MP_CHECKOK(mp_init(&tmp)); + + MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS - 1], &result)); + for (i = NLIMBS - 2; i >= 0; i--) { + if ((i & 1) == 0) { + MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp)); + } else { + MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp)); + } + MP_CHECKOK(mp_add_d(&tmp, in[i], &result)); + } + + MP_CHECKOK(mp_mul(&result, &kRInv, out)); + MP_CHECKOK(group->meth->field_mod(out, out, group->meth)); + +CLEANUP: + mp_clear(&result); + mp_clear(&tmp); + return res; +} + +/* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ +static void +scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) +{ + /* We require that |n| is less than the order of the group and therefore it + * will fit into |out_scalar|. 
However, these is a timing side-channel here + * that we cannot avoid: if |n| is sufficiently small it may be one or more + * words too short and we'll copy less data. + */ + memset(out_scalar, 0, 32); +#ifdef IS_LITTLE_ENDIAN + memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); +#else + { + mp_size i; + mp_digit swapped[MP_DIGITS_IN_256_BITS]; + for (i = 0; i < MP_USED(n); i++) { + swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i)); + } + memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit)); + } +#endif +} + +/* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the + * order of the group. + */ +static mp_err +ec_GFp_nistp256_base_point_mul(const mp_int *n, + mp_int *out_x, mp_int *out_y, + const ECGroup *group) +{ + u8 scalar[32]; + felem x, y, z, x_affine, y_affine; + mp_err res; + + /* FIXME(agl): test that n < order. */ + + scalar_from_mp_int(scalar, n); + scalar_base_mult(x, y, z, scalar); + point_to_affine(x_affine, y_affine, x, y, z); + MP_CHECKOK(from_montgomery(out_x, x_affine, group)); + MP_CHECKOK(from_montgomery(out_y, y_affine, group)); + +CLEANUP: + return res; +} + +/* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is < + * the order of the group. + */ +static mp_err +ec_GFp_nistp256_point_mul(const mp_int *n, + const mp_int *in_x, const mp_int *in_y, + mp_int *out_x, mp_int *out_y, + const ECGroup *group) +{ + u8 scalar[32]; + felem x, y, z, x_affine, y_affine, px, py; + mp_err res; + + scalar_from_mp_int(scalar, n); + + MP_CHECKOK(to_montgomery(px, in_x, group)); + MP_CHECKOK(to_montgomery(py, in_y, group)); + + scalar_mult(x, y, z, px, py, scalar); + point_to_affine(x_affine, y_affine, x, y, z); + MP_CHECKOK(from_montgomery(out_x, x_affine, group)); + MP_CHECKOK(from_montgomery(out_y, y_affine, group)); + +CLEANUP: + return res; +} + +/* ec_GFp_nistp256_point_mul_vartime sets {out_x,out_y} = n1*G + + * n2*{in_x,in_y}, where n1 and n2 are < the order of the group. 
+ *
+ * As indicated by the name, this function operates in variable time. This
+ * is safe because it's used for signature validation which doesn't deal
+ * with secrets.
+ */
+static mp_err
+ec_GFp_nistp256_points_mul_vartime(
+    const mp_int *n1, const mp_int *n2,
+    const mp_int *in_x, const mp_int *in_y,
+    mp_int *out_x, mp_int *out_y,
+    const ECGroup *group)
+{
+    u8 scalar1[32], scalar2[32];
+    felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;
+    mp_err res = MP_OKAY;
+
+    /* If n2 == NULL, this is just a base-point multiplication. */
+    if (n2 == NULL) {
+        return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);
+    }
+
+    /* If n1 == NULL, this is just an arbitrary-point multiplication. */
+    if (n1 == NULL) {
+        return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);
+    }
+
+    /* If both scalars are zero, then the result is the point at infinity. */
+    if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) {
+        mp_zero(out_x);
+        mp_zero(out_y);
+        return res;
+    }
+
+    scalar_from_mp_int(scalar1, n1);
+    scalar_from_mp_int(scalar2, n2);
+
+    MP_CHECKOK(to_montgomery(px, in_x, group));
+    MP_CHECKOK(to_montgomery(py, in_y, group));
+    /* Both products are always computed; the branches below only decide
+     * how they are combined. */
+    scalar_base_mult(x1, y1, z1, scalar1);
+    scalar_mult(x2, y2, z2, px, py, scalar2);
+
+    if (mp_cmp_z(n2) == 0) {
+        /* If n2 == 0, then {x2,y2,z2} is zero and the result is just
+         * {x1,y1,z1}. */
+    } else if (mp_cmp_z(n1) == 0) {
+        /* If n1 == 0, then {x1,y1,z1} is zero and the result is just
+         * {x2,y2,z2}. */
+        memcpy(x1, x2, sizeof(x2));
+        memcpy(y1, y2, sizeof(y2));
+        memcpy(z1, z2, sizeof(z2));
+    } else {
+        /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */
+        point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
+    }
+
+    point_to_affine(x_affine, y_affine, x1, y1, z1);
+    MP_CHECKOK(from_montgomery(out_x, x_affine, group));
+    MP_CHECKOK(from_montgomery(out_y, y_affine, group));
+
+CLEANUP:
+    return res;
+}
+
+/* Wire in fast point multiplication for named curves.
*/
+mp_err
+ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
+{
+    /* Any other curve name leaves the group untouched. */
+    if (name == ECCurve_NIST_P256) {
+        group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
+        group->point_mul = &ec_GFp_nistp256_point_mul;
+        group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
+    }
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_384.c b/security/nss/lib/freebl/ecl/ecp_384.c
new file mode 100644
index 0000000000..702fd976ed
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_384.c
@@ -0,0 +1,258 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast modular reduction for p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1. a can be r.
+ * Uses algorithm 2.30 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * The input is treated as 24 32-bit words. Ten 384-bit terms are assembled
+ * from those words and combined as
+ *   r = m[0] + 2*m[1] + m[2] + m[3] + m[4] + m[5] + m[6]
+ *       - m[7] - m[8] - m[9]   (mod p384).
+ * Inputs outside the (736, 768] bit range fall back to mp_mod. */
+static mp_err
+ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    int a_bits = mpl_significant_bits(a);
+    int i;
+
+    /* m[0..9] are statically-allocated mp_ints of exactly the size we need;
+     * each m[i] uses the stack array s[i] as its digit storage */
+    mp_int m[10];
+
+#ifdef ECL_THIRTY_TWO_BIT
+    mp_digit s[10][12];
+    for (i = 0; i < 10; i++) {
+        MP_SIGN(&m[i]) = MP_ZPOS;
+        MP_ALLOC(&m[i]) = 12;
+        MP_USED(&m[i]) = 12;
+        MP_DIGITS(&m[i]) = s[i];
+    }
+#else
+    mp_digit s[10][6];
+    for (i = 0; i < 10; i++) {
+        MP_SIGN(&m[i]) = MP_ZPOS;
+        MP_ALLOC(&m[i]) = 6;
+        MP_USED(&m[i]) = 6;
+        MP_DIGITS(&m[i]) = s[i];
+    }
+#endif
+
+#ifdef ECL_THIRTY_TWO_BIT
+    /* for polynomials larger than twice the field size or polynomials
+     * not using all words, use regular reduction */
+    if ((a_bits > 768) || (a_bits <= 736)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+        /* m[0]: the low 384 bits of a. */
+        for (i = 0; i < 12; i++) {
+            s[0][i] = MP_DIGIT(a, i);
+        }
+        /* m[1]: this term is added twice below. */
+        s[1][0] = 0;
+        s[1][1] = 0;
+        s[1][2] = 0;
+        s[1][3] = 0;
+        s[1][4] = MP_DIGIT(a, 21);
+        s[1][5] = MP_DIGIT(a, 22);
+        s[1][6] = MP_DIGIT(a, 23);
+        s[1][7] = 0;
+        s[1][8] = 0;
+        s[1][9] = 0;
+        s[1][10] = 0;
+        s[1][11] = 0;
+        /* m[2]: the high 384 bits of a. */
+        for (i = 0; i < 12; i++) {
+            s[2][i] = MP_DIGIT(a, i + 12);
+        }
+        s[3][0] = MP_DIGIT(a, 21);
+        s[3][1] = MP_DIGIT(a, 22);
+        s[3][2] = MP_DIGIT(a, 23);
+        for (i = 3; i < 12; i++) {
+            s[3][i] = MP_DIGIT(a, i + 9);
+        }
+        s[4][0] = 0;
+        s[4][1] = MP_DIGIT(a, 23);
+        s[4][2] = 0;
+        s[4][3] = MP_DIGIT(a, 20);
+        for (i = 4; i < 12; i++) {
+            s[4][i] = MP_DIGIT(a, i + 8);
+        }
+        s[5][0] = 0;
+        s[5][1] = 0;
+        s[5][2] = 0;
+        s[5][3] = 0;
+        s[5][4] = MP_DIGIT(a, 20);
+        s[5][5] = MP_DIGIT(a, 21);
+        s[5][6] = MP_DIGIT(a, 22);
+        s[5][7] = MP_DIGIT(a, 23);
+        s[5][8] = 0;
+        s[5][9] = 0;
+        s[5][10] = 0;
+        s[5][11] = 0;
+        s[6][0] = MP_DIGIT(a, 20);
+        s[6][1] = 0;
+        s[6][2] = 0;
+        s[6][3] = MP_DIGIT(a, 21);
+        s[6][4] = MP_DIGIT(a, 22);
+        s[6][5] = MP_DIGIT(a, 23);
+        s[6][6] = 0;
+        s[6][7] = 0;
+        s[6][8] = 0;
+        s[6][9] = 0;
+        s[6][10] = 0;
+        s[6][11] = 0;
+        /* m[7], m[8] and m[9] are the terms that get subtracted. */
+        s[7][0] = MP_DIGIT(a, 23);
+        for (i = 1; i < 12; i++) {
+            s[7][i] = MP_DIGIT(a, i + 11);
+        }
+        s[8][0] = 0;
+        s[8][1] = MP_DIGIT(a, 20);
+        s[8][2] = MP_DIGIT(a, 21);
+        s[8][3] = MP_DIGIT(a, 22);
+        s[8][4] = MP_DIGIT(a, 23);
+        s[8][5] = 0;
+        s[8][6] = 0;
+        s[8][7] = 0;
+        s[8][8] = 0;
+        s[8][9] = 0;
+        s[8][10] = 0;
+        s[8][11] = 0;
+        s[9][0] = 0;
+        s[9][1] = 0;
+        s[9][2] = 0;
+        s[9][3] = MP_DIGIT(a, 23);
+        s[9][4] = MP_DIGIT(a, 23);
+        s[9][5] = 0;
+        s[9][6] = 0;
+        s[9][7] = 0;
+        s[9][8] = 0;
+        s[9][9] = 0;
+        s[9][10] = 0;
+        s[9][11] = 0;
+
+        /* Only the last step reduces modulo p384; the intermediate sums
+         * are plain mp_add/mp_sub. */
+        MP_CHECKOK(mp_add(&m[0], &m[1], r));
+        MP_CHECKOK(mp_add(r, &m[1], r));
+        MP_CHECKOK(mp_add(r, &m[2], r));
+        MP_CHECKOK(mp_add(r, &m[3], r));
+        MP_CHECKOK(mp_add(r, &m[4], r));
+        MP_CHECKOK(mp_add(r, &m[5], r));
+        MP_CHECKOK(mp_add(r, &m[6], r));
+        MP_CHECKOK(mp_sub(r, &m[7], r));
+        MP_CHECKOK(mp_sub(r, &m[8], r));
+        MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
+        s_mp_clamp(r);
+    }
+#else
+    /* for polynomials larger than twice the field size or polynomials
+     * not using all words, use regular reduction */
+    if ((a_bits > 768) || (a_bits <= 736)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+        /* Same ten terms as the 32-bit branch, with each pair of 32-bit
+         * words packed into one 64-bit digit by shifts and masks. */
+        for (i = 0; i < 6; i++) {
+            s[0][i] = MP_DIGIT(a, i);
+        }
+        s[1][0] = 0;
+        s[1][1] = 0;
+        s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[1][3] = MP_DIGIT(a, 11) >> 32;
+        s[1][4] = 0;
+        s[1][5] = 0;
+        for (i = 0; i < 6; i++) {
+            s[2][i] = MP_DIGIT(a, i + 6);
+        }
+        s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+        for (i = 2; i < 6; i++) {
+            s[3][i] = (MP_DIGIT(a, i + 4) >> 32) | (MP_DIGIT(a, i + 5) << 32);
+        }
+        s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32;
+        s[4][1] = MP_DIGIT(a, 10) << 32;
+        for (i = 2; i < 6; i++) {
+            s[4][i] = MP_DIGIT(a, i + 4);
+        }
+        s[5][0] = 0;
+        s[5][1] = 0;
+        s[5][2] = MP_DIGIT(a, 10);
+        s[5][3] = MP_DIGIT(a, 11);
+        s[5][4] = 0;
+        s[5][5] = 0;
+        s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32;
+        s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32;
+        s[6][2] = MP_DIGIT(a, 11);
+        s[6][3] = 0;
+        s[6][4] = 0;
+        s[6][5] = 0;
+        s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+        for (i = 1; i < 6; i++) {
+            s[7][i] = (MP_DIGIT(a, i + 5) >> 32) | (MP_DIGIT(a, i + 6) << 32);
+        }
+        s[8][0] = MP_DIGIT(a, 10) << 32;
+        s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[8][2] = MP_DIGIT(a, 11) >> 32;
+        s[8][3] = 0;
+        s[8][4] = 0;
+        s[8][5] = 0;
+        s[9][0] = 0;
+        s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32;
+        s[9][2] = MP_DIGIT(a, 11) >> 32;
+        s[9][3] = 0;
+        s[9][4] = 0;
+        s[9][5] = 0;
+
+        MP_CHECKOK(mp_add(&m[0], &m[1], r));
+        MP_CHECKOK(mp_add(r, &m[1], r));
+        MP_CHECKOK(mp_add(r, &m[2], r));
+        MP_CHECKOK(mp_add(r, &m[3], r));
+        MP_CHECKOK(mp_add(r, &m[4], r));
+        MP_CHECKOK(mp_add(r, &m[5], r));
+        MP_CHECKOK(mp_add(r, &m[6], r));
+        MP_CHECKOK(mp_sub(r, &m[7], r));
+        MP_CHECKOK(mp_sub(r, &m[8], r));
+        MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
+        s_mp_clamp(r);
+    }
+#endif
+
+CLEANUP:
+    return res;
+}
+
+/* Compute the square of
polynomial a, reduce modulo p384. Store the
+ * result in r. r could be a. Uses optimized modular reduction for p384.
+ */
+static mp_err
+ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    /* Full-width square first, then the fast reduction in place. */
+    MP_CHECKOK(mp_sqr(a, r));
+    MP_CHECKOK(ec_GFp_nistp384_mod(r, r, meth));
+CLEANUP:
+    return res;
+}
+
+/* Compute the product of two polynomials a and b, reduce modulo p384.
+ * Store the result in r. r could be a or b; a could be b. Uses
+ * optimized modular reduction for p384. */
+static mp_err
+ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    /* Full-width product first, then the fast reduction in place. */
+    MP_CHECKOK(mp_mul(a, b, r));
+    MP_CHECKOK(ec_GFp_nistp384_mod(r, r, meth));
+CLEANUP:
+    return res;
+}
+
+/* Wire in fast field arithmetic for named curves: for NIST P-384 the
+ * group's field_mod, field_mul and field_sqr methods are replaced by the
+ * routines in this file. Other curve names leave the group unchanged.
+ * Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp384(ECGroup *group, ECCurveName name)
+{
+    if (name == ECCurve_NIST_P384) {
+        group->meth->field_mod = &ec_GFp_nistp384_mod;
+        group->meth->field_mul = &ec_GFp_nistp384_mul;
+        group->meth->field_sqr = &ec_GFp_nistp384_sqr;
+    }
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_521.c b/security/nss/lib/freebl/ecl/ecp_521.c
new file mode 100644
index 0000000000..6ca0dbb11f
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_521.c
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Number of mp_digits used for a P-521 field element (the macro is defined
+ * outside this file). */
+#define ECP521_DIGITS ECL_CURVE_DIGITS(521)
+
+/* Fast modular reduction for p521 = 2^521 - 1. a can be r. Uses
+ * algorithm 2.31 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ */
+static mp_err
+ec_GFp_nistp521_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    int a_bits = mpl_significant_bits(a);
+    unsigned int i;
+
+    /* m1 is a statically-allocated mp_int of exactly the size we need; it
+     * uses the zero-initialised stack array s1 as its digit storage */
+    mp_int m1;
+
+    mp_digit s1[ECP521_DIGITS] = { 0 };
+
+    MP_SIGN(&m1) = MP_ZPOS;
+    MP_ALLOC(&m1) = ECP521_DIGITS;
+    MP_USED(&m1) = ECP521_DIGITS;
+    MP_DIGITS(&m1) = s1;
+
+    /* Fewer than 521 bits: a is already below p521, so just copy it. */
+    if (a_bits < 521) {
+        if (a == r)
+            return MP_OKAY;
+        return mp_copy(a, r);
+    }
+    /* for polynomials larger than twice the field size, use regular
+     * reduction */
+    if (a_bits > (521 * 2)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+        /* Since 2^521 == 1 (mod p521), a reduces to (a mod 2^521) plus
+         * (a >> 521). Bit 521 sits at bit 9 of the top field digit, so the
+         * high part is obtained by shifting digits right by 9 bits into s1.
+         */
+#define FIRST_DIGIT (ECP521_DIGITS - 1)
+        for (i = FIRST_DIGIT; i < MP_USED(a) - 1; i++) {
+            s1[i - FIRST_DIGIT] = (MP_DIGIT(a, i) >> 9) | (MP_DIGIT(a, 1 + i) << (MP_DIGIT_BIT - 9));
+        }
+        s1[i - FIRST_DIGIT] = MP_DIGIT(a, i) >> 9;
+
+        if (a != r) {
+            MP_CHECKOK(s_mp_pad(r, ECP521_DIGITS));
+            for (i = 0; i < ECP521_DIGITS; i++) {
+                MP_DIGIT(r, i) = MP_DIGIT(a, i);
+            }
+        }
+        /* Keep only the low 521 bits of a in r. */
+        MP_USED(r) = ECP521_DIGITS;
+        MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
+
+        MP_CHECKOK(s_mp_add(r, &m1));
+        /* A carry into bit 521 is worth one more 2^521 == 1: fold it back
+         * in. Otherwise the only value that still needs reducing is p521
+         * itself, which becomes 0. */
+        if (MP_DIGIT(r, FIRST_DIGIT) & 0x200) {
+            MP_CHECKOK(s_mp_add_d(r, 1));
+            MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
+        } else if (s_mp_cmp(r, &meth->irr) == 0) {
+            mp_zero(r);
+        }
+        s_mp_clamp(r);
+    }
+
+CLEANUP:
+    return res;
+}
+
+/* Compute the square of polynomial a, reduce modulo p521. Store the
+ * result in r. r could be a. Uses optimized modular reduction for p521.
+ */
+static mp_err
+ec_GFp_nistp521_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    /* Full-width square first, then the fast reduction in place. */
+    MP_CHECKOK(mp_sqr(a, r));
+    MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth));
+CLEANUP:
+    return res;
+}
+
+/* Compute the product of two polynomials a and b, reduce modulo p521.
+ * Store the result in r. r could be a or b; a could be b. Uses
+ * optimized modular reduction for p521.
+ */
+static mp_err
+ec_GFp_nistp521_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    /* Full-width product first, then the fast reduction in place. */
+    MP_CHECKOK(mp_mul(a, b, r));
+    MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth));
+CLEANUP:
+    return res;
+}
+
+/* Divides two field elements. If a is NULL, then returns the inverse of
+ * b. */
+static mp_err
+ec_GFp_nistp521_div(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a == NULL) {
+        return mp_invmod(b, &meth->irr, r);
+    } else {
+        /* MPI doesn't support divmod, so we implement it using invmod and
+         * mulmod. */
+        MP_CHECKOK(mp_init(&t));
+        MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
+        MP_CHECKOK(mp_mul(a, &t, r));
+        MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth));
+    /* The label lives inside the else branch because t only exists on
+     * this path. */
+    CLEANUP:
+        mp_clear(&t);
+        return res;
+    }
+}
+
+/* Wire in fast field arithmetic for named curves: for NIST P-521 the
+ * group's field_mod, field_mul, field_sqr and field_div methods are
+ * replaced by the routines in this file. Other curve names leave the group
+ * unchanged. Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp521(ECGroup *group, ECCurveName name)
+{
+    if (name == ECCurve_NIST_P521) {
+        group->meth->field_mod = &ec_GFp_nistp521_mod;
+        group->meth->field_mul = &ec_GFp_nistp521_mul;
+        group->meth->field_sqr = &ec_GFp_nistp521_sqr;
+        group->meth->field_div = &ec_GFp_nistp521_div;
+    }
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_aff.c b/security/nss/lib/freebl/ecl/ecp_aff.c
new file mode 100644
index 0000000000..2f8802e8d0
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_aff.c
@@ -0,0 +1,308 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mplogic.h"
+/* NOTE(review): the header name after the #include below is missing in this
+ * copy (it looks stripped by whatever flattened the patch) - confirm
+ * against upstream. */
+#include
+
+/* Checks if point P(px, py) is at infinity. Uses affine coordinates.
+ */
+mp_err
+ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py)
+{
+
+    /* Infinity is represented by the affine pair (0, 0). */
+    if ((mp_cmp_z(px) == 0) && (mp_cmp_z(py) == 0)) {
+        return MP_YES;
+    } else {
+        return MP_NO;
+    }
+}
+
+/* Sets P(px, py) to be the point at infinity. Uses affine coordinates.
+ * Always returns MP_OKAY. */
+mp_err
+ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py)
+{
+    mp_zero(px);
+    mp_zero(py);
+    return MP_OKAY;
+}
+
+/* Computes R = P + Q based on IEEE P1363 A.10.1. Elliptic curve points P,
+ * Q, and R can all be identical. Uses affine coordinates. Assumes input
+ * is already field-encoded using field_enc, and returns output that is
+ * still field-encoded. */
+mp_err
+ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int lambda, temp, tempx, tempy;
+
+    /* Zero the digit pointers so that CLEANUP can mp_clear every temporary
+     * even when one of the mp_init calls fails. */
+    MP_DIGITS(&lambda) = 0;
+    MP_DIGITS(&temp) = 0;
+    MP_DIGITS(&tempx) = 0;
+    MP_DIGITS(&tempy) = 0;
+    MP_CHECKOK(mp_init(&lambda));
+    MP_CHECKOK(mp_init(&temp));
+    MP_CHECKOK(mp_init(&tempx));
+    MP_CHECKOK(mp_init(&tempy));
+    /* if P = inf, then R = Q */
+    /* NOTE(review): compares against literal 0 rather than MP_YES;
+     * presumably MP_YES is 0 - confirm in mpi.h. */
+    if (ec_GFp_pt_is_inf_aff(px, py) == 0) {
+        MP_CHECKOK(mp_copy(qx, rx));
+        MP_CHECKOK(mp_copy(qy, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if Q = inf, then R = P */
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == 0) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if px != qx, then lambda = (py-qy) / (px-qx) */
+    if (mp_cmp(px, qx) != 0) {
+        MP_CHECKOK(group->meth->field_sub(py, qy, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_sub(px, qx, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempy, &tempx, &lambda, group->meth));
+    } else {
+        /* if py != qy or qy = 0, then R = inf */
+        if (((mp_cmp(py, qy) != 0)) || (mp_cmp_z(qy) == 0)) {
+            mp_zero(rx);
+            mp_zero(ry);
+            res = MP_OKAY;
+            goto CLEANUP;
+        }
+        /* lambda = (3qx^2+a) / (2qy) */
+        MP_CHECKOK(group->meth->field_sqr(qx, &tempx, group->meth));
+        /* The small constants 3 and 2 are field-encoded when the field
+         * method defines an encoding. */
+        MP_CHECKOK(mp_set_int(&temp, 3));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(&tempx, &temp, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_add(&tempx, &group->curvea, &tempx, group->meth));
+        MP_CHECKOK(mp_set_int(&temp, 2));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(qy, &temp, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempx, &tempy, &lambda, group->meth));
+    }
+    /* rx = lambda^2 - px - qx */
+    MP_CHECKOK(group->meth->field_sqr(&lambda, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, px, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, qx, &tempx, group->meth));
+    /* ry = (qx - rx) * lambda - qy */
+    MP_CHECKOK(group->meth->field_sub(qx, &tempx, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&tempy, &lambda, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempy, qy, &tempy, group->meth));
+    /* Results are written last so that R may alias P or Q. */
+    MP_CHECKOK(mp_copy(&tempx, rx));
+    MP_CHECKOK(mp_copy(&tempy, ry));
+
+CLEANUP:
+    mp_clear(&lambda);
+    mp_clear(&temp);
+    mp_clear(&tempx);
+    mp_clear(&tempy);
+    return res;
+}
+
+/* Computes R = P - Q. Elliptic curve points P, Q, and R can all be
+ * identical. Uses affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. */
+mp_err
+ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int nqy;
+
+    MP_DIGITS(&nqy) = 0;
+    MP_CHECKOK(mp_init(&nqy));
+    /* nqy = -qy */
+    MP_CHECKOK(group->meth->field_neg(qy, &nqy, group->meth));
+    /* P - Q = P + (-Q), added through the group's own point_add. */
+    res = group->point_add(px, py, qx, &nqy, rx, ry, group);
+CLEANUP:
+    mp_clear(&nqy);
+    return res;
+}
+
+/* Computes R = 2P. Elliptic curve points P and R can be identical.
Uses + * affine coordinates. Assumes input is already field-encoded using + * field_enc, and returns output that is still field-encoded. */ +mp_err +ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group) +{ + return ec_GFp_pt_add_aff(px, py, px, py, rx, ry, group); +} + +/* by default, this routine is unused and thus doesn't need to be compiled */ +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF +/* Computes R = nP based on IEEE P1363 A.10.3. Elliptic curve points P and + * R can be identical. Uses affine coordinates. Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. */ +mp_err +ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int k, k3, qx, qy, sx, sy; + int b1, b3, i, l; + + MP_DIGITS(&k) = 0; + MP_DIGITS(&k3) = 0; + MP_DIGITS(&qx) = 0; + MP_DIGITS(&qy) = 0; + MP_DIGITS(&sx) = 0; + MP_DIGITS(&sy) = 0; + MP_CHECKOK(mp_init(&k)); + MP_CHECKOK(mp_init(&k3)); + MP_CHECKOK(mp_init(&qx)); + MP_CHECKOK(mp_init(&qy)); + MP_CHECKOK(mp_init(&sx)); + MP_CHECKOK(mp_init(&sy)); + + /* if n = 0 then r = inf */ + if (mp_cmp_z(n) == 0) { + mp_zero(rx); + mp_zero(ry); + res = MP_OKAY; + goto CLEANUP; + } + /* Q = P, k = n */ + MP_CHECKOK(mp_copy(px, &qx)); + MP_CHECKOK(mp_copy(py, &qy)); + MP_CHECKOK(mp_copy(n, &k)); + /* if n < 0 then Q = -Q, k = -k */ + if (mp_cmp_z(n) < 0) { + MP_CHECKOK(group->meth->field_neg(&qy, &qy, group->meth)); + MP_CHECKOK(mp_neg(&k, &k)); + } +#ifdef ECL_DEBUG /* basic double and add method */ + l = mpl_significant_bits(&k) - 1; + MP_CHECKOK(mp_copy(&qx, &sx)); + MP_CHECKOK(mp_copy(&qy, &sy)); + for (i = l - 1; i >= 0; i--) { + /* S = 2S */ + MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group)); + /* if k_i = 1, then S = S + Q */ + if (mpl_get_bit(&k, i) != 0) { + MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group)); + } + } +#else /* double 
and add/subtract method from \ + * standard */ + /* k3 = 3 * k */ + MP_CHECKOK(mp_set_int(&k3, 3)); + MP_CHECKOK(mp_mul(&k, &k3, &k3)); + /* S = Q */ + MP_CHECKOK(mp_copy(&qx, &sx)); + MP_CHECKOK(mp_copy(&qy, &sy)); + /* l = index of high order bit in binary representation of 3*k */ + l = mpl_significant_bits(&k3) - 1; + /* for i = l-1 downto 1 */ + for (i = l - 1; i >= 1; i--) { + /* S = 2S */ + MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group)); + b3 = MP_GET_BIT(&k3, i); + b1 = MP_GET_BIT(&k, i); + /* if k3_i = 1 and k_i = 0, then S = S + Q */ + if ((b3 == 1) && (b1 == 0)) { + MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group)); + /* if k3_i = 0 and k_i = 1, then S = S - Q */ + } else if ((b3 == 0) && (b1 == 1)) { + MP_CHECKOK(group->point_sub(&sx, &sy, &qx, &qy, &sx, &sy, group)); + } + } +#endif + /* output S */ + MP_CHECKOK(mp_copy(&sx, rx)); + MP_CHECKOK(mp_copy(&sy, ry)); + +CLEANUP: + mp_clear(&k); + mp_clear(&k3); + mp_clear(&qx); + mp_clear(&qy); + mp_clear(&sx); + mp_clear(&sy); + return res; +} +#endif + +/* Validates a point on a GFp curve. */ +mp_err +ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group) +{ + mp_err res = MP_NO; + mp_int accl, accr, tmp, pxt, pyt; + + MP_DIGITS(&accl) = 0; + MP_DIGITS(&accr) = 0; + MP_DIGITS(&tmp) = 0; + MP_DIGITS(&pxt) = 0; + MP_DIGITS(&pyt) = 0; + MP_CHECKOK(mp_init(&accl)); + MP_CHECKOK(mp_init(&accr)); + MP_CHECKOK(mp_init(&tmp)); + MP_CHECKOK(mp_init(&pxt)); + MP_CHECKOK(mp_init(&pyt)); + + /* 1: Verify that publicValue is not the point at infinity */ + if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) { + res = MP_NO; + goto CLEANUP; + } + /* 2: Verify that the coordinates of publicValue are elements + * of the field. + */ + if ((MP_SIGN(px) == MP_NEG) || (mp_cmp(px, &group->meth->irr) >= 0) || + (MP_SIGN(py) == MP_NEG) || (mp_cmp(py, &group->meth->irr) >= 0)) { + res = MP_NO; + goto CLEANUP; + } + /* 3: Verify that publicValue is on the curve. 
*/ + if (group->meth->field_enc) { + group->meth->field_enc(px, &pxt, group->meth); + group->meth->field_enc(py, &pyt, group->meth); + } else { + MP_CHECKOK(mp_copy(px, &pxt)); + MP_CHECKOK(mp_copy(py, &pyt)); + } + /* left-hand side: y^2 */ + MP_CHECKOK(group->meth->field_sqr(&pyt, &accl, group->meth)); + /* right-hand side: x^3 + a*x + b = (x^2 + a)*x + b by Horner's rule */ + MP_CHECKOK(group->meth->field_sqr(&pxt, &tmp, group->meth)); + MP_CHECKOK(group->meth->field_add(&tmp, &group->curvea, &tmp, group->meth)); + MP_CHECKOK(group->meth->field_mul(&tmp, &pxt, &accr, group->meth)); + MP_CHECKOK(group->meth->field_add(&accr, &group->curveb, &accr, group->meth)); + /* check LHS - RHS == 0 */ + MP_CHECKOK(group->meth->field_sub(&accl, &accr, &accr, group->meth)); + if (mp_cmp_z(&accr) != 0) { + res = MP_NO; + goto CLEANUP; + } + /* 4: Verify that the order of the curve times the publicValue + * is the point at infinity. + */ + MP_CHECKOK(ECPoint_mul(group, &group->order, px, py, &pxt, &pyt)); + if (ec_GFp_pt_is_inf_aff(&pxt, &pyt) != MP_YES) { + res = MP_NO; + goto CLEANUP; + } + + res = MP_YES; + +CLEANUP: + mp_clear(&accl); + mp_clear(&accr); + mp_clear(&tmp); + mp_clear(&pxt); + mp_clear(&pyt); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_jac.c b/security/nss/lib/freebl/ecl/ecp_jac.c new file mode 100644 index 0000000000..535e75903f --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_jac.c @@ -0,0 +1,513 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mplogic.h" +#include +#ifdef ECL_DEBUG +#include +#endif + +/* Converts a point P(px, py) from affine coordinates to Jacobian + * projective coordinates R(rx, ry, rz). Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. 
+ */
+mp_err
+ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+    } else {
+        /* (x, y) maps to (x, y, 1), with the 1 field-encoded when the
+         * field method defines an encoding. */
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        MP_CHECKOK(mp_set_int(rz, 1));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(rz, rz, group->meth));
+        }
+    }
+CLEANUP:
+    return res;
+}
+
+/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
+ * affine coordinates R(rx, ry).  P and R can share x and y coordinates.
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded. */
+mp_err
+ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                  mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int z1, z2, z3;
+
+    /* Zero the digit pointers so that CLEANUP can mp_clear every temporary
+     * even when one of the mp_init calls fails. */
+    MP_DIGITS(&z1) = 0;
+    MP_DIGITS(&z2) = 0;
+    MP_DIGITS(&z3) = 0;
+    MP_CHECKOK(mp_init(&z1));
+    MP_CHECKOK(mp_init(&z2));
+    MP_CHECKOK(mp_init(&z3));
+
+    /* if point at infinity, then set point at infinity and exit */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_aff(rx, ry));
+        goto CLEANUP;
+    }
+
+    /* transform (px, py, pz) into (px / pz^2, py / pz^3) */
+    if (mp_cmp_d(pz, 1) == 0) {
+        /* pz is the integer 1: the coordinates are copied unchanged. */
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+    } else {
+        /* z1 = 1/pz (field_div with a NULL numerator), z2 = z1^2,
+         * z3 = z1^3. */
+        MP_CHECKOK(group->meth->field_div(NULL, pz, &z1, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(&z1, &z2, group->meth));
+        MP_CHECKOK(group->meth->field_mul(&z1, &z2, &z3, group->meth));
+        MP_CHECKOK(group->meth->field_mul(px, &z2, rx, group->meth));
+        MP_CHECKOK(group->meth->field_mul(py, &z3, ry, group->meth));
+    }
+
+CLEANUP:
+    mp_clear(&z1);
+    mp_clear(&z2);
+    mp_clear(&z3);
+    return res;
+}
+
+/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian
+ * coordinates.
*/ +mp_err +ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, const mp_int *pz) +{ + /* Only pz is examined. The raw mp_cmp_z result is returned and callers + * compare it with MP_YES. NOTE(review): this relies on MP_YES having + * the value mp_cmp_z yields for zero - confirm against mpi.h. */ + return mp_cmp_z(pz); +} + +/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian + * coordinates. */ +mp_err +ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz) +{ + /* Only pz is zeroed; px and py are left as they were. */ + mp_zero(pz); + return MP_OKAY; +} + +/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is + * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical. + * Uses mixed Jacobian-affine coordinates. Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. Uses equation (2) from Brown, Hankerson, Lopez, and + * Menezes. Software Implementation of the NIST Elliptic Curves Over Prime + * Fields. */ +mp_err +ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, mp_int *rz, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int A, B, C, D, C2, C3; + + MP_DIGITS(&A) = 0; + MP_DIGITS(&B) = 0; + MP_DIGITS(&C) = 0; + MP_DIGITS(&D) = 0; + MP_DIGITS(&C2) = 0; + MP_DIGITS(&C3) = 0; + MP_CHECKOK(mp_init(&A)); + MP_CHECKOK(mp_init(&B)); + MP_CHECKOK(mp_init(&C)); + MP_CHECKOK(mp_init(&D)); + MP_CHECKOK(mp_init(&C2)); + MP_CHECKOK(mp_init(&C3)); + + /* If either P or Q is the point at infinity, then return the other + * point */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group)); + goto CLEANUP; + } + if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + MP_CHECKOK(mp_copy(pz, rz)); + goto CLEANUP; + } + + /* A = qx * pz^2, B = qy * pz^3 */ + MP_CHECKOK(group->meth->field_sqr(pz, &A, group->meth)); + MP_CHECKOK(group->meth->field_mul(&A, pz, &B, group->meth)); + MP_CHECKOK(group->meth->field_mul(&A, qx, &A, group->meth)); + MP_CHECKOK(group->meth->field_mul(&B, qy, &B, group->meth)); + + /* C = A - px, D = B - py */ +
MP_CHECKOK(group->meth->field_sub(&A, px, &C, group->meth)); + MP_CHECKOK(group->meth->field_sub(&B, py, &D, group->meth)); + + if (mp_cmp_z(&C) == 0) { + /* P == Q or P == -Q */ + if (mp_cmp_z(&D) == 0) { + /* P == Q */ + /* It is cheaper to double (qx, qy, 1) than (px, py, pz). */ + /* D is zero here, so writing its low digit makes it the plain + * integer 1. NOTE(review): this 1 is not passed through + * field_enc; ec_GFp_pt_dbl_jac tests pz with mp_cmp_d(pz, 1). */ + MP_DIGIT(&D, 0) = 1; /* Set D to 1. */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(qx, qy, &D, rx, ry, rz, group)); + } else { + /* P == -Q */ + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + } + goto CLEANUP; + } + + /* C2 = C^2, C3 = C^3 */ + MP_CHECKOK(group->meth->field_sqr(&C, &C2, group->meth)); + MP_CHECKOK(group->meth->field_mul(&C, &C2, &C3, group->meth)); + + /* rz = pz * C */ + MP_CHECKOK(group->meth->field_mul(pz, &C, rz, group->meth)); + + /* C = px * C^2 */ + MP_CHECKOK(group->meth->field_mul(px, &C2, &C, group->meth)); + /* A = D^2 */ + MP_CHECKOK(group->meth->field_sqr(&D, &A, group->meth)); + + /* rx = D^2 - (C^3 + 2 * (px * C^2)) */ + MP_CHECKOK(group->meth->field_add(&C, &C, rx, group->meth)); + MP_CHECKOK(group->meth->field_add(&C3, rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(&A, rx, rx, group->meth)); + + /* C3 = py * C^3 */ + MP_CHECKOK(group->meth->field_mul(py, &C3, &C3, group->meth)); + + /* ry = D * (px * C^2 - rx) - py * C^3 */ + MP_CHECKOK(group->meth->field_sub(&C, rx, ry, group->meth)); + MP_CHECKOK(group->meth->field_mul(&D, ry, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, &C3, ry, group->meth)); + +CLEANUP: + mp_clear(&A); + mp_clear(&B); + mp_clear(&C); + mp_clear(&D); + mp_clear(&C2); + mp_clear(&C3); + return res; +} + +/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses + * Jacobian coordinates. + * + * Assumes input is already field-encoded using field_enc, and returns + * output that is still field-encoded. + * + * This routine implements Point Doubling in the Jacobian Projective + * space as described in the paper "Efficient elliptic curve exponentiation + * using mixed coordinates", by H.
Cohen, A Miyaji, T. Ono. + */ +mp_err +ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz, + mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int t0, t1, M, S; + + MP_DIGITS(&t0) = 0; + MP_DIGITS(&t1) = 0; + MP_DIGITS(&M) = 0; + MP_DIGITS(&S) = 0; + MP_CHECKOK(mp_init(&t0)); + MP_CHECKOK(mp_init(&t1)); + MP_CHECKOK(mp_init(&M)); + MP_CHECKOK(mp_init(&S)); + + /* P == inf or P == -P */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) { + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + goto CLEANUP; + } + + /* Three ways to compute M, chosen by the value of pz and of curvea. */ + if (mp_cmp_d(pz, 1) == 0) { + /* M = 3 * px^2 + a */ + MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &group->curvea, &M, group->meth)); + } else if (MP_SIGN(&group->curvea) == MP_NEG && + MP_USED(&group->curvea) == 1 && + MP_DIGIT(&group->curvea, 0) == 3) { + /* curvea holds the literal value -3 (negative, one digit, digit 3) */ + /* M = 3 * (px + pz^2) * (px - pz^2) */ + MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth)); + MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth)); + } else { + /* M = 3 * (px^2) + a * (pz^4) */ + MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth)); + MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth)); + MP_CHECKOK(group->meth->field_mul(&M, &group->curvea, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth)); + } + + /* rz = 2 * py * pz */ + /* t0 = 4 *
py^2 */ + if (mp_cmp_d(pz, 1) == 0) { + MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth)); + MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth)); + } else { + MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth)); + MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth)); + MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth)); + } + + /* S = 4 * px * py^2 = px * (2 * py)^2 */ + MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth)); + + /* rx = M^2 - 2 * S */ + MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth)); + MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth)); + + /* ry = M * (S - rx) - 8 * py^4 */ + /* t1 = t0^2 = 16 * py^4; it is halved below to get 8 * py^4. When t1 is + * odd, irr is added first so that the division by two is exact. */ + MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth)); + if (mp_isodd(&t1)) { + MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1)); + } + MP_CHECKOK(mp_div_2(&t1, &t1)); + MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth)); + MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth)); + MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth)); + +CLEANUP: + mp_clear(&t0); + mp_clear(&t1); + mp_clear(&M); + mp_clear(&S); + return res; +} + +/* by default, this routine is unused and thus doesn't need to be compiled */ +#ifdef ECL_ENABLE_GFP_PT_MUL_JAC +/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters + * a, b and p are the elliptic curve coefficients and the prime that + * determines the field GFp. Elliptic curve points P and R can be + * identical. Uses mixed Jacobian-affine coordinates. Assumes input is + * already field-encoded using field_enc, and returns output that is still + * field-encoded. Uses 4-bit window method.
*/ +mp_err +ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[16][2], rz; + int i, ni, d; + + MP_DIGITS(&rz) = 0; + for (i = 0; i < 16; i++) { + MP_DIGITS(&precomp[i][0]) = 0; + MP_DIGITS(&precomp[i][1]) = 0; + } + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG); + + /* initialize precomputation table */ + for (i = 0; i < 16; i++) { + MP_CHECKOK(mp_init(&precomp[i][0])); + MP_CHECKOK(mp_init(&precomp[i][1])); + } + + /* fill precomputation table */ + /* precomp[i] is i * P in affine form: entry 0 is (0, 0), entry 1 is P, + * and each later entry is P added to the previous one. */ + mp_zero(&precomp[0][0]); + mp_zero(&precomp[0][1]); + MP_CHECKOK(mp_copy(px, &precomp[1][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][1])); + for (i = 2; i < 16; i++) { + MP_CHECKOK(group->point_add(&precomp[1][0], &precomp[1][1], + &precomp[i - 1][0], &precomp[i - 1][1], + &precomp[i][0], &precomp[i][1], group)); + } + + /* d = number of 4-bit windows needed to cover the bits of n */ + d = (mpl_significant_bits(n) + 3) / 4; + + /* R = inf */ + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + /* Process windows from most significant to least significant. */ + for (i = d - 1; i >= 0; i--) { + /* compute window ni */ + ni = MP_GET_BIT(n, 4 * i + 3); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i + 2); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i + 1); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i); + /* R = 2^4 * R */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + /* R = R + (ni * P) */ + MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ni][0], &precomp[ni][1], rx, ry, + &rz, group)); + } + + /* convert result R to affine coordinates */ + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + +CLEANUP: + mp_clear(&rz); + for (i = 0; i < 16; i++) { + mp_clear(&precomp[i][0]); + mp_clear(&precomp[i][1]); + } + return res; +} +#endif + +/* Elliptic curve
scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Uses mixed Jacobian-affine coordinates. Input and output values are + * assumed to be NOT field-encoded. Uses algorithm 15 (simultaneous + * multiple point multiplication) from Brown, Hankerson, Lopez, Menezes. + * Software Implementation of the NIST Elliptic Curves over Prime Fields. */ +mp_err +ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[4][4][2]; + mp_int rz; + const mp_int *a, *b; + unsigned int i, j; + int ai, bi, d; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_DIGITS(&precomp[i][j][0]) = 0; + MP_DIGITS(&precomp[i][j][1]) = 0; + } + } + MP_DIGITS(&rz) = 0; + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG); + + /* if some arguments are not defined use ECPoint_mul */ + /* Nothing has been allocated yet, so these early returns skip CLEANUP. */ + if (k1 == NULL) { + return ECPoint_mul(group, k2, px, py, rx, ry); + } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) { + return ECPoint_mul(group, k1, NULL, NULL, rx, ry); + } + + /* initialize precomputation table */ + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_CHECKOK(mp_init(&precomp[i][j][0])); + MP_CHECKOK(mp_init(&precomp[i][j][1])); + } + } + + /* fill precomputation table */ + /* precomp[i][j] holds i * A + j * B in affine form, where A is the point + * paired with scalar a and B the point paired with scalar b. + * NOTE(review): genx/geny are copied without field_enc while px/py are + * encoded - presumably the group stores the generator already encoded; + * confirm against the ECGroup constructor. */ + /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */ + if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) { + a = k2; + b = k1; + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][0][1])); + } + MP_CHECKOK(mp_copy(&group->genx,
&precomp[0][1][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1])); + } else { + a = k1; + b = k2; + MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1])); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[0][1][0])); + MP_CHECKOK(mp_copy(py, &precomp[0][1][1])); + } + } + /* precompute [*][0][*] */ + mp_zero(&precomp[0][0][0]); + mp_zero(&precomp[0][0][1]); + MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], group)); + MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], + &precomp[3][0][0], &precomp[3][0][1], group)); + /* precompute [*][1][*] */ + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][1][0], &precomp[i][1][1], group)); + } + /* precompute [*][2][*] */ + MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][2][0], &precomp[i][2][1], group)); + } + /* precompute [*][3][*] */ + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], + &precomp[0][3][0], &precomp[0][3][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][3][0], &precomp[i][3][1], group)); + } + + /* d = number of 2-bit windows needed to cover the longer scalar a */ + d = (mpl_significant_bits(a) + 1) / 2; + + /* R = inf */ + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + /* i is unsigned, so count down with a post-decrement test. */ + for (i = d; i-- > 0;) { + ai = MP_GET_BIT(a, 2 * i +
1); + ai <<= 1; + ai |= MP_GET_BIT(a, 2 * i); + bi = MP_GET_BIT(b, 2 * i + 1); + bi <<= 1; + bi |= MP_GET_BIT(b, 2 * i); + /* R = 2^2 * R */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + /* R = R + (ai * A + bi * B) */ + MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ai][bi][0], &precomp[ai][bi][1], + rx, ry, &rz, group)); + } + + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + + /* Decode the affine result when the field method defines a decoding. */ + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + mp_clear(&rz); + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + mp_clear(&precomp[i][j][0]); + mp_clear(&precomp[i][j][1]); + } + } + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_jm.c b/security/nss/lib/freebl/ecl/ecp_jm.c new file mode 100644 index 0000000000..7998421713 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_jm.c @@ -0,0 +1,297 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "ecl-priv.h" +#include "mplogic.h" +#include + +#define MAX_SCRATCH 6 + +/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses + * Modified Jacobian coordinates. + * + * Assumes input is already field-encoded using field_enc, and returns + * output that is still field-encoded.
+ * + */ +static mp_err +ec_GFp_pt_dbl_jm(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *paz4, mp_int *rx, mp_int *ry, mp_int *rz, + mp_int *raz4, mp_int scratch[], const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int *t0, *t1, *M, *S; + + /* Temporaries come from the caller's scratch array; nothing is + * allocated or freed in this function. */ + t0 = &scratch[0]; + t1 = &scratch[1]; + M = &scratch[2]; + S = &scratch[3]; + +#if MAX_SCRATCH < 4 +#error "Scratch array defined too small " +#endif + + /* Check for point at infinity */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + /* Set r = pt at infinity by setting rz = 0 */ + /* raz4 is left unchanged on this path. */ + + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + goto CLEANUP; + } + + /* M = 3 (px^2) + a*(pz^4) */ + MP_CHECKOK(group->meth->field_sqr(px, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, t0, M, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, M, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, paz4, M, group->meth)); + + /* rz = 2 * py * pz */ + MP_CHECKOK(group->meth->field_mul(py, pz, S, group->meth)); + MP_CHECKOK(group->meth->field_add(S, S, rz, group->meth)); + + /* t0 = 2y^2 , t1 = 8y^4 */ + MP_CHECKOK(group->meth->field_sqr(py, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, t0, t0, group->meth)); + MP_CHECKOK(group->meth->field_sqr(t0, t1, group->meth)); + MP_CHECKOK(group->meth->field_add(t1, t1, t1, group->meth)); + + /* S = 4 * px * py^2 = 2 * px * t0 */ + MP_CHECKOK(group->meth->field_mul(px, t0, S, group->meth)); + MP_CHECKOK(group->meth->field_add(S, S, S, group->meth)); + + /* rx = M^2 - 2S */ + MP_CHECKOK(group->meth->field_sqr(M, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth)); + + /* ry = M * (S - rx) - t1 */ + MP_CHECKOK(group->meth->field_sub(S, rx, S, group->meth)); + MP_CHECKOK(group->meth->field_mul(S, M, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, t1, ry, group->meth)); + + /* ra*z^4 = 2*t1*(apz4) */ + MP_CHECKOK(group->meth->field_mul(paz4, t1,
raz4, group->meth)); + MP_CHECKOK(group->meth->field_add(raz4, raz4, raz4, group->meth)); + +CLEANUP: + return res; +} + +/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is + * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical. + * Uses mixed Modified_Jacobian-affine coordinates. Assumes input is + * already field-encoded using field_enc, and returns output that is still + * field-encoded. */ +static mp_err +ec_GFp_pt_add_jm_aff(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *paz4, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, mp_int *rz, + mp_int *raz4, mp_int scratch[], const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int *A, *B, *C, *D, *C2, *C3; + + /* All six temporaries live in the caller's scratch array. */ + A = &scratch[0]; + B = &scratch[1]; + C = &scratch[2]; + D = &scratch[3]; + C2 = &scratch[4]; + C3 = &scratch[5]; + +#if MAX_SCRATCH < 6 +#error "Scratch array defined too small " +#endif + + /* If either P or Q is the point at infinity, then return the other + * point */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group)); + /* raz4 = a * rz^4 for the freshly converted point */ + MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth)); + MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth)); + MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth)); + goto CLEANUP; + } + if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + MP_CHECKOK(mp_copy(pz, rz)); + MP_CHECKOK(mp_copy(paz4, raz4)); + goto CLEANUP; + } + + /* A = qx * pz^2, B = qy * pz^3 */ + MP_CHECKOK(group->meth->field_sqr(pz, A, group->meth)); + MP_CHECKOK(group->meth->field_mul(A, pz, B, group->meth)); + MP_CHECKOK(group->meth->field_mul(A, qx, A, group->meth)); + MP_CHECKOK(group->meth->field_mul(B, qy, B, group->meth)); + + /* Check P == Q */ + if (mp_cmp(A, px) == 0) { + if (mp_cmp(B, py) == 0) { + /* If Px == Qx && Py == Qy, double P.
*/ + return ec_GFp_pt_dbl_jm(px, py, pz, paz4, rx, ry, rz, raz4, + scratch, group); + } + /* If Px == Qx && Py != Qy, return point at infinity. */ + /* raz4 is not updated on this path. */ + return ec_GFp_pt_set_inf_jac(rx, ry, rz); + } + + /* C = A - px, D = B - py */ + MP_CHECKOK(group->meth->field_sub(A, px, C, group->meth)); + MP_CHECKOK(group->meth->field_sub(B, py, D, group->meth)); + + /* C2 = C^2, C3 = C^3 */ + MP_CHECKOK(group->meth->field_sqr(C, C2, group->meth)); + MP_CHECKOK(group->meth->field_mul(C, C2, C3, group->meth)); + + /* rz = pz * C */ + MP_CHECKOK(group->meth->field_mul(pz, C, rz, group->meth)); + + /* C = px * C^2 */ + MP_CHECKOK(group->meth->field_mul(px, C2, C, group->meth)); + /* A = D^2 */ + MP_CHECKOK(group->meth->field_sqr(D, A, group->meth)); + + /* rx = D^2 - (C^3 + 2 * (px * C^2)) */ + MP_CHECKOK(group->meth->field_add(C, C, rx, group->meth)); + MP_CHECKOK(group->meth->field_add(C3, rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(A, rx, rx, group->meth)); + + /* C3 = py * C^3 */ + MP_CHECKOK(group->meth->field_mul(py, C3, C3, group->meth)); + + /* ry = D * (px * C^2 - rx) - py * C^3 */ + MP_CHECKOK(group->meth->field_sub(C, rx, ry, group->meth)); + MP_CHECKOK(group->meth->field_mul(D, ry, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, C3, ry, group->meth)); + + /* raz4 = a * rz^4 */ + MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth)); + MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth)); + MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth)); +CLEANUP: + return res; +} + +/* Computes R = nP where R is (rx, ry) and P is the base point. Elliptic + * curve points P and R can be identical. Uses mixed Modified-Jacobian + * co-ordinates for doubling and Chudnovsky Jacobian coordinates for + * additions. Assumes input is already field-encoded using field_enc, and + * returns output that is still field-encoded.
Uses 5-bit window NAF + * method (algorithm 11) for scalar-point multiplication from Brown, + * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic + * Curves Over Prime Fields. */ +mp_err +ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[16][2], rz, tpx, tpy; + mp_int raz4; + mp_int scratch[MAX_SCRATCH]; + signed char *naf = NULL; + int i, orderBitSize = 0; + + MP_DIGITS(&rz) = 0; + MP_DIGITS(&raz4) = 0; + MP_DIGITS(&tpx) = 0; + MP_DIGITS(&tpy) = 0; + for (i = 0; i < 16; i++) { + MP_DIGITS(&precomp[i][0]) = 0; + MP_DIGITS(&precomp[i][1]) = 0; + } + for (i = 0; i < MAX_SCRATCH; i++) { + MP_DIGITS(&scratch[i]) = 0; + } + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG); + + /* initialize precomputation table */ + MP_CHECKOK(mp_init(&tpx)); + MP_CHECKOK(mp_init(&tpy)); + ; + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(mp_init(&raz4)); + + for (i = 0; i < 16; i++) { + MP_CHECKOK(mp_init(&precomp[i][0])); + MP_CHECKOK(mp_init(&precomp[i][1])); + } + for (i = 0; i < MAX_SCRATCH; i++) { + MP_CHECKOK(mp_init(&scratch[i])); + } + + /* Set out[8] = P */ + MP_CHECKOK(mp_copy(px, &precomp[8][0])); + MP_CHECKOK(mp_copy(py, &precomp[8][1])); + + /* Set (tpx, tpy) = 2P */ + MP_CHECKOK(group->point_dbl(&precomp[8][0], &precomp[8][1], &tpx, &tpy, + group)); + + /* Set 3P, 5P, ..., 15P */ + for (i = 8; i < 15; i++) { + MP_CHECKOK(group->point_add(&precomp[i][0], &precomp[i][1], &tpx, &tpy, + &precomp[i + 1][0], &precomp[i + 1][1], + group)); + } + + /* Set -15P, -13P, ..., -P */ + for (i = 0; i < 8; i++) { + MP_CHECKOK(mp_copy(&precomp[15 - i][0], &precomp[i][0])); + MP_CHECKOK(group->meth->field_neg(&precomp[15 - i][1], &precomp[i][1], + group->meth)); + } + + /* R = inf */ + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + orderBitSize = mpl_significant_bits(&group->order); + + /* Allocate 
memory for NAF */ + naf = (signed char *)malloc(sizeof(signed char) * (orderBitSize + 1)); + if (naf == NULL) { + res = MP_MEM; + goto CLEANUP; + } + + /* Compute 5NAF */ + ec_compute_wNAF(naf, orderBitSize, n, 5); + + /* wNAF method */ + for (i = orderBitSize; i >= 0; i--) { + /* R = 2R */ + ec_GFp_pt_dbl_jm(rx, ry, &rz, &raz4, rx, ry, &rz, + &raz4, scratch, group); + if (naf[i] != 0) { + ec_GFp_pt_add_jm_aff(rx, ry, &rz, &raz4, + &precomp[(naf[i] + 15) / 2][0], + &precomp[(naf[i] + 15) / 2][1], rx, ry, + &rz, &raz4, scratch, group); + } + } + + /* convert result S to affine coordinates */ + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + +CLEANUP: + for (i = 0; i < MAX_SCRATCH; i++) { + mp_clear(&scratch[i]); + } + for (i = 0; i < 16; i++) { + mp_clear(&precomp[i][0]); + mp_clear(&precomp[i][1]); + } + mp_clear(&tpx); + mp_clear(&tpy); + mp_clear(&rz); + mp_clear(&raz4); + if (naf) { + memset(naf, 0, orderBitSize + 1); + } + free(naf); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_mont.c b/security/nss/lib/freebl/ecl/ecp_mont.c new file mode 100644 index 0000000000..779685b4dd --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_mont.c @@ -0,0 +1,154 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Uses Montgomery reduction for field arithmetic. See mpi/mpmontg.c for + * code implementation. */ + +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" +#include "ecl-priv.h" +#include "ecp.h" +#include +#include + +/* Construct a generic GFMethod for arithmetic over prime fields with + * irreducible irr. 
*/ +GFMethod * +GFMethod_consGFp_mont(const mp_int *irr) +{ + mp_err res = MP_OKAY; + GFMethod *meth = NULL; + mp_mont_modulus *mmm; + + meth = GFMethod_consGFp(irr); + if (meth == NULL) + return NULL; + + mmm = (mp_mont_modulus *)malloc(sizeof(mp_mont_modulus)); + if (mmm == NULL) { + res = MP_MEM; + goto CLEANUP; + } + + meth->field_mul = &ec_GFp_mul_mont; + meth->field_sqr = &ec_GFp_sqr_mont; + meth->field_div = &ec_GFp_div_mont; + meth->field_enc = &ec_GFp_enc_mont; + meth->field_dec = &ec_GFp_dec_mont; + meth->extra1 = mmm; + meth->extra2 = NULL; + meth->extra_free = &ec_GFp_extra_free_mont; + + mmm->N = meth->irr; + mmm->n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(&meth->irr, 0)); + +CLEANUP: + if (res != MP_OKAY) { + GFMethod_free(meth); + return NULL; + } + return meth; +} + +/* Wrapper functions for generic prime field arithmetic. */ + +/* Field multiplication using Montgomery reduction. */ +mp_err +ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + +#ifdef MP_MONT_USE_MP_MUL + /* if MP_MONT_USE_MP_MUL is defined, then the function s_mp_mul_mont + * is not implemented and we have to use mp_mul and s_mp_redc directly + */ + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1)); +#else + mp_int s; + + MP_DIGITS(&s) = 0; + /* s_mp_mul_mont doesn't allow source and destination to be the same */ + if ((a == r) || (b == r)) { + MP_CHECKOK(mp_init(&s)); + MP_CHECKOK(s_mp_mul_mont(a, b, &s, (mp_mont_modulus *)meth->extra1)); + MP_CHECKOK(mp_copy(&s, r)); + mp_clear(&s); + } else { + return s_mp_mul_mont(a, b, r, (mp_mont_modulus *)meth->extra1); + } +#endif +CLEANUP: + return res; +} + +/* Field squaring using Montgomery reduction. */ +mp_err +ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return ec_GFp_mul_mont(a, a, r, meth); +} + +/* Field division using Montgomery reduction. 
*/ +mp_err +ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + /* if A=aZ represents a encoded in montgomery coordinates with Z and # + * and \ respectively represent multiplication and division in + * montgomery coordinates, then A\B = (a/b)Z = (A/B)Z and Binv = + * (1/b)Z = (1/B)(Z^2) where B # Binv = Z */ + /* The generic quotient is encoded once; when a is NULL (inverse of b + * only) it is encoded a second time, matching the Z^2 factor above. */ + MP_CHECKOK(ec_GFp_div(a, b, r, meth)); + MP_CHECKOK(ec_GFp_enc_mont(r, r, meth)); + if (a == NULL) { + MP_CHECKOK(ec_GFp_enc_mont(r, r, meth)); + } +CLEANUP: + return res; +} + +/* Encode a field element in Montgomery form. See s_mp_to_mont in + * mpi/mpmontg.c */ +mp_err +ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_mont_modulus *mmm; + mp_err res = MP_OKAY; + + mmm = (mp_mont_modulus *)meth->extra1; + /* r = (a shifted left by MP_USED(N) whole digits) mod N */ + MP_CHECKOK(mp_copy(a, r)); + MP_CHECKOK(s_mp_lshd(r, MP_USED(&mmm->N))); + MP_CHECKOK(mp_mod(r, &mmm->N, r)); +CLEANUP: + return res; +} + +/* Decode a field element from Montgomery form. */ +mp_err +ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + /* s_mp_redc works in place on r, so copy a into r first unless they + * are already the same object. */ + if (a != r) { + MP_CHECKOK(mp_copy(a, r)); + } + MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1)); +CLEANUP: + return res; +} + +/* Free the memory allocated to the extra fields of Montgomery GFMethod + * object.
*/ +void +ec_GFp_extra_free_mont(GFMethod *meth) +{ + /* extra1 is the mp_mont_modulus that GFMethod_consGFp_mont allocated + * with malloc; free it and clear the pointer so a repeated call is a + * no-op. */ + if (meth->extra1 != NULL) { + free(meth->extra1); + meth->extra1 = NULL; + } +} diff --git a/security/nss/lib/freebl/ecl/ecp_secp384r1.c b/security/nss/lib/freebl/ecl/ecp_secp384r1.c new file mode 100644 index 0000000000..aee99bf23a --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_secp384r1.c @@ -0,0 +1,20817 @@ +/* Autogenerated: ECCKiila https://gitlab.com/nisec/ecckiila */ +/*- + * MIT License + * - + * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley + * - + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * - + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * - + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +/* This implementation is compiled only when __SIZEOF_INT128__ is defined + * and PEDANTIC is not. */ +#if defined(__SIZEOF_INT128__) && !defined(PEDANTIC) + +#include +#include +/* A field element is LIMB_CNT limbs of LIMB_BITS bits each (6 * 64 = 384). */ +#define LIMB_BITS 64 +#define LIMB_CNT 6 +/* Field elements */ +typedef uint64_t fe_t[LIMB_CNT]; +typedef uint64_t limb_t; + +/* fe_t is an array type, so copying and zeroing go through memcpy/memset. */ +#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) +#define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) + +/* Projective points */ +typedef struct { + fe_t X; + fe_t Y; + fe_t Z; +} pt_prj_t; + +/* Affine points */ +typedef struct { + fe_t X; + fe_t Y; +} pt_aff_t; + +/* BEGIN verbatim fiat code https://github.com/mit-plv/fiat-crypto */ +/*- + * MIT License + * + * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file). + * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +/* Autogenerated: word_by_word_montgomery --static --use-value-barrier secp384r1 64 '2^384 - 2^128 - 2^96 + 2^32 - 1' */ +/* curve description: secp384r1 */ +/* machine_wordsize = 64 (from "64") */ +/* requested operations: (all) */ +/* m = 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff (from "2^384 - 2^128 - 2^96 + 2^32 - 1") */ +/* */ +/* NOTE: In addition to the bounds specified above each function, all */ +/* functions synthesized for this Montgomery arithmetic require the */ +/* input to be strictly less than the prime modulus (m), and also */ +/* require the input to be in the unique saturated representation. */ +/* All functions also ensure that these two properties are true of */ +/* return values. */ +/* */ +/* Computed values: */ +/* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) + (z[4] << 256) + (z[5] << 0x140) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */ +/* twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) + (z[4] << 256) + (z[5] << 0x140) in */ +/* if x1 & (2^384-1) < 2^383 then x1 & (2^384-1) else (x1 & (2^384-1)) - 2^384 */ + +#include +typedef unsigned char 
fiat_secp384r1_uint1; +typedef signed char fiat_secp384r1_int1; +#ifdef __GNUC__ +#define FIAT_SECP384R1_FIAT_EXTENSION __extension__ +#define FIAT_SECP384R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP384R1_FIAT_EXTENSION +#define FIAT_SECP384R1_FIAT_INLINE +#endif + +FIAT_SECP384R1_FIAT_EXTENSION typedef signed __int128 fiat_secp384r1_int128; +FIAT_SECP384R1_FIAT_EXTENSION typedef unsigned __int128 fiat_secp384r1_uint128; + +/* The type fiat_secp384r1_montgomery_domain_field_element is a field element in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ +typedef uint64_t fiat_secp384r1_montgomery_domain_field_element[6]; + +/* The type fiat_secp384r1_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ +typedef uint64_t fiat_secp384r1_non_montgomery_domain_field_element[6]; + +#if (-1 & 3) != 3 +#error "This code only works on a two's complement system" +#endif + +#if !defined(FIAT_SECP384R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +static __inline__ uint64_t +fiat_secp384r1_value_barrier_u64(uint64_t a) +{ + __asm__("" + : "+r"(a) + : /* no inputs */); + return a; +} +#else +#define fiat_secp384r1_value_barrier_u64(x) (x) +#endif + +/* + * The function fiat_secp384r1_addcarryx_u64 is an addition with carry. 
+ * + * Postconditions: + * out1 = (arg1 + arg2 + arg3) mod 2^64 + * out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xffffffffffffffff] + * arg3: [0x0 ~> 0xffffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_secp384r1_addcarryx_u64(uint64_t *out1, + fiat_secp384r1_uint1 *out2, + fiat_secp384r1_uint1 arg1, + uint64_t arg2, uint64_t arg3) +{ + fiat_secp384r1_uint128 x1; + uint64_t x2; + fiat_secp384r1_uint1 x3; + x1 = ((arg1 + (fiat_secp384r1_uint128)arg2) + arg3); + x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); + x3 = (fiat_secp384r1_uint1)(x1 >> 64); + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_secp384r1_subborrowx_u64 is a subtraction with borrow. + * + * Postconditions: + * out1 = (-arg1 + arg2 + -arg3) mod 2^64 + * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xffffffffffffffff] + * arg3: [0x0 ~> 0xffffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_secp384r1_subborrowx_u64(uint64_t *out1, + fiat_secp384r1_uint1 *out2, + fiat_secp384r1_uint1 arg1, + uint64_t arg2, uint64_t arg3) +{ + fiat_secp384r1_int128 x1; + fiat_secp384r1_int1 x2; + uint64_t x3; + x1 = ((arg2 - (fiat_secp384r1_int128)arg1) - arg3); + x2 = (fiat_secp384r1_int1)(x1 >> 64); + x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); + *out1 = x3; + *out2 = (fiat_secp384r1_uint1)(0x0 - x2); +} + +/* + * The function fiat_secp384r1_mulx_u64 is a multiplication, returning the full double-width result. 
+ * + * Postconditions: + * out1 = (arg1 * arg2) mod 2^64 + * out2 = ⌊arg1 * arg2 / 2^64⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0xffffffffffffffff] + * arg2: [0x0 ~> 0xffffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + * out2: [0x0 ~> 0xffffffffffffffff] + */ +static void +fiat_secp384r1_mulx_u64(uint64_t *out1, uint64_t *out2, + uint64_t arg1, uint64_t arg2) +{ + fiat_secp384r1_uint128 x1; + uint64_t x2; + uint64_t x3; + x1 = ((fiat_secp384r1_uint128)arg1 * arg2); + x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); + x3 = (uint64_t)(x1 >> 64); + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_secp384r1_cmovznz_u64 is a single-word conditional move. + * + * Postconditions: + * out1 = (if arg1 = 0 then arg2 else arg3) + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xffffffffffffffff] + * arg3: [0x0 ~> 0xffffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + */ +static void +fiat_secp384r1_cmovznz_u64(uint64_t *out1, + fiat_secp384r1_uint1 arg1, uint64_t arg2, + uint64_t arg3) +{ + fiat_secp384r1_uint1 x1; + uint64_t x2; + uint64_t x3; + x1 = (!(!arg1)); + x2 = ((fiat_secp384r1_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); + x3 = ((fiat_secp384r1_value_barrier_u64(x2) & arg3) | + (fiat_secp384r1_value_barrier_u64((~x2)) & arg2)); + *out1 = x3; +} + +/* + * The function fiat_secp384r1_mul multiplies two field elements in the Montgomery domain. 
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * + */ +static void +fiat_secp384r1_mul( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + fiat_secp384r1_uint1 x20; + uint64_t x21; + fiat_secp384r1_uint1 x22; + uint64_t x23; + fiat_secp384r1_uint1 x24; + uint64_t x25; + fiat_secp384r1_uint1 x26; + uint64_t x27; + fiat_secp384r1_uint1 x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint64_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + uint64_t x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + fiat_secp384r1_uint1 x45; + uint64_t x46; + fiat_secp384r1_uint1 x47; + uint64_t x48; + fiat_secp384r1_uint1 x49; + uint64_t x50; + fiat_secp384r1_uint1 x51; + uint64_t x52; + fiat_secp384r1_uint1 x53; + uint64_t x54; + uint64_t x55; + fiat_secp384r1_uint1 x56; + uint64_t x57; + fiat_secp384r1_uint1 x58; + uint64_t x59; + fiat_secp384r1_uint1 x60; + uint64_t x61; + fiat_secp384r1_uint1 x62; + uint64_t x63; + fiat_secp384r1_uint1 x64; + uint64_t x65; + fiat_secp384r1_uint1 x66; + uint64_t x67; + fiat_secp384r1_uint1 x68; + uint64_t x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + uint64_t x74; + uint64_t x75; + uint64_t x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + uint64_t x81; + fiat_secp384r1_uint1 x82; + uint64_t x83; + fiat_secp384r1_uint1 
x84; + uint64_t x85; + fiat_secp384r1_uint1 x86; + uint64_t x87; + fiat_secp384r1_uint1 x88; + uint64_t x89; + fiat_secp384r1_uint1 x90; + uint64_t x91; + uint64_t x92; + fiat_secp384r1_uint1 x93; + uint64_t x94; + fiat_secp384r1_uint1 x95; + uint64_t x96; + fiat_secp384r1_uint1 x97; + uint64_t x98; + fiat_secp384r1_uint1 x99; + uint64_t x100; + fiat_secp384r1_uint1 x101; + uint64_t x102; + fiat_secp384r1_uint1 x103; + uint64_t x104; + fiat_secp384r1_uint1 x105; + uint64_t x106; + uint64_t x107; + uint64_t x108; + uint64_t x109; + uint64_t x110; + uint64_t x111; + uint64_t x112; + uint64_t x113; + uint64_t x114; + uint64_t x115; + uint64_t x116; + uint64_t x117; + uint64_t x118; + uint64_t x119; + uint64_t x120; + fiat_secp384r1_uint1 x121; + uint64_t x122; + fiat_secp384r1_uint1 x123; + uint64_t x124; + fiat_secp384r1_uint1 x125; + uint64_t x126; + fiat_secp384r1_uint1 x127; + uint64_t x128; + fiat_secp384r1_uint1 x129; + uint64_t x130; + uint64_t x131; + fiat_secp384r1_uint1 x132; + uint64_t x133; + fiat_secp384r1_uint1 x134; + uint64_t x135; + fiat_secp384r1_uint1 x136; + uint64_t x137; + fiat_secp384r1_uint1 x138; + uint64_t x139; + fiat_secp384r1_uint1 x140; + uint64_t x141; + fiat_secp384r1_uint1 x142; + uint64_t x143; + fiat_secp384r1_uint1 x144; + uint64_t x145; + uint64_t x146; + uint64_t x147; + uint64_t x148; + uint64_t x149; + uint64_t x150; + uint64_t x151; + uint64_t x152; + uint64_t x153; + uint64_t x154; + uint64_t x155; + uint64_t x156; + uint64_t x157; + uint64_t x158; + fiat_secp384r1_uint1 x159; + uint64_t x160; + fiat_secp384r1_uint1 x161; + uint64_t x162; + fiat_secp384r1_uint1 x163; + uint64_t x164; + fiat_secp384r1_uint1 x165; + uint64_t x166; + fiat_secp384r1_uint1 x167; + uint64_t x168; + uint64_t x169; + fiat_secp384r1_uint1 x170; + uint64_t x171; + fiat_secp384r1_uint1 x172; + uint64_t x173; + fiat_secp384r1_uint1 x174; + uint64_t x175; + fiat_secp384r1_uint1 x176; + uint64_t x177; + fiat_secp384r1_uint1 x178; + uint64_t x179; + 
fiat_secp384r1_uint1 x180; + uint64_t x181; + fiat_secp384r1_uint1 x182; + uint64_t x183; + uint64_t x184; + uint64_t x185; + uint64_t x186; + uint64_t x187; + uint64_t x188; + uint64_t x189; + uint64_t x190; + uint64_t x191; + uint64_t x192; + uint64_t x193; + uint64_t x194; + uint64_t x195; + uint64_t x196; + uint64_t x197; + fiat_secp384r1_uint1 x198; + uint64_t x199; + fiat_secp384r1_uint1 x200; + uint64_t x201; + fiat_secp384r1_uint1 x202; + uint64_t x203; + fiat_secp384r1_uint1 x204; + uint64_t x205; + fiat_secp384r1_uint1 x206; + uint64_t x207; + uint64_t x208; + fiat_secp384r1_uint1 x209; + uint64_t x210; + fiat_secp384r1_uint1 x211; + uint64_t x212; + fiat_secp384r1_uint1 x213; + uint64_t x214; + fiat_secp384r1_uint1 x215; + uint64_t x216; + fiat_secp384r1_uint1 x217; + uint64_t x218; + fiat_secp384r1_uint1 x219; + uint64_t x220; + fiat_secp384r1_uint1 x221; + uint64_t x222; + uint64_t x223; + uint64_t x224; + uint64_t x225; + uint64_t x226; + uint64_t x227; + uint64_t x228; + uint64_t x229; + uint64_t x230; + uint64_t x231; + uint64_t x232; + uint64_t x233; + uint64_t x234; + uint64_t x235; + fiat_secp384r1_uint1 x236; + uint64_t x237; + fiat_secp384r1_uint1 x238; + uint64_t x239; + fiat_secp384r1_uint1 x240; + uint64_t x241; + fiat_secp384r1_uint1 x242; + uint64_t x243; + fiat_secp384r1_uint1 x244; + uint64_t x245; + uint64_t x246; + fiat_secp384r1_uint1 x247; + uint64_t x248; + fiat_secp384r1_uint1 x249; + uint64_t x250; + fiat_secp384r1_uint1 x251; + uint64_t x252; + fiat_secp384r1_uint1 x253; + uint64_t x254; + fiat_secp384r1_uint1 x255; + uint64_t x256; + fiat_secp384r1_uint1 x257; + uint64_t x258; + fiat_secp384r1_uint1 x259; + uint64_t x260; + uint64_t x261; + uint64_t x262; + uint64_t x263; + uint64_t x264; + uint64_t x265; + uint64_t x266; + uint64_t x267; + uint64_t x268; + uint64_t x269; + uint64_t x270; + uint64_t x271; + uint64_t x272; + uint64_t x273; + uint64_t x274; + fiat_secp384r1_uint1 x275; + uint64_t x276; + fiat_secp384r1_uint1 x277; 
+ uint64_t x278; + fiat_secp384r1_uint1 x279; + uint64_t x280; + fiat_secp384r1_uint1 x281; + uint64_t x282; + fiat_secp384r1_uint1 x283; + uint64_t x284; + uint64_t x285; + fiat_secp384r1_uint1 x286; + uint64_t x287; + fiat_secp384r1_uint1 x288; + uint64_t x289; + fiat_secp384r1_uint1 x290; + uint64_t x291; + fiat_secp384r1_uint1 x292; + uint64_t x293; + fiat_secp384r1_uint1 x294; + uint64_t x295; + fiat_secp384r1_uint1 x296; + uint64_t x297; + fiat_secp384r1_uint1 x298; + uint64_t x299; + uint64_t x300; + uint64_t x301; + uint64_t x302; + uint64_t x303; + uint64_t x304; + uint64_t x305; + uint64_t x306; + uint64_t x307; + uint64_t x308; + uint64_t x309; + uint64_t x310; + uint64_t x311; + uint64_t x312; + fiat_secp384r1_uint1 x313; + uint64_t x314; + fiat_secp384r1_uint1 x315; + uint64_t x316; + fiat_secp384r1_uint1 x317; + uint64_t x318; + fiat_secp384r1_uint1 x319; + uint64_t x320; + fiat_secp384r1_uint1 x321; + uint64_t x322; + uint64_t x323; + fiat_secp384r1_uint1 x324; + uint64_t x325; + fiat_secp384r1_uint1 x326; + uint64_t x327; + fiat_secp384r1_uint1 x328; + uint64_t x329; + fiat_secp384r1_uint1 x330; + uint64_t x331; + fiat_secp384r1_uint1 x332; + uint64_t x333; + fiat_secp384r1_uint1 x334; + uint64_t x335; + fiat_secp384r1_uint1 x336; + uint64_t x337; + uint64_t x338; + uint64_t x339; + uint64_t x340; + uint64_t x341; + uint64_t x342; + uint64_t x343; + uint64_t x344; + uint64_t x345; + uint64_t x346; + uint64_t x347; + uint64_t x348; + uint64_t x349; + uint64_t x350; + uint64_t x351; + fiat_secp384r1_uint1 x352; + uint64_t x353; + fiat_secp384r1_uint1 x354; + uint64_t x355; + fiat_secp384r1_uint1 x356; + uint64_t x357; + fiat_secp384r1_uint1 x358; + uint64_t x359; + fiat_secp384r1_uint1 x360; + uint64_t x361; + uint64_t x362; + fiat_secp384r1_uint1 x363; + uint64_t x364; + fiat_secp384r1_uint1 x365; + uint64_t x366; + fiat_secp384r1_uint1 x367; + uint64_t x368; + fiat_secp384r1_uint1 x369; + uint64_t x370; + fiat_secp384r1_uint1 x371; + uint64_t x372; 
+ fiat_secp384r1_uint1 x373; + uint64_t x374; + fiat_secp384r1_uint1 x375; + uint64_t x376; + uint64_t x377; + uint64_t x378; + uint64_t x379; + uint64_t x380; + uint64_t x381; + uint64_t x382; + uint64_t x383; + uint64_t x384; + uint64_t x385; + uint64_t x386; + uint64_t x387; + uint64_t x388; + uint64_t x389; + fiat_secp384r1_uint1 x390; + uint64_t x391; + fiat_secp384r1_uint1 x392; + uint64_t x393; + fiat_secp384r1_uint1 x394; + uint64_t x395; + fiat_secp384r1_uint1 x396; + uint64_t x397; + fiat_secp384r1_uint1 x398; + uint64_t x399; + uint64_t x400; + fiat_secp384r1_uint1 x401; + uint64_t x402; + fiat_secp384r1_uint1 x403; + uint64_t x404; + fiat_secp384r1_uint1 x405; + uint64_t x406; + fiat_secp384r1_uint1 x407; + uint64_t x408; + fiat_secp384r1_uint1 x409; + uint64_t x410; + fiat_secp384r1_uint1 x411; + uint64_t x412; + fiat_secp384r1_uint1 x413; + uint64_t x414; + uint64_t x415; + uint64_t x416; + uint64_t x417; + uint64_t x418; + uint64_t x419; + uint64_t x420; + uint64_t x421; + uint64_t x422; + uint64_t x423; + uint64_t x424; + uint64_t x425; + uint64_t x426; + uint64_t x427; + uint64_t x428; + fiat_secp384r1_uint1 x429; + uint64_t x430; + fiat_secp384r1_uint1 x431; + uint64_t x432; + fiat_secp384r1_uint1 x433; + uint64_t x434; + fiat_secp384r1_uint1 x435; + uint64_t x436; + fiat_secp384r1_uint1 x437; + uint64_t x438; + uint64_t x439; + fiat_secp384r1_uint1 x440; + uint64_t x441; + fiat_secp384r1_uint1 x442; + uint64_t x443; + fiat_secp384r1_uint1 x444; + uint64_t x445; + fiat_secp384r1_uint1 x446; + uint64_t x447; + fiat_secp384r1_uint1 x448; + uint64_t x449; + fiat_secp384r1_uint1 x450; + uint64_t x451; + fiat_secp384r1_uint1 x452; + uint64_t x453; + uint64_t x454; + fiat_secp384r1_uint1 x455; + uint64_t x456; + fiat_secp384r1_uint1 x457; + uint64_t x458; + fiat_secp384r1_uint1 x459; + uint64_t x460; + fiat_secp384r1_uint1 x461; + uint64_t x462; + fiat_secp384r1_uint1 x463; + uint64_t x464; + fiat_secp384r1_uint1 x465; + uint64_t x466; + 
fiat_secp384r1_uint1 x467; + uint64_t x468; + uint64_t x469; + uint64_t x470; + uint64_t x471; + uint64_t x472; + uint64_t x473; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[4]); + x5 = (arg1[5]); + x6 = (arg1[0]); + fiat_secp384r1_mulx_u64(&x7, &x8, x6, (arg2[5])); + fiat_secp384r1_mulx_u64(&x9, &x10, x6, (arg2[4])); + fiat_secp384r1_mulx_u64(&x11, &x12, x6, (arg2[3])); + fiat_secp384r1_mulx_u64(&x13, &x14, x6, (arg2[2])); + fiat_secp384r1_mulx_u64(&x15, &x16, x6, (arg2[1])); + fiat_secp384r1_mulx_u64(&x17, &x18, x6, (arg2[0])); + fiat_secp384r1_addcarryx_u64(&x19, &x20, 0x0, x18, x15); + fiat_secp384r1_addcarryx_u64(&x21, &x22, x20, x16, x13); + fiat_secp384r1_addcarryx_u64(&x23, &x24, x22, x14, x11); + fiat_secp384r1_addcarryx_u64(&x25, &x26, x24, x12, x9); + fiat_secp384r1_addcarryx_u64(&x27, &x28, x26, x10, x7); + x29 = (x28 + x8); + fiat_secp384r1_mulx_u64(&x30, &x31, x17, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x32, &x33, x30, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x34, &x35, x30, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x36, &x37, x30, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x38, &x39, x30, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x40, &x41, x30, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x42, &x43, x30, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x44, &x45, 0x0, x43, x40); + fiat_secp384r1_addcarryx_u64(&x46, &x47, x45, x41, x38); + fiat_secp384r1_addcarryx_u64(&x48, &x49, x47, x39, x36); + fiat_secp384r1_addcarryx_u64(&x50, &x51, x49, x37, x34); + fiat_secp384r1_addcarryx_u64(&x52, &x53, x51, x35, x32); + x54 = (x53 + x33); + fiat_secp384r1_addcarryx_u64(&x55, &x56, 0x0, x17, x42); + fiat_secp384r1_addcarryx_u64(&x57, &x58, x56, x19, x44); + fiat_secp384r1_addcarryx_u64(&x59, &x60, x58, x21, x46); + fiat_secp384r1_addcarryx_u64(&x61, &x62, x60, x23, x48); + fiat_secp384r1_addcarryx_u64(&x63, &x64, x62, x25, x50); + 
fiat_secp384r1_addcarryx_u64(&x65, &x66, x64, x27, x52); + fiat_secp384r1_addcarryx_u64(&x67, &x68, x66, x29, x54); + fiat_secp384r1_mulx_u64(&x69, &x70, x1, (arg2[5])); + fiat_secp384r1_mulx_u64(&x71, &x72, x1, (arg2[4])); + fiat_secp384r1_mulx_u64(&x73, &x74, x1, (arg2[3])); + fiat_secp384r1_mulx_u64(&x75, &x76, x1, (arg2[2])); + fiat_secp384r1_mulx_u64(&x77, &x78, x1, (arg2[1])); + fiat_secp384r1_mulx_u64(&x79, &x80, x1, (arg2[0])); + fiat_secp384r1_addcarryx_u64(&x81, &x82, 0x0, x80, x77); + fiat_secp384r1_addcarryx_u64(&x83, &x84, x82, x78, x75); + fiat_secp384r1_addcarryx_u64(&x85, &x86, x84, x76, x73); + fiat_secp384r1_addcarryx_u64(&x87, &x88, x86, x74, x71); + fiat_secp384r1_addcarryx_u64(&x89, &x90, x88, x72, x69); + x91 = (x90 + x70); + fiat_secp384r1_addcarryx_u64(&x92, &x93, 0x0, x57, x79); + fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x59, x81); + fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x61, x83); + fiat_secp384r1_addcarryx_u64(&x98, &x99, x97, x63, x85); + fiat_secp384r1_addcarryx_u64(&x100, &x101, x99, x65, x87); + fiat_secp384r1_addcarryx_u64(&x102, &x103, x101, x67, x89); + fiat_secp384r1_addcarryx_u64(&x104, &x105, x103, x68, x91); + fiat_secp384r1_mulx_u64(&x106, &x107, x92, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x108, &x109, x106, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x110, &x111, x106, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x112, &x113, x106, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x114, &x115, x106, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x116, &x117, x106, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x118, &x119, x106, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x120, &x121, 0x0, x119, x116); + fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x117, x114); + fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x115, x112); + fiat_secp384r1_addcarryx_u64(&x126, &x127, x125, x113, x110); + fiat_secp384r1_addcarryx_u64(&x128, &x129, 
x127, x111, x108); + x130 = (x129 + x109); + fiat_secp384r1_addcarryx_u64(&x131, &x132, 0x0, x92, x118); + fiat_secp384r1_addcarryx_u64(&x133, &x134, x132, x94, x120); + fiat_secp384r1_addcarryx_u64(&x135, &x136, x134, x96, x122); + fiat_secp384r1_addcarryx_u64(&x137, &x138, x136, x98, x124); + fiat_secp384r1_addcarryx_u64(&x139, &x140, x138, x100, x126); + fiat_secp384r1_addcarryx_u64(&x141, &x142, x140, x102, x128); + fiat_secp384r1_addcarryx_u64(&x143, &x144, x142, x104, x130); + x145 = ((uint64_t)x144 + x105); + fiat_secp384r1_mulx_u64(&x146, &x147, x2, (arg2[5])); + fiat_secp384r1_mulx_u64(&x148, &x149, x2, (arg2[4])); + fiat_secp384r1_mulx_u64(&x150, &x151, x2, (arg2[3])); + fiat_secp384r1_mulx_u64(&x152, &x153, x2, (arg2[2])); + fiat_secp384r1_mulx_u64(&x154, &x155, x2, (arg2[1])); + fiat_secp384r1_mulx_u64(&x156, &x157, x2, (arg2[0])); + fiat_secp384r1_addcarryx_u64(&x158, &x159, 0x0, x157, x154); + fiat_secp384r1_addcarryx_u64(&x160, &x161, x159, x155, x152); + fiat_secp384r1_addcarryx_u64(&x162, &x163, x161, x153, x150); + fiat_secp384r1_addcarryx_u64(&x164, &x165, x163, x151, x148); + fiat_secp384r1_addcarryx_u64(&x166, &x167, x165, x149, x146); + x168 = (x167 + x147); + fiat_secp384r1_addcarryx_u64(&x169, &x170, 0x0, x133, x156); + fiat_secp384r1_addcarryx_u64(&x171, &x172, x170, x135, x158); + fiat_secp384r1_addcarryx_u64(&x173, &x174, x172, x137, x160); + fiat_secp384r1_addcarryx_u64(&x175, &x176, x174, x139, x162); + fiat_secp384r1_addcarryx_u64(&x177, &x178, x176, x141, x164); + fiat_secp384r1_addcarryx_u64(&x179, &x180, x178, x143, x166); + fiat_secp384r1_addcarryx_u64(&x181, &x182, x180, x145, x168); + fiat_secp384r1_mulx_u64(&x183, &x184, x169, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x185, &x186, x183, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x187, &x188, x183, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x189, &x190, x183, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x191, &x192, x183, 
UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x193, &x194, x183, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x195, &x196, x183, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x197, &x198, 0x0, x196, x193); + fiat_secp384r1_addcarryx_u64(&x199, &x200, x198, x194, x191); + fiat_secp384r1_addcarryx_u64(&x201, &x202, x200, x192, x189); + fiat_secp384r1_addcarryx_u64(&x203, &x204, x202, x190, x187); + fiat_secp384r1_addcarryx_u64(&x205, &x206, x204, x188, x185); + x207 = (x206 + x186); + fiat_secp384r1_addcarryx_u64(&x208, &x209, 0x0, x169, x195); + fiat_secp384r1_addcarryx_u64(&x210, &x211, x209, x171, x197); + fiat_secp384r1_addcarryx_u64(&x212, &x213, x211, x173, x199); + fiat_secp384r1_addcarryx_u64(&x214, &x215, x213, x175, x201); + fiat_secp384r1_addcarryx_u64(&x216, &x217, x215, x177, x203); + fiat_secp384r1_addcarryx_u64(&x218, &x219, x217, x179, x205); + fiat_secp384r1_addcarryx_u64(&x220, &x221, x219, x181, x207); + x222 = ((uint64_t)x221 + x182); + fiat_secp384r1_mulx_u64(&x223, &x224, x3, (arg2[5])); + fiat_secp384r1_mulx_u64(&x225, &x226, x3, (arg2[4])); + fiat_secp384r1_mulx_u64(&x227, &x228, x3, (arg2[3])); + fiat_secp384r1_mulx_u64(&x229, &x230, x3, (arg2[2])); + fiat_secp384r1_mulx_u64(&x231, &x232, x3, (arg2[1])); + fiat_secp384r1_mulx_u64(&x233, &x234, x3, (arg2[0])); + fiat_secp384r1_addcarryx_u64(&x235, &x236, 0x0, x234, x231); + fiat_secp384r1_addcarryx_u64(&x237, &x238, x236, x232, x229); + fiat_secp384r1_addcarryx_u64(&x239, &x240, x238, x230, x227); + fiat_secp384r1_addcarryx_u64(&x241, &x242, x240, x228, x225); + fiat_secp384r1_addcarryx_u64(&x243, &x244, x242, x226, x223); + x245 = (x244 + x224); + fiat_secp384r1_addcarryx_u64(&x246, &x247, 0x0, x210, x233); + fiat_secp384r1_addcarryx_u64(&x248, &x249, x247, x212, x235); + fiat_secp384r1_addcarryx_u64(&x250, &x251, x249, x214, x237); + fiat_secp384r1_addcarryx_u64(&x252, &x253, x251, x216, x239); + fiat_secp384r1_addcarryx_u64(&x254, &x255, x253, x218, x241); 
+ fiat_secp384r1_addcarryx_u64(&x256, &x257, x255, x220, x243); + fiat_secp384r1_addcarryx_u64(&x258, &x259, x257, x222, x245); + fiat_secp384r1_mulx_u64(&x260, &x261, x246, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x262, &x263, x260, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x264, &x265, x260, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x266, &x267, x260, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x268, &x269, x260, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x270, &x271, x260, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x272, &x273, x260, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x274, &x275, 0x0, x273, x270); + fiat_secp384r1_addcarryx_u64(&x276, &x277, x275, x271, x268); + fiat_secp384r1_addcarryx_u64(&x278, &x279, x277, x269, x266); + fiat_secp384r1_addcarryx_u64(&x280, &x281, x279, x267, x264); + fiat_secp384r1_addcarryx_u64(&x282, &x283, x281, x265, x262); + x284 = (x283 + x263); + fiat_secp384r1_addcarryx_u64(&x285, &x286, 0x0, x246, x272); + fiat_secp384r1_addcarryx_u64(&x287, &x288, x286, x248, x274); + fiat_secp384r1_addcarryx_u64(&x289, &x290, x288, x250, x276); + fiat_secp384r1_addcarryx_u64(&x291, &x292, x290, x252, x278); + fiat_secp384r1_addcarryx_u64(&x293, &x294, x292, x254, x280); + fiat_secp384r1_addcarryx_u64(&x295, &x296, x294, x256, x282); + fiat_secp384r1_addcarryx_u64(&x297, &x298, x296, x258, x284); + x299 = ((uint64_t)x298 + x259); + fiat_secp384r1_mulx_u64(&x300, &x301, x4, (arg2[5])); + fiat_secp384r1_mulx_u64(&x302, &x303, x4, (arg2[4])); + fiat_secp384r1_mulx_u64(&x304, &x305, x4, (arg2[3])); + fiat_secp384r1_mulx_u64(&x306, &x307, x4, (arg2[2])); + fiat_secp384r1_mulx_u64(&x308, &x309, x4, (arg2[1])); + fiat_secp384r1_mulx_u64(&x310, &x311, x4, (arg2[0])); + fiat_secp384r1_addcarryx_u64(&x312, &x313, 0x0, x311, x308); + fiat_secp384r1_addcarryx_u64(&x314, &x315, x313, x309, x306); + fiat_secp384r1_addcarryx_u64(&x316, &x317, x315, x307, 
x304); + fiat_secp384r1_addcarryx_u64(&x318, &x319, x317, x305, x302); + fiat_secp384r1_addcarryx_u64(&x320, &x321, x319, x303, x300); + x322 = (x321 + x301); + fiat_secp384r1_addcarryx_u64(&x323, &x324, 0x0, x287, x310); + fiat_secp384r1_addcarryx_u64(&x325, &x326, x324, x289, x312); + fiat_secp384r1_addcarryx_u64(&x327, &x328, x326, x291, x314); + fiat_secp384r1_addcarryx_u64(&x329, &x330, x328, x293, x316); + fiat_secp384r1_addcarryx_u64(&x331, &x332, x330, x295, x318); + fiat_secp384r1_addcarryx_u64(&x333, &x334, x332, x297, x320); + fiat_secp384r1_addcarryx_u64(&x335, &x336, x334, x299, x322); + fiat_secp384r1_mulx_u64(&x337, &x338, x323, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x339, &x340, x337, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x341, &x342, x337, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x343, &x344, x337, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x345, &x346, x337, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x347, &x348, x337, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x349, &x350, x337, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x351, &x352, 0x0, x350, x347); + fiat_secp384r1_addcarryx_u64(&x353, &x354, x352, x348, x345); + fiat_secp384r1_addcarryx_u64(&x355, &x356, x354, x346, x343); + fiat_secp384r1_addcarryx_u64(&x357, &x358, x356, x344, x341); + fiat_secp384r1_addcarryx_u64(&x359, &x360, x358, x342, x339); + x361 = (x360 + x340); + fiat_secp384r1_addcarryx_u64(&x362, &x363, 0x0, x323, x349); + fiat_secp384r1_addcarryx_u64(&x364, &x365, x363, x325, x351); + fiat_secp384r1_addcarryx_u64(&x366, &x367, x365, x327, x353); + fiat_secp384r1_addcarryx_u64(&x368, &x369, x367, x329, x355); + fiat_secp384r1_addcarryx_u64(&x370, &x371, x369, x331, x357); + fiat_secp384r1_addcarryx_u64(&x372, &x373, x371, x333, x359); + fiat_secp384r1_addcarryx_u64(&x374, &x375, x373, x335, x361); + x376 = ((uint64_t)x375 + x336); + fiat_secp384r1_mulx_u64(&x377, &x378, x5, 
(arg2[5])); + fiat_secp384r1_mulx_u64(&x379, &x380, x5, (arg2[4])); + fiat_secp384r1_mulx_u64(&x381, &x382, x5, (arg2[3])); + fiat_secp384r1_mulx_u64(&x383, &x384, x5, (arg2[2])); + fiat_secp384r1_mulx_u64(&x385, &x386, x5, (arg2[1])); + fiat_secp384r1_mulx_u64(&x387, &x388, x5, (arg2[0])); + fiat_secp384r1_addcarryx_u64(&x389, &x390, 0x0, x388, x385); + fiat_secp384r1_addcarryx_u64(&x391, &x392, x390, x386, x383); + fiat_secp384r1_addcarryx_u64(&x393, &x394, x392, x384, x381); + fiat_secp384r1_addcarryx_u64(&x395, &x396, x394, x382, x379); + fiat_secp384r1_addcarryx_u64(&x397, &x398, x396, x380, x377); + x399 = (x398 + x378); + fiat_secp384r1_addcarryx_u64(&x400, &x401, 0x0, x364, x387); + fiat_secp384r1_addcarryx_u64(&x402, &x403, x401, x366, x389); + fiat_secp384r1_addcarryx_u64(&x404, &x405, x403, x368, x391); + fiat_secp384r1_addcarryx_u64(&x406, &x407, x405, x370, x393); + fiat_secp384r1_addcarryx_u64(&x408, &x409, x407, x372, x395); + fiat_secp384r1_addcarryx_u64(&x410, &x411, x409, x374, x397); + fiat_secp384r1_addcarryx_u64(&x412, &x413, x411, x376, x399); + fiat_secp384r1_mulx_u64(&x414, &x415, x400, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x416, &x417, x414, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x418, &x419, x414, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x420, &x421, x414, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x422, &x423, x414, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x424, &x425, x414, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x426, &x427, x414, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x428, &x429, 0x0, x427, x424); + fiat_secp384r1_addcarryx_u64(&x430, &x431, x429, x425, x422); + fiat_secp384r1_addcarryx_u64(&x432, &x433, x431, x423, x420); + fiat_secp384r1_addcarryx_u64(&x434, &x435, x433, x421, x418); + fiat_secp384r1_addcarryx_u64(&x436, &x437, x435, x419, x416); + x438 = (x437 + x417); + fiat_secp384r1_addcarryx_u64(&x439, &x440, 0x0, 
x400, x426); + fiat_secp384r1_addcarryx_u64(&x441, &x442, x440, x402, x428); + fiat_secp384r1_addcarryx_u64(&x443, &x444, x442, x404, x430); + fiat_secp384r1_addcarryx_u64(&x445, &x446, x444, x406, x432); + fiat_secp384r1_addcarryx_u64(&x447, &x448, x446, x408, x434); + fiat_secp384r1_addcarryx_u64(&x449, &x450, x448, x410, x436); + fiat_secp384r1_addcarryx_u64(&x451, &x452, x450, x412, x438); + x453 = ((uint64_t)x452 + x413); + fiat_secp384r1_subborrowx_u64(&x454, &x455, 0x0, x441, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u64(&x456, &x457, x455, x443, + UINT64_C(0xffffffff00000000)); + fiat_secp384r1_subborrowx_u64(&x458, &x459, x457, x445, + UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_subborrowx_u64(&x460, &x461, x459, x447, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x462, &x463, x461, x449, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x464, &x465, x463, x451, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x466, &x467, x465, x453, 0x0); + fiat_secp384r1_cmovznz_u64(&x468, x467, x454, x441); + fiat_secp384r1_cmovznz_u64(&x469, x467, x456, x443); + fiat_secp384r1_cmovznz_u64(&x470, x467, x458, x445); + fiat_secp384r1_cmovznz_u64(&x471, x467, x460, x447); + fiat_secp384r1_cmovznz_u64(&x472, x467, x462, x449); + fiat_secp384r1_cmovznz_u64(&x473, x467, x464, x451); + out1[0] = x468; + out1[1] = x469; + out1[2] = x470; + out1[3] = x471; + out1[4] = x472; + out1[5] = x473; +} + +/* + * The function fiat_secp384r1_square squares a field element in the Montgomery domain. 
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m + * 0 ≤ eval out1 < m + * + */ +static void +fiat_secp384r1_square( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + fiat_secp384r1_uint1 x20; + uint64_t x21; + fiat_secp384r1_uint1 x22; + uint64_t x23; + fiat_secp384r1_uint1 x24; + uint64_t x25; + fiat_secp384r1_uint1 x26; + uint64_t x27; + fiat_secp384r1_uint1 x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint64_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + uint64_t x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + fiat_secp384r1_uint1 x45; + uint64_t x46; + fiat_secp384r1_uint1 x47; + uint64_t x48; + fiat_secp384r1_uint1 x49; + uint64_t x50; + fiat_secp384r1_uint1 x51; + uint64_t x52; + fiat_secp384r1_uint1 x53; + uint64_t x54; + uint64_t x55; + fiat_secp384r1_uint1 x56; + uint64_t x57; + fiat_secp384r1_uint1 x58; + uint64_t x59; + fiat_secp384r1_uint1 x60; + uint64_t x61; + fiat_secp384r1_uint1 x62; + uint64_t x63; + fiat_secp384r1_uint1 x64; + uint64_t x65; + fiat_secp384r1_uint1 x66; + uint64_t x67; + fiat_secp384r1_uint1 x68; + uint64_t x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + uint64_t x74; + uint64_t x75; + uint64_t x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + uint64_t x81; + fiat_secp384r1_uint1 x82; + uint64_t x83; + fiat_secp384r1_uint1 x84; + uint64_t x85; + fiat_secp384r1_uint1 x86; + uint64_t x87; + 
fiat_secp384r1_uint1 x88; + uint64_t x89; + fiat_secp384r1_uint1 x90; + uint64_t x91; + uint64_t x92; + fiat_secp384r1_uint1 x93; + uint64_t x94; + fiat_secp384r1_uint1 x95; + uint64_t x96; + fiat_secp384r1_uint1 x97; + uint64_t x98; + fiat_secp384r1_uint1 x99; + uint64_t x100; + fiat_secp384r1_uint1 x101; + uint64_t x102; + fiat_secp384r1_uint1 x103; + uint64_t x104; + fiat_secp384r1_uint1 x105; + uint64_t x106; + uint64_t x107; + uint64_t x108; + uint64_t x109; + uint64_t x110; + uint64_t x111; + uint64_t x112; + uint64_t x113; + uint64_t x114; + uint64_t x115; + uint64_t x116; + uint64_t x117; + uint64_t x118; + uint64_t x119; + uint64_t x120; + fiat_secp384r1_uint1 x121; + uint64_t x122; + fiat_secp384r1_uint1 x123; + uint64_t x124; + fiat_secp384r1_uint1 x125; + uint64_t x126; + fiat_secp384r1_uint1 x127; + uint64_t x128; + fiat_secp384r1_uint1 x129; + uint64_t x130; + uint64_t x131; + fiat_secp384r1_uint1 x132; + uint64_t x133; + fiat_secp384r1_uint1 x134; + uint64_t x135; + fiat_secp384r1_uint1 x136; + uint64_t x137; + fiat_secp384r1_uint1 x138; + uint64_t x139; + fiat_secp384r1_uint1 x140; + uint64_t x141; + fiat_secp384r1_uint1 x142; + uint64_t x143; + fiat_secp384r1_uint1 x144; + uint64_t x145; + uint64_t x146; + uint64_t x147; + uint64_t x148; + uint64_t x149; + uint64_t x150; + uint64_t x151; + uint64_t x152; + uint64_t x153; + uint64_t x154; + uint64_t x155; + uint64_t x156; + uint64_t x157; + uint64_t x158; + fiat_secp384r1_uint1 x159; + uint64_t x160; + fiat_secp384r1_uint1 x161; + uint64_t x162; + fiat_secp384r1_uint1 x163; + uint64_t x164; + fiat_secp384r1_uint1 x165; + uint64_t x166; + fiat_secp384r1_uint1 x167; + uint64_t x168; + uint64_t x169; + fiat_secp384r1_uint1 x170; + uint64_t x171; + fiat_secp384r1_uint1 x172; + uint64_t x173; + fiat_secp384r1_uint1 x174; + uint64_t x175; + fiat_secp384r1_uint1 x176; + uint64_t x177; + fiat_secp384r1_uint1 x178; + uint64_t x179; + fiat_secp384r1_uint1 x180; + uint64_t x181; + fiat_secp384r1_uint1 x182; + 
uint64_t x183; + uint64_t x184; + uint64_t x185; + uint64_t x186; + uint64_t x187; + uint64_t x188; + uint64_t x189; + uint64_t x190; + uint64_t x191; + uint64_t x192; + uint64_t x193; + uint64_t x194; + uint64_t x195; + uint64_t x196; + uint64_t x197; + fiat_secp384r1_uint1 x198; + uint64_t x199; + fiat_secp384r1_uint1 x200; + uint64_t x201; + fiat_secp384r1_uint1 x202; + uint64_t x203; + fiat_secp384r1_uint1 x204; + uint64_t x205; + fiat_secp384r1_uint1 x206; + uint64_t x207; + uint64_t x208; + fiat_secp384r1_uint1 x209; + uint64_t x210; + fiat_secp384r1_uint1 x211; + uint64_t x212; + fiat_secp384r1_uint1 x213; + uint64_t x214; + fiat_secp384r1_uint1 x215; + uint64_t x216; + fiat_secp384r1_uint1 x217; + uint64_t x218; + fiat_secp384r1_uint1 x219; + uint64_t x220; + fiat_secp384r1_uint1 x221; + uint64_t x222; + uint64_t x223; + uint64_t x224; + uint64_t x225; + uint64_t x226; + uint64_t x227; + uint64_t x228; + uint64_t x229; + uint64_t x230; + uint64_t x231; + uint64_t x232; + uint64_t x233; + uint64_t x234; + uint64_t x235; + fiat_secp384r1_uint1 x236; + uint64_t x237; + fiat_secp384r1_uint1 x238; + uint64_t x239; + fiat_secp384r1_uint1 x240; + uint64_t x241; + fiat_secp384r1_uint1 x242; + uint64_t x243; + fiat_secp384r1_uint1 x244; + uint64_t x245; + uint64_t x246; + fiat_secp384r1_uint1 x247; + uint64_t x248; + fiat_secp384r1_uint1 x249; + uint64_t x250; + fiat_secp384r1_uint1 x251; + uint64_t x252; + fiat_secp384r1_uint1 x253; + uint64_t x254; + fiat_secp384r1_uint1 x255; + uint64_t x256; + fiat_secp384r1_uint1 x257; + uint64_t x258; + fiat_secp384r1_uint1 x259; + uint64_t x260; + uint64_t x261; + uint64_t x262; + uint64_t x263; + uint64_t x264; + uint64_t x265; + uint64_t x266; + uint64_t x267; + uint64_t x268; + uint64_t x269; + uint64_t x270; + uint64_t x271; + uint64_t x272; + uint64_t x273; + uint64_t x274; + fiat_secp384r1_uint1 x275; + uint64_t x276; + fiat_secp384r1_uint1 x277; + uint64_t x278; + fiat_secp384r1_uint1 x279; + uint64_t x280; + 
fiat_secp384r1_uint1 x281; + uint64_t x282; + fiat_secp384r1_uint1 x283; + uint64_t x284; + uint64_t x285; + fiat_secp384r1_uint1 x286; + uint64_t x287; + fiat_secp384r1_uint1 x288; + uint64_t x289; + fiat_secp384r1_uint1 x290; + uint64_t x291; + fiat_secp384r1_uint1 x292; + uint64_t x293; + fiat_secp384r1_uint1 x294; + uint64_t x295; + fiat_secp384r1_uint1 x296; + uint64_t x297; + fiat_secp384r1_uint1 x298; + uint64_t x299; + uint64_t x300; + uint64_t x301; + uint64_t x302; + uint64_t x303; + uint64_t x304; + uint64_t x305; + uint64_t x306; + uint64_t x307; + uint64_t x308; + uint64_t x309; + uint64_t x310; + uint64_t x311; + uint64_t x312; + fiat_secp384r1_uint1 x313; + uint64_t x314; + fiat_secp384r1_uint1 x315; + uint64_t x316; + fiat_secp384r1_uint1 x317; + uint64_t x318; + fiat_secp384r1_uint1 x319; + uint64_t x320; + fiat_secp384r1_uint1 x321; + uint64_t x322; + uint64_t x323; + fiat_secp384r1_uint1 x324; + uint64_t x325; + fiat_secp384r1_uint1 x326; + uint64_t x327; + fiat_secp384r1_uint1 x328; + uint64_t x329; + fiat_secp384r1_uint1 x330; + uint64_t x331; + fiat_secp384r1_uint1 x332; + uint64_t x333; + fiat_secp384r1_uint1 x334; + uint64_t x335; + fiat_secp384r1_uint1 x336; + uint64_t x337; + uint64_t x338; + uint64_t x339; + uint64_t x340; + uint64_t x341; + uint64_t x342; + uint64_t x343; + uint64_t x344; + uint64_t x345; + uint64_t x346; + uint64_t x347; + uint64_t x348; + uint64_t x349; + uint64_t x350; + uint64_t x351; + fiat_secp384r1_uint1 x352; + uint64_t x353; + fiat_secp384r1_uint1 x354; + uint64_t x355; + fiat_secp384r1_uint1 x356; + uint64_t x357; + fiat_secp384r1_uint1 x358; + uint64_t x359; + fiat_secp384r1_uint1 x360; + uint64_t x361; + uint64_t x362; + fiat_secp384r1_uint1 x363; + uint64_t x364; + fiat_secp384r1_uint1 x365; + uint64_t x366; + fiat_secp384r1_uint1 x367; + uint64_t x368; + fiat_secp384r1_uint1 x369; + uint64_t x370; + fiat_secp384r1_uint1 x371; + uint64_t x372; + fiat_secp384r1_uint1 x373; + uint64_t x374; + 
fiat_secp384r1_uint1 x375; + uint64_t x376; + uint64_t x377; + uint64_t x378; + uint64_t x379; + uint64_t x380; + uint64_t x381; + uint64_t x382; + uint64_t x383; + uint64_t x384; + uint64_t x385; + uint64_t x386; + uint64_t x387; + uint64_t x388; + uint64_t x389; + fiat_secp384r1_uint1 x390; + uint64_t x391; + fiat_secp384r1_uint1 x392; + uint64_t x393; + fiat_secp384r1_uint1 x394; + uint64_t x395; + fiat_secp384r1_uint1 x396; + uint64_t x397; + fiat_secp384r1_uint1 x398; + uint64_t x399; + uint64_t x400; + fiat_secp384r1_uint1 x401; + uint64_t x402; + fiat_secp384r1_uint1 x403; + uint64_t x404; + fiat_secp384r1_uint1 x405; + uint64_t x406; + fiat_secp384r1_uint1 x407; + uint64_t x408; + fiat_secp384r1_uint1 x409; + uint64_t x410; + fiat_secp384r1_uint1 x411; + uint64_t x412; + fiat_secp384r1_uint1 x413; + uint64_t x414; + uint64_t x415; + uint64_t x416; + uint64_t x417; + uint64_t x418; + uint64_t x419; + uint64_t x420; + uint64_t x421; + uint64_t x422; + uint64_t x423; + uint64_t x424; + uint64_t x425; + uint64_t x426; + uint64_t x427; + uint64_t x428; + fiat_secp384r1_uint1 x429; + uint64_t x430; + fiat_secp384r1_uint1 x431; + uint64_t x432; + fiat_secp384r1_uint1 x433; + uint64_t x434; + fiat_secp384r1_uint1 x435; + uint64_t x436; + fiat_secp384r1_uint1 x437; + uint64_t x438; + uint64_t x439; + fiat_secp384r1_uint1 x440; + uint64_t x441; + fiat_secp384r1_uint1 x442; + uint64_t x443; + fiat_secp384r1_uint1 x444; + uint64_t x445; + fiat_secp384r1_uint1 x446; + uint64_t x447; + fiat_secp384r1_uint1 x448; + uint64_t x449; + fiat_secp384r1_uint1 x450; + uint64_t x451; + fiat_secp384r1_uint1 x452; + uint64_t x453; + uint64_t x454; + fiat_secp384r1_uint1 x455; + uint64_t x456; + fiat_secp384r1_uint1 x457; + uint64_t x458; + fiat_secp384r1_uint1 x459; + uint64_t x460; + fiat_secp384r1_uint1 x461; + uint64_t x462; + fiat_secp384r1_uint1 x463; + uint64_t x464; + fiat_secp384r1_uint1 x465; + uint64_t x466; + fiat_secp384r1_uint1 x467; + uint64_t x468; + uint64_t x469; + 
uint64_t x470; + uint64_t x471; + uint64_t x472; + uint64_t x473; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[4]); + x5 = (arg1[5]); + x6 = (arg1[0]); + fiat_secp384r1_mulx_u64(&x7, &x8, x6, (arg1[5])); + fiat_secp384r1_mulx_u64(&x9, &x10, x6, (arg1[4])); + fiat_secp384r1_mulx_u64(&x11, &x12, x6, (arg1[3])); + fiat_secp384r1_mulx_u64(&x13, &x14, x6, (arg1[2])); + fiat_secp384r1_mulx_u64(&x15, &x16, x6, (arg1[1])); + fiat_secp384r1_mulx_u64(&x17, &x18, x6, (arg1[0])); + fiat_secp384r1_addcarryx_u64(&x19, &x20, 0x0, x18, x15); + fiat_secp384r1_addcarryx_u64(&x21, &x22, x20, x16, x13); + fiat_secp384r1_addcarryx_u64(&x23, &x24, x22, x14, x11); + fiat_secp384r1_addcarryx_u64(&x25, &x26, x24, x12, x9); + fiat_secp384r1_addcarryx_u64(&x27, &x28, x26, x10, x7); + x29 = (x28 + x8); + fiat_secp384r1_mulx_u64(&x30, &x31, x17, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x32, &x33, x30, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x34, &x35, x30, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x36, &x37, x30, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x38, &x39, x30, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x40, &x41, x30, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x42, &x43, x30, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x44, &x45, 0x0, x43, x40); + fiat_secp384r1_addcarryx_u64(&x46, &x47, x45, x41, x38); + fiat_secp384r1_addcarryx_u64(&x48, &x49, x47, x39, x36); + fiat_secp384r1_addcarryx_u64(&x50, &x51, x49, x37, x34); + fiat_secp384r1_addcarryx_u64(&x52, &x53, x51, x35, x32); + x54 = (x53 + x33); + fiat_secp384r1_addcarryx_u64(&x55, &x56, 0x0, x17, x42); + fiat_secp384r1_addcarryx_u64(&x57, &x58, x56, x19, x44); + fiat_secp384r1_addcarryx_u64(&x59, &x60, x58, x21, x46); + fiat_secp384r1_addcarryx_u64(&x61, &x62, x60, x23, x48); + fiat_secp384r1_addcarryx_u64(&x63, &x64, x62, x25, x50); + fiat_secp384r1_addcarryx_u64(&x65, &x66, x64, x27, x52); + 
fiat_secp384r1_addcarryx_u64(&x67, &x68, x66, x29, x54); + fiat_secp384r1_mulx_u64(&x69, &x70, x1, (arg1[5])); + fiat_secp384r1_mulx_u64(&x71, &x72, x1, (arg1[4])); + fiat_secp384r1_mulx_u64(&x73, &x74, x1, (arg1[3])); + fiat_secp384r1_mulx_u64(&x75, &x76, x1, (arg1[2])); + fiat_secp384r1_mulx_u64(&x77, &x78, x1, (arg1[1])); + fiat_secp384r1_mulx_u64(&x79, &x80, x1, (arg1[0])); + fiat_secp384r1_addcarryx_u64(&x81, &x82, 0x0, x80, x77); + fiat_secp384r1_addcarryx_u64(&x83, &x84, x82, x78, x75); + fiat_secp384r1_addcarryx_u64(&x85, &x86, x84, x76, x73); + fiat_secp384r1_addcarryx_u64(&x87, &x88, x86, x74, x71); + fiat_secp384r1_addcarryx_u64(&x89, &x90, x88, x72, x69); + x91 = (x90 + x70); + fiat_secp384r1_addcarryx_u64(&x92, &x93, 0x0, x57, x79); + fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x59, x81); + fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x61, x83); + fiat_secp384r1_addcarryx_u64(&x98, &x99, x97, x63, x85); + fiat_secp384r1_addcarryx_u64(&x100, &x101, x99, x65, x87); + fiat_secp384r1_addcarryx_u64(&x102, &x103, x101, x67, x89); + fiat_secp384r1_addcarryx_u64(&x104, &x105, x103, x68, x91); + fiat_secp384r1_mulx_u64(&x106, &x107, x92, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x108, &x109, x106, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x110, &x111, x106, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x112, &x113, x106, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x114, &x115, x106, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x116, &x117, x106, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x118, &x119, x106, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x120, &x121, 0x0, x119, x116); + fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x117, x114); + fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x115, x112); + fiat_secp384r1_addcarryx_u64(&x126, &x127, x125, x113, x110); + fiat_secp384r1_addcarryx_u64(&x128, &x129, x127, x111, x108); + x130 = (x129 + x109); + 
fiat_secp384r1_addcarryx_u64(&x131, &x132, 0x0, x92, x118); + fiat_secp384r1_addcarryx_u64(&x133, &x134, x132, x94, x120); + fiat_secp384r1_addcarryx_u64(&x135, &x136, x134, x96, x122); + fiat_secp384r1_addcarryx_u64(&x137, &x138, x136, x98, x124); + fiat_secp384r1_addcarryx_u64(&x139, &x140, x138, x100, x126); + fiat_secp384r1_addcarryx_u64(&x141, &x142, x140, x102, x128); + fiat_secp384r1_addcarryx_u64(&x143, &x144, x142, x104, x130); + x145 = ((uint64_t)x144 + x105); + fiat_secp384r1_mulx_u64(&x146, &x147, x2, (arg1[5])); + fiat_secp384r1_mulx_u64(&x148, &x149, x2, (arg1[4])); + fiat_secp384r1_mulx_u64(&x150, &x151, x2, (arg1[3])); + fiat_secp384r1_mulx_u64(&x152, &x153, x2, (arg1[2])); + fiat_secp384r1_mulx_u64(&x154, &x155, x2, (arg1[1])); + fiat_secp384r1_mulx_u64(&x156, &x157, x2, (arg1[0])); + fiat_secp384r1_addcarryx_u64(&x158, &x159, 0x0, x157, x154); + fiat_secp384r1_addcarryx_u64(&x160, &x161, x159, x155, x152); + fiat_secp384r1_addcarryx_u64(&x162, &x163, x161, x153, x150); + fiat_secp384r1_addcarryx_u64(&x164, &x165, x163, x151, x148); + fiat_secp384r1_addcarryx_u64(&x166, &x167, x165, x149, x146); + x168 = (x167 + x147); + fiat_secp384r1_addcarryx_u64(&x169, &x170, 0x0, x133, x156); + fiat_secp384r1_addcarryx_u64(&x171, &x172, x170, x135, x158); + fiat_secp384r1_addcarryx_u64(&x173, &x174, x172, x137, x160); + fiat_secp384r1_addcarryx_u64(&x175, &x176, x174, x139, x162); + fiat_secp384r1_addcarryx_u64(&x177, &x178, x176, x141, x164); + fiat_secp384r1_addcarryx_u64(&x179, &x180, x178, x143, x166); + fiat_secp384r1_addcarryx_u64(&x181, &x182, x180, x145, x168); + fiat_secp384r1_mulx_u64(&x183, &x184, x169, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x185, &x186, x183, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x187, &x188, x183, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x189, &x190, x183, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x191, &x192, x183, UINT64_C(0xfffffffffffffffe)); + 
fiat_secp384r1_mulx_u64(&x193, &x194, x183, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x195, &x196, x183, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x197, &x198, 0x0, x196, x193); + fiat_secp384r1_addcarryx_u64(&x199, &x200, x198, x194, x191); + fiat_secp384r1_addcarryx_u64(&x201, &x202, x200, x192, x189); + fiat_secp384r1_addcarryx_u64(&x203, &x204, x202, x190, x187); + fiat_secp384r1_addcarryx_u64(&x205, &x206, x204, x188, x185); + x207 = (x206 + x186); + fiat_secp384r1_addcarryx_u64(&x208, &x209, 0x0, x169, x195); + fiat_secp384r1_addcarryx_u64(&x210, &x211, x209, x171, x197); + fiat_secp384r1_addcarryx_u64(&x212, &x213, x211, x173, x199); + fiat_secp384r1_addcarryx_u64(&x214, &x215, x213, x175, x201); + fiat_secp384r1_addcarryx_u64(&x216, &x217, x215, x177, x203); + fiat_secp384r1_addcarryx_u64(&x218, &x219, x217, x179, x205); + fiat_secp384r1_addcarryx_u64(&x220, &x221, x219, x181, x207); + x222 = ((uint64_t)x221 + x182); + fiat_secp384r1_mulx_u64(&x223, &x224, x3, (arg1[5])); + fiat_secp384r1_mulx_u64(&x225, &x226, x3, (arg1[4])); + fiat_secp384r1_mulx_u64(&x227, &x228, x3, (arg1[3])); + fiat_secp384r1_mulx_u64(&x229, &x230, x3, (arg1[2])); + fiat_secp384r1_mulx_u64(&x231, &x232, x3, (arg1[1])); + fiat_secp384r1_mulx_u64(&x233, &x234, x3, (arg1[0])); + fiat_secp384r1_addcarryx_u64(&x235, &x236, 0x0, x234, x231); + fiat_secp384r1_addcarryx_u64(&x237, &x238, x236, x232, x229); + fiat_secp384r1_addcarryx_u64(&x239, &x240, x238, x230, x227); + fiat_secp384r1_addcarryx_u64(&x241, &x242, x240, x228, x225); + fiat_secp384r1_addcarryx_u64(&x243, &x244, x242, x226, x223); + x245 = (x244 + x224); + fiat_secp384r1_addcarryx_u64(&x246, &x247, 0x0, x210, x233); + fiat_secp384r1_addcarryx_u64(&x248, &x249, x247, x212, x235); + fiat_secp384r1_addcarryx_u64(&x250, &x251, x249, x214, x237); + fiat_secp384r1_addcarryx_u64(&x252, &x253, x251, x216, x239); + fiat_secp384r1_addcarryx_u64(&x254, &x255, x253, x218, x241); + 
fiat_secp384r1_addcarryx_u64(&x256, &x257, x255, x220, x243); + fiat_secp384r1_addcarryx_u64(&x258, &x259, x257, x222, x245); + fiat_secp384r1_mulx_u64(&x260, &x261, x246, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x262, &x263, x260, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x264, &x265, x260, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x266, &x267, x260, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x268, &x269, x260, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x270, &x271, x260, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x272, &x273, x260, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x274, &x275, 0x0, x273, x270); + fiat_secp384r1_addcarryx_u64(&x276, &x277, x275, x271, x268); + fiat_secp384r1_addcarryx_u64(&x278, &x279, x277, x269, x266); + fiat_secp384r1_addcarryx_u64(&x280, &x281, x279, x267, x264); + fiat_secp384r1_addcarryx_u64(&x282, &x283, x281, x265, x262); + x284 = (x283 + x263); + fiat_secp384r1_addcarryx_u64(&x285, &x286, 0x0, x246, x272); + fiat_secp384r1_addcarryx_u64(&x287, &x288, x286, x248, x274); + fiat_secp384r1_addcarryx_u64(&x289, &x290, x288, x250, x276); + fiat_secp384r1_addcarryx_u64(&x291, &x292, x290, x252, x278); + fiat_secp384r1_addcarryx_u64(&x293, &x294, x292, x254, x280); + fiat_secp384r1_addcarryx_u64(&x295, &x296, x294, x256, x282); + fiat_secp384r1_addcarryx_u64(&x297, &x298, x296, x258, x284); + x299 = ((uint64_t)x298 + x259); + fiat_secp384r1_mulx_u64(&x300, &x301, x4, (arg1[5])); + fiat_secp384r1_mulx_u64(&x302, &x303, x4, (arg1[4])); + fiat_secp384r1_mulx_u64(&x304, &x305, x4, (arg1[3])); + fiat_secp384r1_mulx_u64(&x306, &x307, x4, (arg1[2])); + fiat_secp384r1_mulx_u64(&x308, &x309, x4, (arg1[1])); + fiat_secp384r1_mulx_u64(&x310, &x311, x4, (arg1[0])); + fiat_secp384r1_addcarryx_u64(&x312, &x313, 0x0, x311, x308); + fiat_secp384r1_addcarryx_u64(&x314, &x315, x313, x309, x306); + fiat_secp384r1_addcarryx_u64(&x316, &x317, x315, x307, 
x304); + fiat_secp384r1_addcarryx_u64(&x318, &x319, x317, x305, x302); + fiat_secp384r1_addcarryx_u64(&x320, &x321, x319, x303, x300); + x322 = (x321 + x301); + fiat_secp384r1_addcarryx_u64(&x323, &x324, 0x0, x287, x310); + fiat_secp384r1_addcarryx_u64(&x325, &x326, x324, x289, x312); + fiat_secp384r1_addcarryx_u64(&x327, &x328, x326, x291, x314); + fiat_secp384r1_addcarryx_u64(&x329, &x330, x328, x293, x316); + fiat_secp384r1_addcarryx_u64(&x331, &x332, x330, x295, x318); + fiat_secp384r1_addcarryx_u64(&x333, &x334, x332, x297, x320); + fiat_secp384r1_addcarryx_u64(&x335, &x336, x334, x299, x322); + fiat_secp384r1_mulx_u64(&x337, &x338, x323, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x339, &x340, x337, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x341, &x342, x337, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x343, &x344, x337, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x345, &x346, x337, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x347, &x348, x337, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x349, &x350, x337, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x351, &x352, 0x0, x350, x347); + fiat_secp384r1_addcarryx_u64(&x353, &x354, x352, x348, x345); + fiat_secp384r1_addcarryx_u64(&x355, &x356, x354, x346, x343); + fiat_secp384r1_addcarryx_u64(&x357, &x358, x356, x344, x341); + fiat_secp384r1_addcarryx_u64(&x359, &x360, x358, x342, x339); + x361 = (x360 + x340); + fiat_secp384r1_addcarryx_u64(&x362, &x363, 0x0, x323, x349); + fiat_secp384r1_addcarryx_u64(&x364, &x365, x363, x325, x351); + fiat_secp384r1_addcarryx_u64(&x366, &x367, x365, x327, x353); + fiat_secp384r1_addcarryx_u64(&x368, &x369, x367, x329, x355); + fiat_secp384r1_addcarryx_u64(&x370, &x371, x369, x331, x357); + fiat_secp384r1_addcarryx_u64(&x372, &x373, x371, x333, x359); + fiat_secp384r1_addcarryx_u64(&x374, &x375, x373, x335, x361); + x376 = ((uint64_t)x375 + x336); + fiat_secp384r1_mulx_u64(&x377, &x378, x5, 
(arg1[5])); + fiat_secp384r1_mulx_u64(&x379, &x380, x5, (arg1[4])); + fiat_secp384r1_mulx_u64(&x381, &x382, x5, (arg1[3])); + fiat_secp384r1_mulx_u64(&x383, &x384, x5, (arg1[2])); + fiat_secp384r1_mulx_u64(&x385, &x386, x5, (arg1[1])); + fiat_secp384r1_mulx_u64(&x387, &x388, x5, (arg1[0])); + fiat_secp384r1_addcarryx_u64(&x389, &x390, 0x0, x388, x385); + fiat_secp384r1_addcarryx_u64(&x391, &x392, x390, x386, x383); + fiat_secp384r1_addcarryx_u64(&x393, &x394, x392, x384, x381); + fiat_secp384r1_addcarryx_u64(&x395, &x396, x394, x382, x379); + fiat_secp384r1_addcarryx_u64(&x397, &x398, x396, x380, x377); + x399 = (x398 + x378); + fiat_secp384r1_addcarryx_u64(&x400, &x401, 0x0, x364, x387); + fiat_secp384r1_addcarryx_u64(&x402, &x403, x401, x366, x389); + fiat_secp384r1_addcarryx_u64(&x404, &x405, x403, x368, x391); + fiat_secp384r1_addcarryx_u64(&x406, &x407, x405, x370, x393); + fiat_secp384r1_addcarryx_u64(&x408, &x409, x407, x372, x395); + fiat_secp384r1_addcarryx_u64(&x410, &x411, x409, x374, x397); + fiat_secp384r1_addcarryx_u64(&x412, &x413, x411, x376, x399); + fiat_secp384r1_mulx_u64(&x414, &x415, x400, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x416, &x417, x414, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x418, &x419, x414, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x420, &x421, x414, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x422, &x423, x414, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x424, &x425, x414, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x426, &x427, x414, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x428, &x429, 0x0, x427, x424); + fiat_secp384r1_addcarryx_u64(&x430, &x431, x429, x425, x422); + fiat_secp384r1_addcarryx_u64(&x432, &x433, x431, x423, x420); + fiat_secp384r1_addcarryx_u64(&x434, &x435, x433, x421, x418); + fiat_secp384r1_addcarryx_u64(&x436, &x437, x435, x419, x416); + x438 = (x437 + x417); + fiat_secp384r1_addcarryx_u64(&x439, &x440, 0x0, 
x400, x426); + fiat_secp384r1_addcarryx_u64(&x441, &x442, x440, x402, x428); + fiat_secp384r1_addcarryx_u64(&x443, &x444, x442, x404, x430); + fiat_secp384r1_addcarryx_u64(&x445, &x446, x444, x406, x432); + fiat_secp384r1_addcarryx_u64(&x447, &x448, x446, x408, x434); + fiat_secp384r1_addcarryx_u64(&x449, &x450, x448, x410, x436); + fiat_secp384r1_addcarryx_u64(&x451, &x452, x450, x412, x438); + x453 = ((uint64_t)x452 + x413); + fiat_secp384r1_subborrowx_u64(&x454, &x455, 0x0, x441, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u64(&x456, &x457, x455, x443, + UINT64_C(0xffffffff00000000)); + fiat_secp384r1_subborrowx_u64(&x458, &x459, x457, x445, + UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_subborrowx_u64(&x460, &x461, x459, x447, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x462, &x463, x461, x449, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x464, &x465, x463, x451, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x466, &x467, x465, x453, 0x0); + fiat_secp384r1_cmovznz_u64(&x468, x467, x454, x441); + fiat_secp384r1_cmovznz_u64(&x469, x467, x456, x443); + fiat_secp384r1_cmovznz_u64(&x470, x467, x458, x445); + fiat_secp384r1_cmovznz_u64(&x471, x467, x460, x447); + fiat_secp384r1_cmovznz_u64(&x472, x467, x462, x449); + fiat_secp384r1_cmovznz_u64(&x473, x467, x464, x451); + out1[0] = x468; + out1[1] = x469; + out1[2] = x470; + out1[3] = x471; + out1[4] = x472; + out1[5] = x473; +} + +/* + * The function fiat_secp384r1_add adds two field elements in the Montgomery domain. 
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * + * Implementation notes (added in review): + * x1, x3, ..., x11 are the limb-wise sums arg1[i] + arg2[i] with carry propagation; x12 is the carry out of the top limb. + * x13, x15, ..., x23 are those sums minus the constant limbs 0xffffffff, 0xffffffff00000000, 0xfffffffffffffffe, 0xffffffffffffffff (three times), computed with a borrow chain; the carry x12 enters the last borrow step, which produces x26. + * fiat_secp384r1_cmovznz_u64 then picks each output limb from the subtracted value or the plain sum, keyed on x26. x25 is written but never read. + * NOTE(review): the constant limbs are presumably the little-endian 64-bit limbs of m - confirm against the generator parameters. + * + */ +static void +fiat_secp384r1_add( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) +{ + uint64_t x1; + fiat_secp384r1_uint1 x2; + uint64_t x3; + fiat_secp384r1_uint1 x4; + uint64_t x5; + fiat_secp384r1_uint1 x6; + uint64_t x7; + fiat_secp384r1_uint1 x8; + uint64_t x9; + fiat_secp384r1_uint1 x10; + uint64_t x11; + fiat_secp384r1_uint1 x12; + uint64_t x13; + fiat_secp384r1_uint1 x14; + uint64_t x15; + fiat_secp384r1_uint1 x16; + uint64_t x17; + fiat_secp384r1_uint1 x18; + uint64_t x19; + fiat_secp384r1_uint1 x20; + uint64_t x21; + fiat_secp384r1_uint1 x22; + uint64_t x23; + fiat_secp384r1_uint1 x24; + uint64_t x25; + fiat_secp384r1_uint1 x26; + uint64_t x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint64_t x32; + fiat_secp384r1_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); + fiat_secp384r1_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1])); + fiat_secp384r1_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2])); + fiat_secp384r1_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3])); + fiat_secp384r1_addcarryx_u64(&x9, &x10, x8, (arg1[4]), (arg2[4])); + fiat_secp384r1_addcarryx_u64(&x11, &x12, x10, (arg1[5]), (arg2[5])); + fiat_secp384r1_subborrowx_u64(&x13, &x14, 0x0, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u64(&x15, &x16, x14, x3, + UINT64_C(0xffffffff00000000)); + fiat_secp384r1_subborrowx_u64(&x17, &x18, x16, x5, + UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_subborrowx_u64(&x19, &x20, x18, x7, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x21, &x22, x20, x9, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x23, &x24, x22, x11, + 
UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x25, &x26, x24, x12, 0x0); + fiat_secp384r1_cmovznz_u64(&x27, x26, x13, x1); + fiat_secp384r1_cmovznz_u64(&x28, x26, x15, x3); + fiat_secp384r1_cmovznz_u64(&x29, x26, x17, x5); + fiat_secp384r1_cmovznz_u64(&x30, x26, x19, x7); + fiat_secp384r1_cmovznz_u64(&x31, x26, x21, x9); + fiat_secp384r1_cmovznz_u64(&x32, x26, x23, x11); + out1[0] = x27; + out1[1] = x28; + out1[2] = x29; + out1[3] = x30; + out1[4] = x31; + out1[5] = x32; +} + +/* + * The function fiat_secp384r1_sub subtracts two field elements in the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * + * Implementation notes (added in review): + * x1, x3, ..., x11 are the limb-wise differences arg1[i] - arg2[i] with borrow propagation; x12 is the borrow out of the top limb. + * fiat_secp384r1_cmovznz_u64 turns x12 into x13, which is one of 0x0 or 0xffffffffffffffff. x13 is ANDed with 0xffffffff, 0xffffffff00000000 and 0xfffffffffffffffe for limbs 0..2 and used as-is for limbs 3..5, and these values are added to the differences through a carry chain. The final carry x25 is not used. + * NOTE(review): presumably x13 is all-ones exactly when the subtraction borrowed, so that m is added back once - confirm against the definition of fiat_secp384r1_cmovznz_u64. + * + */ +static void +fiat_secp384r1_sub( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) +{ + uint64_t x1; + fiat_secp384r1_uint1 x2; + uint64_t x3; + fiat_secp384r1_uint1 x4; + uint64_t x5; + fiat_secp384r1_uint1 x6; + uint64_t x7; + fiat_secp384r1_uint1 x8; + uint64_t x9; + fiat_secp384r1_uint1 x10; + uint64_t x11; + fiat_secp384r1_uint1 x12; + uint64_t x13; + uint64_t x14; + fiat_secp384r1_uint1 x15; + uint64_t x16; + fiat_secp384r1_uint1 x17; + uint64_t x18; + fiat_secp384r1_uint1 x19; + uint64_t x20; + fiat_secp384r1_uint1 x21; + uint64_t x22; + fiat_secp384r1_uint1 x23; + uint64_t x24; + fiat_secp384r1_uint1 x25; + fiat_secp384r1_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); + fiat_secp384r1_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1])); + fiat_secp384r1_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2])); + fiat_secp384r1_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3])); + fiat_secp384r1_subborrowx_u64(&x9, &x10, x8, (arg1[4]), (arg2[4])); + fiat_secp384r1_subborrowx_u64(&x11, &x12, x10, (arg1[5]), (arg2[5])); + 
fiat_secp384r1_cmovznz_u64(&x13, x12, 0x0, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_addcarryx_u64(&x14, &x15, 0x0, x1, + (x13 & UINT32_C(0xffffffff))); + fiat_secp384r1_addcarryx_u64(&x16, &x17, x15, x3, + (x13 & UINT64_C(0xffffffff00000000))); + fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, x5, + (x13 & UINT64_C(0xfffffffffffffffe))); + fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, x7, x13); + fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, x9, x13); + fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, x11, x13); + out1[0] = x14; + out1[1] = x16; + out1[2] = x18; + out1[3] = x20; + out1[4] = x22; + out1[5] = x24; +} + +/* + * The function fiat_secp384r1_opp negates a field element in the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m + * 0 ≤ eval out1 < m + * + * Implementation notes (added in review): + * x1, x3, ..., x11 are the limb-wise differences 0 - arg1[i] with borrow propagation; x12 is the borrow out of the top limb. + * fiat_secp384r1_cmovznz_u64 turns x12 into x13, which is one of 0x0 or 0xffffffffffffffff. x13 is ANDed with 0xffffffff, 0xffffffff00000000 and 0xfffffffffffffffe for limbs 0..2 and used as-is for limbs 3..5, and these values are added to the differences through a carry chain. The final carry x25 is not used. + * NOTE(review): presumably x13 is all-ones exactly when the subtraction borrowed (arg1 nonzero), so that m is added back once - confirm against the definition of fiat_secp384r1_cmovznz_u64. + * + */ +static void +fiat_secp384r1_opp( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) +{ + uint64_t x1; + fiat_secp384r1_uint1 x2; + uint64_t x3; + fiat_secp384r1_uint1 x4; + uint64_t x5; + fiat_secp384r1_uint1 x6; + uint64_t x7; + fiat_secp384r1_uint1 x8; + uint64_t x9; + fiat_secp384r1_uint1 x10; + uint64_t x11; + fiat_secp384r1_uint1 x12; + uint64_t x13; + uint64_t x14; + fiat_secp384r1_uint1 x15; + uint64_t x16; + fiat_secp384r1_uint1 x17; + uint64_t x18; + fiat_secp384r1_uint1 x19; + uint64_t x20; + fiat_secp384r1_uint1 x21; + uint64_t x22; + fiat_secp384r1_uint1 x23; + uint64_t x24; + fiat_secp384r1_uint1 x25; + fiat_secp384r1_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0])); + fiat_secp384r1_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1])); + fiat_secp384r1_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2])); + fiat_secp384r1_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3])); + fiat_secp384r1_subborrowx_u64(&x9, &x10, x8, 0x0, (arg1[4])); + fiat_secp384r1_subborrowx_u64(&x11, &x12, x10, 0x0, (arg1[5])); + fiat_secp384r1_cmovznz_u64(&x13, x12, 
0x0, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_addcarryx_u64(&x14, &x15, 0x0, x1, + (x13 & UINT32_C(0xffffffff))); + fiat_secp384r1_addcarryx_u64(&x16, &x17, x15, x3, + (x13 & UINT64_C(0xffffffff00000000))); + fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, x5, + (x13 & UINT64_C(0xfffffffffffffffe))); + fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, x7, x13); + fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, x9, x13); + fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, x11, x13); + out1[0] = x14; + out1[1] = x16; + out1[2] = x18; + out1[3] = x20; + out1[4] = x22; + out1[5] = x24; +} + +/* + * The function fiat_secp384r1_from_montgomery translates a field element out of the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^6) mod m + * 0 ≤ eval out1 < m + * + */ +static void +fiat_secp384r1_from_montgomery( + fiat_secp384r1_non_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + fiat_secp384r1_uint1 x17; + uint64_t x18; + fiat_secp384r1_uint1 x19; + uint64_t x20; + fiat_secp384r1_uint1 x21; + uint64_t x22; + fiat_secp384r1_uint1 x23; + uint64_t x24; + fiat_secp384r1_uint1 x25; + uint64_t x26; + fiat_secp384r1_uint1 x27; + uint64_t x28; + fiat_secp384r1_uint1 x29; + uint64_t x30; + fiat_secp384r1_uint1 x31; + uint64_t x32; + fiat_secp384r1_uint1 x33; + uint64_t x34; + fiat_secp384r1_uint1 x35; + uint64_t x36; + fiat_secp384r1_uint1 x37; + uint64_t x38; + fiat_secp384r1_uint1 x39; + uint64_t x40; + fiat_secp384r1_uint1 x41; + uint64_t x42; + fiat_secp384r1_uint1 x43; + uint64_t x44; + fiat_secp384r1_uint1 x45; + uint64_t x46; + fiat_secp384r1_uint1 x47; + uint64_t x48; + fiat_secp384r1_uint1 
x49; + uint64_t x50; + fiat_secp384r1_uint1 x51; + uint64_t x52; + uint64_t x53; + uint64_t x54; + uint64_t x55; + uint64_t x56; + uint64_t x57; + uint64_t x58; + uint64_t x59; + uint64_t x60; + uint64_t x61; + uint64_t x62; + uint64_t x63; + uint64_t x64; + uint64_t x65; + uint64_t x66; + fiat_secp384r1_uint1 x67; + uint64_t x68; + fiat_secp384r1_uint1 x69; + uint64_t x70; + fiat_secp384r1_uint1 x71; + uint64_t x72; + fiat_secp384r1_uint1 x73; + uint64_t x74; + fiat_secp384r1_uint1 x75; + uint64_t x76; + fiat_secp384r1_uint1 x77; + uint64_t x78; + fiat_secp384r1_uint1 x79; + uint64_t x80; + fiat_secp384r1_uint1 x81; + uint64_t x82; + fiat_secp384r1_uint1 x83; + uint64_t x84; + fiat_secp384r1_uint1 x85; + uint64_t x86; + fiat_secp384r1_uint1 x87; + uint64_t x88; + fiat_secp384r1_uint1 x89; + uint64_t x90; + fiat_secp384r1_uint1 x91; + uint64_t x92; + fiat_secp384r1_uint1 x93; + uint64_t x94; + fiat_secp384r1_uint1 x95; + uint64_t x96; + fiat_secp384r1_uint1 x97; + uint64_t x98; + fiat_secp384r1_uint1 x99; + uint64_t x100; + fiat_secp384r1_uint1 x101; + uint64_t x102; + uint64_t x103; + uint64_t x104; + uint64_t x105; + uint64_t x106; + uint64_t x107; + uint64_t x108; + uint64_t x109; + uint64_t x110; + uint64_t x111; + uint64_t x112; + uint64_t x113; + uint64_t x114; + uint64_t x115; + uint64_t x116; + fiat_secp384r1_uint1 x117; + uint64_t x118; + fiat_secp384r1_uint1 x119; + uint64_t x120; + fiat_secp384r1_uint1 x121; + uint64_t x122; + fiat_secp384r1_uint1 x123; + uint64_t x124; + fiat_secp384r1_uint1 x125; + uint64_t x126; + fiat_secp384r1_uint1 x127; + uint64_t x128; + fiat_secp384r1_uint1 x129; + uint64_t x130; + fiat_secp384r1_uint1 x131; + uint64_t x132; + fiat_secp384r1_uint1 x133; + uint64_t x134; + fiat_secp384r1_uint1 x135; + uint64_t x136; + fiat_secp384r1_uint1 x137; + uint64_t x138; + fiat_secp384r1_uint1 x139; + uint64_t x140; + fiat_secp384r1_uint1 x141; + uint64_t x142; + fiat_secp384r1_uint1 x143; + uint64_t x144; + fiat_secp384r1_uint1 x145; + 
uint64_t x146; + fiat_secp384r1_uint1 x147; + uint64_t x148; + fiat_secp384r1_uint1 x149; + uint64_t x150; + fiat_secp384r1_uint1 x151; + uint64_t x152; + uint64_t x153; + uint64_t x154; + uint64_t x155; + uint64_t x156; + uint64_t x157; + uint64_t x158; + uint64_t x159; + uint64_t x160; + uint64_t x161; + uint64_t x162; + uint64_t x163; + uint64_t x164; + uint64_t x165; + uint64_t x166; + fiat_secp384r1_uint1 x167; + uint64_t x168; + fiat_secp384r1_uint1 x169; + uint64_t x170; + fiat_secp384r1_uint1 x171; + uint64_t x172; + fiat_secp384r1_uint1 x173; + uint64_t x174; + fiat_secp384r1_uint1 x175; + uint64_t x176; + fiat_secp384r1_uint1 x177; + uint64_t x178; + fiat_secp384r1_uint1 x179; + uint64_t x180; + fiat_secp384r1_uint1 x181; + uint64_t x182; + fiat_secp384r1_uint1 x183; + uint64_t x184; + fiat_secp384r1_uint1 x185; + uint64_t x186; + fiat_secp384r1_uint1 x187; + uint64_t x188; + fiat_secp384r1_uint1 x189; + uint64_t x190; + fiat_secp384r1_uint1 x191; + uint64_t x192; + fiat_secp384r1_uint1 x193; + uint64_t x194; + fiat_secp384r1_uint1 x195; + uint64_t x196; + fiat_secp384r1_uint1 x197; + uint64_t x198; + fiat_secp384r1_uint1 x199; + uint64_t x200; + fiat_secp384r1_uint1 x201; + uint64_t x202; + uint64_t x203; + uint64_t x204; + uint64_t x205; + uint64_t x206; + uint64_t x207; + uint64_t x208; + uint64_t x209; + uint64_t x210; + uint64_t x211; + uint64_t x212; + uint64_t x213; + uint64_t x214; + uint64_t x215; + uint64_t x216; + fiat_secp384r1_uint1 x217; + uint64_t x218; + fiat_secp384r1_uint1 x219; + uint64_t x220; + fiat_secp384r1_uint1 x221; + uint64_t x222; + fiat_secp384r1_uint1 x223; + uint64_t x224; + fiat_secp384r1_uint1 x225; + uint64_t x226; + fiat_secp384r1_uint1 x227; + uint64_t x228; + fiat_secp384r1_uint1 x229; + uint64_t x230; + fiat_secp384r1_uint1 x231; + uint64_t x232; + fiat_secp384r1_uint1 x233; + uint64_t x234; + fiat_secp384r1_uint1 x235; + uint64_t x236; + fiat_secp384r1_uint1 x237; + uint64_t x238; + fiat_secp384r1_uint1 x239; + 
uint64_t x240; + fiat_secp384r1_uint1 x241; + uint64_t x242; + fiat_secp384r1_uint1 x243; + uint64_t x244; + fiat_secp384r1_uint1 x245; + uint64_t x246; + fiat_secp384r1_uint1 x247; + uint64_t x248; + fiat_secp384r1_uint1 x249; + uint64_t x250; + fiat_secp384r1_uint1 x251; + uint64_t x252; + uint64_t x253; + uint64_t x254; + uint64_t x255; + uint64_t x256; + uint64_t x257; + uint64_t x258; + uint64_t x259; + uint64_t x260; + uint64_t x261; + uint64_t x262; + uint64_t x263; + uint64_t x264; + uint64_t x265; + uint64_t x266; + fiat_secp384r1_uint1 x267; + uint64_t x268; + fiat_secp384r1_uint1 x269; + uint64_t x270; + fiat_secp384r1_uint1 x271; + uint64_t x272; + fiat_secp384r1_uint1 x273; + uint64_t x274; + fiat_secp384r1_uint1 x275; + uint64_t x276; + fiat_secp384r1_uint1 x277; + uint64_t x278; + fiat_secp384r1_uint1 x279; + uint64_t x280; + fiat_secp384r1_uint1 x281; + uint64_t x282; + fiat_secp384r1_uint1 x283; + uint64_t x284; + fiat_secp384r1_uint1 x285; + uint64_t x286; + fiat_secp384r1_uint1 x287; + uint64_t x288; + fiat_secp384r1_uint1 x289; + uint64_t x290; + fiat_secp384r1_uint1 x291; + uint64_t x292; + fiat_secp384r1_uint1 x293; + uint64_t x294; + fiat_secp384r1_uint1 x295; + uint64_t x296; + fiat_secp384r1_uint1 x297; + uint64_t x298; + fiat_secp384r1_uint1 x299; + uint64_t x300; + fiat_secp384r1_uint1 x301; + uint64_t x302; + fiat_secp384r1_uint1 x303; + uint64_t x304; + uint64_t x305; + uint64_t x306; + uint64_t x307; + uint64_t x308; + uint64_t x309; + x1 = (arg1[0]); + fiat_secp384r1_mulx_u64(&x2, &x3, x1, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x4, &x5, x2, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x6, &x7, x2, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x8, &x9, x2, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x10, &x11, x2, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x12, &x13, x2, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x14, &x15, x2, UINT32_C(0xffffffff)); + 
fiat_secp384r1_addcarryx_u64(&x16, &x17, 0x0, x15, x12); + fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, x13, x10); + fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, x11, x8); + fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, x9, x6); + fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, x7, x4); + fiat_secp384r1_addcarryx_u64(&x26, &x27, 0x0, x1, x14); + fiat_secp384r1_addcarryx_u64(&x28, &x29, x27, 0x0, x16); + fiat_secp384r1_addcarryx_u64(&x30, &x31, x29, 0x0, x18); + fiat_secp384r1_addcarryx_u64(&x32, &x33, x31, 0x0, x20); + fiat_secp384r1_addcarryx_u64(&x34, &x35, x33, 0x0, x22); + fiat_secp384r1_addcarryx_u64(&x36, &x37, x35, 0x0, x24); + fiat_secp384r1_addcarryx_u64(&x38, &x39, x37, 0x0, (x25 + x5)); + fiat_secp384r1_addcarryx_u64(&x40, &x41, 0x0, x28, (arg1[1])); + fiat_secp384r1_addcarryx_u64(&x42, &x43, x41, x30, 0x0); + fiat_secp384r1_addcarryx_u64(&x44, &x45, x43, x32, 0x0); + fiat_secp384r1_addcarryx_u64(&x46, &x47, x45, x34, 0x0); + fiat_secp384r1_addcarryx_u64(&x48, &x49, x47, x36, 0x0); + fiat_secp384r1_addcarryx_u64(&x50, &x51, x49, x38, 0x0); + fiat_secp384r1_mulx_u64(&x52, &x53, x40, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x54, &x55, x52, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x56, &x57, x52, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x58, &x59, x52, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x60, &x61, x52, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x62, &x63, x52, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x64, &x65, x52, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x66, &x67, 0x0, x65, x62); + fiat_secp384r1_addcarryx_u64(&x68, &x69, x67, x63, x60); + fiat_secp384r1_addcarryx_u64(&x70, &x71, x69, x61, x58); + fiat_secp384r1_addcarryx_u64(&x72, &x73, x71, x59, x56); + fiat_secp384r1_addcarryx_u64(&x74, &x75, x73, x57, x54); + fiat_secp384r1_addcarryx_u64(&x76, &x77, 0x0, x40, x64); + fiat_secp384r1_addcarryx_u64(&x78, &x79, x77, x42, x66); + 
fiat_secp384r1_addcarryx_u64(&x80, &x81, x79, x44, x68); + fiat_secp384r1_addcarryx_u64(&x82, &x83, x81, x46, x70); + fiat_secp384r1_addcarryx_u64(&x84, &x85, x83, x48, x72); + fiat_secp384r1_addcarryx_u64(&x86, &x87, x85, x50, x74); + fiat_secp384r1_addcarryx_u64(&x88, &x89, x87, ((uint64_t)x51 + x39), + (x75 + x55)); + fiat_secp384r1_addcarryx_u64(&x90, &x91, 0x0, x78, (arg1[2])); + fiat_secp384r1_addcarryx_u64(&x92, &x93, x91, x80, 0x0); + fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x82, 0x0); + fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x84, 0x0); + fiat_secp384r1_addcarryx_u64(&x98, &x99, x97, x86, 0x0); + fiat_secp384r1_addcarryx_u64(&x100, &x101, x99, x88, 0x0); + fiat_secp384r1_mulx_u64(&x102, &x103, x90, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x104, &x105, x102, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x106, &x107, x102, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x108, &x109, x102, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x110, &x111, x102, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x112, &x113, x102, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x114, &x115, x102, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x116, &x117, 0x0, x115, x112); + fiat_secp384r1_addcarryx_u64(&x118, &x119, x117, x113, x110); + fiat_secp384r1_addcarryx_u64(&x120, &x121, x119, x111, x108); + fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x109, x106); + fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x107, x104); + fiat_secp384r1_addcarryx_u64(&x126, &x127, 0x0, x90, x114); + fiat_secp384r1_addcarryx_u64(&x128, &x129, x127, x92, x116); + fiat_secp384r1_addcarryx_u64(&x130, &x131, x129, x94, x118); + fiat_secp384r1_addcarryx_u64(&x132, &x133, x131, x96, x120); + fiat_secp384r1_addcarryx_u64(&x134, &x135, x133, x98, x122); + fiat_secp384r1_addcarryx_u64(&x136, &x137, x135, x100, x124); + fiat_secp384r1_addcarryx_u64(&x138, &x139, x137, ((uint64_t)x101 + x89), + (x125 + x105)); + 
fiat_secp384r1_addcarryx_u64(&x140, &x141, 0x0, x128, (arg1[3])); + fiat_secp384r1_addcarryx_u64(&x142, &x143, x141, x130, 0x0); + fiat_secp384r1_addcarryx_u64(&x144, &x145, x143, x132, 0x0); + fiat_secp384r1_addcarryx_u64(&x146, &x147, x145, x134, 0x0); + fiat_secp384r1_addcarryx_u64(&x148, &x149, x147, x136, 0x0); + fiat_secp384r1_addcarryx_u64(&x150, &x151, x149, x138, 0x0); + fiat_secp384r1_mulx_u64(&x152, &x153, x140, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x154, &x155, x152, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x156, &x157, x152, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x158, &x159, x152, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x160, &x161, x152, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x162, &x163, x152, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x164, &x165, x152, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x166, &x167, 0x0, x165, x162); + fiat_secp384r1_addcarryx_u64(&x168, &x169, x167, x163, x160); + fiat_secp384r1_addcarryx_u64(&x170, &x171, x169, x161, x158); + fiat_secp384r1_addcarryx_u64(&x172, &x173, x171, x159, x156); + fiat_secp384r1_addcarryx_u64(&x174, &x175, x173, x157, x154); + fiat_secp384r1_addcarryx_u64(&x176, &x177, 0x0, x140, x164); + fiat_secp384r1_addcarryx_u64(&x178, &x179, x177, x142, x166); + fiat_secp384r1_addcarryx_u64(&x180, &x181, x179, x144, x168); + fiat_secp384r1_addcarryx_u64(&x182, &x183, x181, x146, x170); + fiat_secp384r1_addcarryx_u64(&x184, &x185, x183, x148, x172); + fiat_secp384r1_addcarryx_u64(&x186, &x187, x185, x150, x174); + fiat_secp384r1_addcarryx_u64(&x188, &x189, x187, ((uint64_t)x151 + x139), + (x175 + x155)); + fiat_secp384r1_addcarryx_u64(&x190, &x191, 0x0, x178, (arg1[4])); + fiat_secp384r1_addcarryx_u64(&x192, &x193, x191, x180, 0x0); + fiat_secp384r1_addcarryx_u64(&x194, &x195, x193, x182, 0x0); + fiat_secp384r1_addcarryx_u64(&x196, &x197, x195, x184, 0x0); + fiat_secp384r1_addcarryx_u64(&x198, 
&x199, x197, x186, 0x0); + fiat_secp384r1_addcarryx_u64(&x200, &x201, x199, x188, 0x0); + fiat_secp384r1_mulx_u64(&x202, &x203, x190, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x204, &x205, x202, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x206, &x207, x202, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x208, &x209, x202, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x210, &x211, x202, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x212, &x213, x202, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x214, &x215, x202, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x216, &x217, 0x0, x215, x212); + fiat_secp384r1_addcarryx_u64(&x218, &x219, x217, x213, x210); + fiat_secp384r1_addcarryx_u64(&x220, &x221, x219, x211, x208); + fiat_secp384r1_addcarryx_u64(&x222, &x223, x221, x209, x206); + fiat_secp384r1_addcarryx_u64(&x224, &x225, x223, x207, x204); + fiat_secp384r1_addcarryx_u64(&x226, &x227, 0x0, x190, x214); + fiat_secp384r1_addcarryx_u64(&x228, &x229, x227, x192, x216); + fiat_secp384r1_addcarryx_u64(&x230, &x231, x229, x194, x218); + fiat_secp384r1_addcarryx_u64(&x232, &x233, x231, x196, x220); + fiat_secp384r1_addcarryx_u64(&x234, &x235, x233, x198, x222); + fiat_secp384r1_addcarryx_u64(&x236, &x237, x235, x200, x224); + fiat_secp384r1_addcarryx_u64(&x238, &x239, x237, ((uint64_t)x201 + x189), + (x225 + x205)); + fiat_secp384r1_addcarryx_u64(&x240, &x241, 0x0, x228, (arg1[5])); + fiat_secp384r1_addcarryx_u64(&x242, &x243, x241, x230, 0x0); + fiat_secp384r1_addcarryx_u64(&x244, &x245, x243, x232, 0x0); + fiat_secp384r1_addcarryx_u64(&x246, &x247, x245, x234, 0x0); + fiat_secp384r1_addcarryx_u64(&x248, &x249, x247, x236, 0x0); + fiat_secp384r1_addcarryx_u64(&x250, &x251, x249, x238, 0x0); + fiat_secp384r1_mulx_u64(&x252, &x253, x240, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x254, &x255, x252, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x256, &x257, x252, 
UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x258, &x259, x252, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x260, &x261, x252, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x262, &x263, x252, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x264, &x265, x252, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x266, &x267, 0x0, x265, x262); + fiat_secp384r1_addcarryx_u64(&x268, &x269, x267, x263, x260); + fiat_secp384r1_addcarryx_u64(&x270, &x271, x269, x261, x258); + fiat_secp384r1_addcarryx_u64(&x272, &x273, x271, x259, x256); + fiat_secp384r1_addcarryx_u64(&x274, &x275, x273, x257, x254); + fiat_secp384r1_addcarryx_u64(&x276, &x277, 0x0, x240, x264); + fiat_secp384r1_addcarryx_u64(&x278, &x279, x277, x242, x266); + fiat_secp384r1_addcarryx_u64(&x280, &x281, x279, x244, x268); + fiat_secp384r1_addcarryx_u64(&x282, &x283, x281, x246, x270); + fiat_secp384r1_addcarryx_u64(&x284, &x285, x283, x248, x272); + fiat_secp384r1_addcarryx_u64(&x286, &x287, x285, x250, x274); + fiat_secp384r1_addcarryx_u64(&x288, &x289, x287, ((uint64_t)x251 + x239), + (x275 + x255)); + fiat_secp384r1_subborrowx_u64(&x290, &x291, 0x0, x278, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u64(&x292, &x293, x291, x280, + UINT64_C(0xffffffff00000000)); + fiat_secp384r1_subborrowx_u64(&x294, &x295, x293, x282, + UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_subborrowx_u64(&x296, &x297, x295, x284, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x298, &x299, x297, x286, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x300, &x301, x299, x288, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x302, &x303, x301, x289, 0x0); + fiat_secp384r1_cmovznz_u64(&x304, x303, x290, x278); + fiat_secp384r1_cmovznz_u64(&x305, x303, x292, x280); + fiat_secp384r1_cmovznz_u64(&x306, x303, x294, x282); + fiat_secp384r1_cmovznz_u64(&x307, x303, x296, x284); + fiat_secp384r1_cmovznz_u64(&x308, x303, x298, 
x286); + fiat_secp384r1_cmovznz_u64(&x309, x303, x300, x288); + out1[0] = x304; + out1[1] = x305; + out1[2] = x306; + out1[3] = x307; + out1[4] = x308; + out1[5] = x309; +} + +/* + * The function fiat_secp384r1_to_montgomery translates a field element into the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = eval arg1 mod m + * 0 ≤ eval out1 < m + * + * Implementation notes: + * The body is straight-line code (no loops or conditionals) processing the limbs in the order arg1[0], arg1[1], ..., arg1[5]; note that they are loaded into x6, x1, x2, x3, x4, x5 respectively. + * Each limb is multiplied by the constant words 0xfffffffe00000001, 0x200000000, 0xfffffffe00000000, 0x200000000 and added once more unscaled one word above those products, i.e. multiplied by the six-limb constant + * [0xfffffffe00000001, 0x200000000, 0xfffffffe00000000, 0x200000000, 0x1, 0x0] (least significant first), which equals 2^768 mod m for the modulus whose limbs appear in the closing subtraction. + * After each such multiply-accumulate comes a reduction round: the low word is multiplied by 0x100000001, the low half of that product is multiplied by the modulus limbs, and the result is added so that the low word cancels. + * The closing subborrowx chain subtracts the modulus once, and cmovznz chooses per limb between the subtracted and the unsubtracted value based on the final borrow x404. + * + */ +static void +fiat_secp384r1_to_montgomery( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_non_montgomery_domain_field_element arg1) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + fiat_secp384r1_uint1 x16; + uint64_t x17; + fiat_secp384r1_uint1 x18; + uint64_t x19; + fiat_secp384r1_uint1 x20; + uint64_t x21; + fiat_secp384r1_uint1 x22; + uint64_t x23; + uint64_t x24; + uint64_t x25; + uint64_t x26; + uint64_t x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint64_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + uint64_t x36; + uint64_t x37; + fiat_secp384r1_uint1 x38; + uint64_t x39; + fiat_secp384r1_uint1 x40; + uint64_t x41; + fiat_secp384r1_uint1 x42; + uint64_t x43; + fiat_secp384r1_uint1 x44; + uint64_t x45; + fiat_secp384r1_uint1 x46; + uint64_t x47; + fiat_secp384r1_uint1 x48; + uint64_t x49; + fiat_secp384r1_uint1 x50; + uint64_t x51; + fiat_secp384r1_uint1 x52; + uint64_t x53; + fiat_secp384r1_uint1 x54; + uint64_t x55; + fiat_secp384r1_uint1 x56; + uint64_t x57; + fiat_secp384r1_uint1 x58; + uint64_t x59; + fiat_secp384r1_uint1 x60; + uint64_t x61; + uint64_t x62; + uint64_t x63; + uint64_t x64; + uint64_t x65; + uint64_t x66; + uint64_t x67; + uint64_t x68; + uint64_t x69; + fiat_secp384r1_uint1 x70; + uint64_t x71; + fiat_secp384r1_uint1 x72; + uint64_t x73; + fiat_secp384r1_uint1
x74; + uint64_t x75; + fiat_secp384r1_uint1 x76; + uint64_t x77; + fiat_secp384r1_uint1 x78; + uint64_t x79; + fiat_secp384r1_uint1 x80; + uint64_t x81; + fiat_secp384r1_uint1 x82; + uint64_t x83; + fiat_secp384r1_uint1 x84; + uint64_t x85; + fiat_secp384r1_uint1 x86; + uint64_t x87; + fiat_secp384r1_uint1 x88; + uint64_t x89; + uint64_t x90; + uint64_t x91; + uint64_t x92; + uint64_t x93; + uint64_t x94; + uint64_t x95; + uint64_t x96; + uint64_t x97; + uint64_t x98; + uint64_t x99; + uint64_t x100; + uint64_t x101; + uint64_t x102; + uint64_t x103; + fiat_secp384r1_uint1 x104; + uint64_t x105; + fiat_secp384r1_uint1 x106; + uint64_t x107; + fiat_secp384r1_uint1 x108; + uint64_t x109; + fiat_secp384r1_uint1 x110; + uint64_t x111; + fiat_secp384r1_uint1 x112; + uint64_t x113; + fiat_secp384r1_uint1 x114; + uint64_t x115; + fiat_secp384r1_uint1 x116; + uint64_t x117; + fiat_secp384r1_uint1 x118; + uint64_t x119; + fiat_secp384r1_uint1 x120; + uint64_t x121; + fiat_secp384r1_uint1 x122; + uint64_t x123; + fiat_secp384r1_uint1 x124; + uint64_t x125; + fiat_secp384r1_uint1 x126; + uint64_t x127; + uint64_t x128; + uint64_t x129; + uint64_t x130; + uint64_t x131; + uint64_t x132; + uint64_t x133; + uint64_t x134; + uint64_t x135; + fiat_secp384r1_uint1 x136; + uint64_t x137; + fiat_secp384r1_uint1 x138; + uint64_t x139; + fiat_secp384r1_uint1 x140; + uint64_t x141; + fiat_secp384r1_uint1 x142; + uint64_t x143; + fiat_secp384r1_uint1 x144; + uint64_t x145; + fiat_secp384r1_uint1 x146; + uint64_t x147; + fiat_secp384r1_uint1 x148; + uint64_t x149; + fiat_secp384r1_uint1 x150; + uint64_t x151; + fiat_secp384r1_uint1 x152; + uint64_t x153; + fiat_secp384r1_uint1 x154; + uint64_t x155; + uint64_t x156; + uint64_t x157; + uint64_t x158; + uint64_t x159; + uint64_t x160; + uint64_t x161; + uint64_t x162; + uint64_t x163; + uint64_t x164; + uint64_t x165; + uint64_t x166; + uint64_t x167; + uint64_t x168; + uint64_t x169; + fiat_secp384r1_uint1 x170; + uint64_t x171; + 
fiat_secp384r1_uint1 x172; + uint64_t x173; + fiat_secp384r1_uint1 x174; + uint64_t x175; + fiat_secp384r1_uint1 x176; + uint64_t x177; + fiat_secp384r1_uint1 x178; + uint64_t x179; + fiat_secp384r1_uint1 x180; + uint64_t x181; + fiat_secp384r1_uint1 x182; + uint64_t x183; + fiat_secp384r1_uint1 x184; + uint64_t x185; + fiat_secp384r1_uint1 x186; + uint64_t x187; + fiat_secp384r1_uint1 x188; + uint64_t x189; + fiat_secp384r1_uint1 x190; + uint64_t x191; + fiat_secp384r1_uint1 x192; + uint64_t x193; + uint64_t x194; + uint64_t x195; + uint64_t x196; + uint64_t x197; + uint64_t x198; + uint64_t x199; + uint64_t x200; + uint64_t x201; + fiat_secp384r1_uint1 x202; + uint64_t x203; + fiat_secp384r1_uint1 x204; + uint64_t x205; + fiat_secp384r1_uint1 x206; + uint64_t x207; + fiat_secp384r1_uint1 x208; + uint64_t x209; + fiat_secp384r1_uint1 x210; + uint64_t x211; + fiat_secp384r1_uint1 x212; + uint64_t x213; + fiat_secp384r1_uint1 x214; + uint64_t x215; + fiat_secp384r1_uint1 x216; + uint64_t x217; + fiat_secp384r1_uint1 x218; + uint64_t x219; + fiat_secp384r1_uint1 x220; + uint64_t x221; + uint64_t x222; + uint64_t x223; + uint64_t x224; + uint64_t x225; + uint64_t x226; + uint64_t x227; + uint64_t x228; + uint64_t x229; + uint64_t x230; + uint64_t x231; + uint64_t x232; + uint64_t x233; + uint64_t x234; + uint64_t x235; + fiat_secp384r1_uint1 x236; + uint64_t x237; + fiat_secp384r1_uint1 x238; + uint64_t x239; + fiat_secp384r1_uint1 x240; + uint64_t x241; + fiat_secp384r1_uint1 x242; + uint64_t x243; + fiat_secp384r1_uint1 x244; + uint64_t x245; + fiat_secp384r1_uint1 x246; + uint64_t x247; + fiat_secp384r1_uint1 x248; + uint64_t x249; + fiat_secp384r1_uint1 x250; + uint64_t x251; + fiat_secp384r1_uint1 x252; + uint64_t x253; + fiat_secp384r1_uint1 x254; + uint64_t x255; + fiat_secp384r1_uint1 x256; + uint64_t x257; + fiat_secp384r1_uint1 x258; + uint64_t x259; + uint64_t x260; + uint64_t x261; + uint64_t x262; + uint64_t x263; + uint64_t x264; + uint64_t x265; + 
uint64_t x266; + uint64_t x267; + fiat_secp384r1_uint1 x268; + uint64_t x269; + fiat_secp384r1_uint1 x270; + uint64_t x271; + fiat_secp384r1_uint1 x272; + uint64_t x273; + fiat_secp384r1_uint1 x274; + uint64_t x275; + fiat_secp384r1_uint1 x276; + uint64_t x277; + fiat_secp384r1_uint1 x278; + uint64_t x279; + fiat_secp384r1_uint1 x280; + uint64_t x281; + fiat_secp384r1_uint1 x282; + uint64_t x283; + fiat_secp384r1_uint1 x284; + uint64_t x285; + fiat_secp384r1_uint1 x286; + uint64_t x287; + uint64_t x288; + uint64_t x289; + uint64_t x290; + uint64_t x291; + uint64_t x292; + uint64_t x293; + uint64_t x294; + uint64_t x295; + uint64_t x296; + uint64_t x297; + uint64_t x298; + uint64_t x299; + uint64_t x300; + uint64_t x301; + fiat_secp384r1_uint1 x302; + uint64_t x303; + fiat_secp384r1_uint1 x304; + uint64_t x305; + fiat_secp384r1_uint1 x306; + uint64_t x307; + fiat_secp384r1_uint1 x308; + uint64_t x309; + fiat_secp384r1_uint1 x310; + uint64_t x311; + fiat_secp384r1_uint1 x312; + uint64_t x313; + fiat_secp384r1_uint1 x314; + uint64_t x315; + fiat_secp384r1_uint1 x316; + uint64_t x317; + fiat_secp384r1_uint1 x318; + uint64_t x319; + fiat_secp384r1_uint1 x320; + uint64_t x321; + fiat_secp384r1_uint1 x322; + uint64_t x323; + fiat_secp384r1_uint1 x324; + uint64_t x325; + uint64_t x326; + uint64_t x327; + uint64_t x328; + uint64_t x329; + uint64_t x330; + uint64_t x331; + uint64_t x332; + uint64_t x333; + fiat_secp384r1_uint1 x334; + uint64_t x335; + fiat_secp384r1_uint1 x336; + uint64_t x337; + fiat_secp384r1_uint1 x338; + uint64_t x339; + fiat_secp384r1_uint1 x340; + uint64_t x341; + fiat_secp384r1_uint1 x342; + uint64_t x343; + fiat_secp384r1_uint1 x344; + uint64_t x345; + fiat_secp384r1_uint1 x346; + uint64_t x347; + fiat_secp384r1_uint1 x348; + uint64_t x349; + fiat_secp384r1_uint1 x350; + uint64_t x351; + fiat_secp384r1_uint1 x352; + uint64_t x353; + uint64_t x354; + uint64_t x355; + uint64_t x356; + uint64_t x357; + uint64_t x358; + uint64_t x359; + uint64_t x360; + 
uint64_t x361; + uint64_t x362; + uint64_t x363; + uint64_t x364; + uint64_t x365; + uint64_t x366; + uint64_t x367; + fiat_secp384r1_uint1 x368; + uint64_t x369; + fiat_secp384r1_uint1 x370; + uint64_t x371; + fiat_secp384r1_uint1 x372; + uint64_t x373; + fiat_secp384r1_uint1 x374; + uint64_t x375; + fiat_secp384r1_uint1 x376; + uint64_t x377; + fiat_secp384r1_uint1 x378; + uint64_t x379; + fiat_secp384r1_uint1 x380; + uint64_t x381; + fiat_secp384r1_uint1 x382; + uint64_t x383; + fiat_secp384r1_uint1 x384; + uint64_t x385; + fiat_secp384r1_uint1 x386; + uint64_t x387; + fiat_secp384r1_uint1 x388; + uint64_t x389; + fiat_secp384r1_uint1 x390; + uint64_t x391; + fiat_secp384r1_uint1 x392; + uint64_t x393; + fiat_secp384r1_uint1 x394; + uint64_t x395; + fiat_secp384r1_uint1 x396; + uint64_t x397; + fiat_secp384r1_uint1 x398; + uint64_t x399; + fiat_secp384r1_uint1 x400; + uint64_t x401; + fiat_secp384r1_uint1 x402; + uint64_t x403; + fiat_secp384r1_uint1 x404; + uint64_t x405; + uint64_t x406; + uint64_t x407; + uint64_t x408; + uint64_t x409; + uint64_t x410; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[4]); + x5 = (arg1[5]); + x6 = (arg1[0]); + fiat_secp384r1_mulx_u64(&x7, &x8, x6, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x9, &x10, x6, UINT64_C(0xfffffffe00000000)); + fiat_secp384r1_mulx_u64(&x11, &x12, x6, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x13, &x14, x6, UINT64_C(0xfffffffe00000001)); + fiat_secp384r1_addcarryx_u64(&x15, &x16, 0x0, x14, x11); + fiat_secp384r1_addcarryx_u64(&x17, &x18, x16, x12, x9); + fiat_secp384r1_addcarryx_u64(&x19, &x20, x18, x10, x7); + fiat_secp384r1_addcarryx_u64(&x21, &x22, x20, x8, x6); + fiat_secp384r1_mulx_u64(&x23, &x24, x13, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x25, &x26, x23, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x27, &x28, x23, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x29, &x30, x23, UINT64_C(0xffffffffffffffff)); + 
fiat_secp384r1_mulx_u64(&x31, &x32, x23, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x33, &x34, x23, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x35, &x36, x23, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x37, &x38, 0x0, x36, x33); + fiat_secp384r1_addcarryx_u64(&x39, &x40, x38, x34, x31); + fiat_secp384r1_addcarryx_u64(&x41, &x42, x40, x32, x29); + fiat_secp384r1_addcarryx_u64(&x43, &x44, x42, x30, x27); + fiat_secp384r1_addcarryx_u64(&x45, &x46, x44, x28, x25); + fiat_secp384r1_addcarryx_u64(&x47, &x48, 0x0, x13, x35); + fiat_secp384r1_addcarryx_u64(&x49, &x50, x48, x15, x37); + fiat_secp384r1_addcarryx_u64(&x51, &x52, x50, x17, x39); + fiat_secp384r1_addcarryx_u64(&x53, &x54, x52, x19, x41); + fiat_secp384r1_addcarryx_u64(&x55, &x56, x54, x21, x43); + fiat_secp384r1_addcarryx_u64(&x57, &x58, x56, x22, x45); + fiat_secp384r1_addcarryx_u64(&x59, &x60, x58, 0x0, (x46 + x26)); + fiat_secp384r1_mulx_u64(&x61, &x62, x1, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x63, &x64, x1, UINT64_C(0xfffffffe00000000)); + fiat_secp384r1_mulx_u64(&x65, &x66, x1, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x67, &x68, x1, UINT64_C(0xfffffffe00000001)); + fiat_secp384r1_addcarryx_u64(&x69, &x70, 0x0, x68, x65); + fiat_secp384r1_addcarryx_u64(&x71, &x72, x70, x66, x63); + fiat_secp384r1_addcarryx_u64(&x73, &x74, x72, x64, x61); + fiat_secp384r1_addcarryx_u64(&x75, &x76, x74, x62, x1); + fiat_secp384r1_addcarryx_u64(&x77, &x78, 0x0, x49, x67); + fiat_secp384r1_addcarryx_u64(&x79, &x80, x78, x51, x69); + fiat_secp384r1_addcarryx_u64(&x81, &x82, x80, x53, x71); + fiat_secp384r1_addcarryx_u64(&x83, &x84, x82, x55, x73); + fiat_secp384r1_addcarryx_u64(&x85, &x86, x84, x57, x75); + fiat_secp384r1_addcarryx_u64(&x87, &x88, x86, x59, x76); + fiat_secp384r1_mulx_u64(&x89, &x90, x77, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x91, &x92, x89, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x93, &x94, x89, 
UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x95, &x96, x89, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x97, &x98, x89, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x99, &x100, x89, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x101, &x102, x89, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x103, &x104, 0x0, x102, x99); + fiat_secp384r1_addcarryx_u64(&x105, &x106, x104, x100, x97); + fiat_secp384r1_addcarryx_u64(&x107, &x108, x106, x98, x95); + fiat_secp384r1_addcarryx_u64(&x109, &x110, x108, x96, x93); + fiat_secp384r1_addcarryx_u64(&x111, &x112, x110, x94, x91); + fiat_secp384r1_addcarryx_u64(&x113, &x114, 0x0, x77, x101); + fiat_secp384r1_addcarryx_u64(&x115, &x116, x114, x79, x103); + fiat_secp384r1_addcarryx_u64(&x117, &x118, x116, x81, x105); + fiat_secp384r1_addcarryx_u64(&x119, &x120, x118, x83, x107); + fiat_secp384r1_addcarryx_u64(&x121, &x122, x120, x85, x109); + fiat_secp384r1_addcarryx_u64(&x123, &x124, x122, x87, x111); + fiat_secp384r1_addcarryx_u64(&x125, &x126, x124, ((uint64_t)x88 + x60), + (x112 + x92)); + fiat_secp384r1_mulx_u64(&x127, &x128, x2, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x129, &x130, x2, UINT64_C(0xfffffffe00000000)); + fiat_secp384r1_mulx_u64(&x131, &x132, x2, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x133, &x134, x2, UINT64_C(0xfffffffe00000001)); + fiat_secp384r1_addcarryx_u64(&x135, &x136, 0x0, x134, x131); + fiat_secp384r1_addcarryx_u64(&x137, &x138, x136, x132, x129); + fiat_secp384r1_addcarryx_u64(&x139, &x140, x138, x130, x127); + fiat_secp384r1_addcarryx_u64(&x141, &x142, x140, x128, x2); + fiat_secp384r1_addcarryx_u64(&x143, &x144, 0x0, x115, x133); + fiat_secp384r1_addcarryx_u64(&x145, &x146, x144, x117, x135); + fiat_secp384r1_addcarryx_u64(&x147, &x148, x146, x119, x137); + fiat_secp384r1_addcarryx_u64(&x149, &x150, x148, x121, x139); + fiat_secp384r1_addcarryx_u64(&x151, &x152, x150, x123, x141); + fiat_secp384r1_addcarryx_u64(&x153, 
&x154, x152, x125, x142); + fiat_secp384r1_mulx_u64(&x155, &x156, x143, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x157, &x158, x155, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x159, &x160, x155, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x161, &x162, x155, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x163, &x164, x155, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x165, &x166, x155, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x167, &x168, x155, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x169, &x170, 0x0, x168, x165); + fiat_secp384r1_addcarryx_u64(&x171, &x172, x170, x166, x163); + fiat_secp384r1_addcarryx_u64(&x173, &x174, x172, x164, x161); + fiat_secp384r1_addcarryx_u64(&x175, &x176, x174, x162, x159); + fiat_secp384r1_addcarryx_u64(&x177, &x178, x176, x160, x157); + fiat_secp384r1_addcarryx_u64(&x179, &x180, 0x0, x143, x167); + fiat_secp384r1_addcarryx_u64(&x181, &x182, x180, x145, x169); + fiat_secp384r1_addcarryx_u64(&x183, &x184, x182, x147, x171); + fiat_secp384r1_addcarryx_u64(&x185, &x186, x184, x149, x173); + fiat_secp384r1_addcarryx_u64(&x187, &x188, x186, x151, x175); + fiat_secp384r1_addcarryx_u64(&x189, &x190, x188, x153, x177); + fiat_secp384r1_addcarryx_u64(&x191, &x192, x190, ((uint64_t)x154 + x126), + (x178 + x158)); + fiat_secp384r1_mulx_u64(&x193, &x194, x3, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x195, &x196, x3, UINT64_C(0xfffffffe00000000)); + fiat_secp384r1_mulx_u64(&x197, &x198, x3, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x199, &x200, x3, UINT64_C(0xfffffffe00000001)); + fiat_secp384r1_addcarryx_u64(&x201, &x202, 0x0, x200, x197); + fiat_secp384r1_addcarryx_u64(&x203, &x204, x202, x198, x195); + fiat_secp384r1_addcarryx_u64(&x205, &x206, x204, x196, x193); + fiat_secp384r1_addcarryx_u64(&x207, &x208, x206, x194, x3); + fiat_secp384r1_addcarryx_u64(&x209, &x210, 0x0, x181, x199); + fiat_secp384r1_addcarryx_u64(&x211, &x212, x210, 
x183, x201); + fiat_secp384r1_addcarryx_u64(&x213, &x214, x212, x185, x203); + fiat_secp384r1_addcarryx_u64(&x215, &x216, x214, x187, x205); + fiat_secp384r1_addcarryx_u64(&x217, &x218, x216, x189, x207); + fiat_secp384r1_addcarryx_u64(&x219, &x220, x218, x191, x208); + fiat_secp384r1_mulx_u64(&x221, &x222, x209, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x223, &x224, x221, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x225, &x226, x221, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x227, &x228, x221, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x229, &x230, x221, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x231, &x232, x221, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x233, &x234, x221, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x235, &x236, 0x0, x234, x231); + fiat_secp384r1_addcarryx_u64(&x237, &x238, x236, x232, x229); + fiat_secp384r1_addcarryx_u64(&x239, &x240, x238, x230, x227); + fiat_secp384r1_addcarryx_u64(&x241, &x242, x240, x228, x225); + fiat_secp384r1_addcarryx_u64(&x243, &x244, x242, x226, x223); + fiat_secp384r1_addcarryx_u64(&x245, &x246, 0x0, x209, x233); + fiat_secp384r1_addcarryx_u64(&x247, &x248, x246, x211, x235); + fiat_secp384r1_addcarryx_u64(&x249, &x250, x248, x213, x237); + fiat_secp384r1_addcarryx_u64(&x251, &x252, x250, x215, x239); + fiat_secp384r1_addcarryx_u64(&x253, &x254, x252, x217, x241); + fiat_secp384r1_addcarryx_u64(&x255, &x256, x254, x219, x243); + fiat_secp384r1_addcarryx_u64(&x257, &x258, x256, ((uint64_t)x220 + x192), + (x244 + x224)); + fiat_secp384r1_mulx_u64(&x259, &x260, x4, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x261, &x262, x4, UINT64_C(0xfffffffe00000000)); + fiat_secp384r1_mulx_u64(&x263, &x264, x4, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x265, &x266, x4, UINT64_C(0xfffffffe00000001)); + fiat_secp384r1_addcarryx_u64(&x267, &x268, 0x0, x266, x263); + fiat_secp384r1_addcarryx_u64(&x269, &x270, x268, x264, 
x261); + fiat_secp384r1_addcarryx_u64(&x271, &x272, x270, x262, x259); + fiat_secp384r1_addcarryx_u64(&x273, &x274, x272, x260, x4); + fiat_secp384r1_addcarryx_u64(&x275, &x276, 0x0, x247, x265); + fiat_secp384r1_addcarryx_u64(&x277, &x278, x276, x249, x267); + fiat_secp384r1_addcarryx_u64(&x279, &x280, x278, x251, x269); + fiat_secp384r1_addcarryx_u64(&x281, &x282, x280, x253, x271); + fiat_secp384r1_addcarryx_u64(&x283, &x284, x282, x255, x273); + fiat_secp384r1_addcarryx_u64(&x285, &x286, x284, x257, x274); + fiat_secp384r1_mulx_u64(&x287, &x288, x275, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x289, &x290, x287, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x291, &x292, x287, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x293, &x294, x287, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x295, &x296, x287, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x297, &x298, x287, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x299, &x300, x287, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x301, &x302, 0x0, x300, x297); + fiat_secp384r1_addcarryx_u64(&x303, &x304, x302, x298, x295); + fiat_secp384r1_addcarryx_u64(&x305, &x306, x304, x296, x293); + fiat_secp384r1_addcarryx_u64(&x307, &x308, x306, x294, x291); + fiat_secp384r1_addcarryx_u64(&x309, &x310, x308, x292, x289); + fiat_secp384r1_addcarryx_u64(&x311, &x312, 0x0, x275, x299); + fiat_secp384r1_addcarryx_u64(&x313, &x314, x312, x277, x301); + fiat_secp384r1_addcarryx_u64(&x315, &x316, x314, x279, x303); + fiat_secp384r1_addcarryx_u64(&x317, &x318, x316, x281, x305); + fiat_secp384r1_addcarryx_u64(&x319, &x320, x318, x283, x307); + fiat_secp384r1_addcarryx_u64(&x321, &x322, x320, x285, x309); + fiat_secp384r1_addcarryx_u64(&x323, &x324, x322, ((uint64_t)x286 + x258), + (x310 + x290)); + fiat_secp384r1_mulx_u64(&x325, &x326, x5, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x327, &x328, x5, UINT64_C(0xfffffffe00000000)); + 
fiat_secp384r1_mulx_u64(&x329, &x330, x5, UINT64_C(0x200000000)); + fiat_secp384r1_mulx_u64(&x331, &x332, x5, UINT64_C(0xfffffffe00000001)); + fiat_secp384r1_addcarryx_u64(&x333, &x334, 0x0, x332, x329); + fiat_secp384r1_addcarryx_u64(&x335, &x336, x334, x330, x327); + fiat_secp384r1_addcarryx_u64(&x337, &x338, x336, x328, x325); + fiat_secp384r1_addcarryx_u64(&x339, &x340, x338, x326, x5); + fiat_secp384r1_addcarryx_u64(&x341, &x342, 0x0, x313, x331); + fiat_secp384r1_addcarryx_u64(&x343, &x344, x342, x315, x333); + fiat_secp384r1_addcarryx_u64(&x345, &x346, x344, x317, x335); + fiat_secp384r1_addcarryx_u64(&x347, &x348, x346, x319, x337); + fiat_secp384r1_addcarryx_u64(&x349, &x350, x348, x321, x339); + fiat_secp384r1_addcarryx_u64(&x351, &x352, x350, x323, x340); + fiat_secp384r1_mulx_u64(&x353, &x354, x341, UINT64_C(0x100000001)); + fiat_secp384r1_mulx_u64(&x355, &x356, x353, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x357, &x358, x353, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x359, &x360, x353, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_mulx_u64(&x361, &x362, x353, UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_mulx_u64(&x363, &x364, x353, UINT64_C(0xffffffff00000000)); + fiat_secp384r1_mulx_u64(&x365, &x366, x353, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u64(&x367, &x368, 0x0, x366, x363); + fiat_secp384r1_addcarryx_u64(&x369, &x370, x368, x364, x361); + fiat_secp384r1_addcarryx_u64(&x371, &x372, x370, x362, x359); + fiat_secp384r1_addcarryx_u64(&x373, &x374, x372, x360, x357); + fiat_secp384r1_addcarryx_u64(&x375, &x376, x374, x358, x355); + fiat_secp384r1_addcarryx_u64(&x377, &x378, 0x0, x341, x365); + fiat_secp384r1_addcarryx_u64(&x379, &x380, x378, x343, x367); + fiat_secp384r1_addcarryx_u64(&x381, &x382, x380, x345, x369); + fiat_secp384r1_addcarryx_u64(&x383, &x384, x382, x347, x371); + fiat_secp384r1_addcarryx_u64(&x385, &x386, x384, x349, x373); + fiat_secp384r1_addcarryx_u64(&x387, &x388, x386, x351, 
x375); + fiat_secp384r1_addcarryx_u64(&x389, &x390, x388, ((uint64_t)x352 + x324), + (x376 + x356)); + fiat_secp384r1_subborrowx_u64(&x391, &x392, 0x0, x379, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u64(&x393, &x394, x392, x381, + UINT64_C(0xffffffff00000000)); + fiat_secp384r1_subborrowx_u64(&x395, &x396, x394, x383, + UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_subborrowx_u64(&x397, &x398, x396, x385, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x399, &x400, x398, x387, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x401, &x402, x400, x389, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x403, &x404, x402, x390, 0x0); + fiat_secp384r1_cmovznz_u64(&x405, x404, x391, x379); + fiat_secp384r1_cmovznz_u64(&x406, x404, x393, x381); + fiat_secp384r1_cmovznz_u64(&x407, x404, x395, x383); + fiat_secp384r1_cmovznz_u64(&x408, x404, x397, x385); + fiat_secp384r1_cmovznz_u64(&x409, x404, x399, x387); + fiat_secp384r1_cmovznz_u64(&x410, x404, x401, x389); + out1[0] = x405; + out1[1] = x406; + out1[2] = x407; + out1[3] = x408; + out1[4] = x409; + out1[5] = x410; +}
+
+/*
+ * The function fiat_secp384r1_nonzero outputs a single non-zero word if the
+ * input is non-zero and zero otherwise. It ORs the six limbs of arg1 together,
+ * so the stored word is zero exactly when every limb is zero.
+ *
+ * Preconditions:
+ *   0 ≤ eval arg1 < m
+ * Postconditions:
+ *   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
+ *
+ * Input Bounds:
+ *   arg1: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ *   out1: [0x0 ~> 0xffffffffffffffff]
+ */
+static void
+fiat_secp384r1_nonzero(uint64_t *out1, const uint64_t arg1[6])
+{
+    uint64_t folded = arg1[0];
+    unsigned int limb;
+
+    /* Every limb is read before the single store to *out1. */
+    for (limb = 1; limb < 6; limb++) {
+        folded |= arg1[limb];
+    }
+    *out1 = folded;
+}
+
+/*
+ * The function fiat_secp384r1_selectznz is a multi-limb conditional select:
+ * each output limb is chosen by fiat_secp384r1_cmovznz_u64 from the matching
+ * limbs of arg2 and arg3.
+ *
+ * Postconditions:
+ *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+ *
+ * Input Bounds:
+ *   arg1: [0x0 ~> 0x1]
+ *   arg2: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ *   arg3: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ *   out1: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ */
+static void
+fiat_secp384r1_selectznz(uint64_t out1[6],
+                         fiat_secp384r1_uint1 arg1,
+                         const uint64_t arg2[6],
+                         const uint64_t arg3[6])
+{
+    uint64_t picked[6];
+    unsigned int limb;
+
+    /* All six selections are made before anything is stored to out1. */
+    for (limb = 0; limb < 6; limb++) {
+        fiat_secp384r1_cmovznz_u64(&picked[limb], arg1, arg2[limb], arg3[limb]);
+    }
+    for (limb = 0; limb < 6; limb++) {
+        out1[limb] = picked[limb];
+    }
+}
+
+/*
+ * The function fiat_secp384r1_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..47] + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]] + */ +static void +fiat_secp384r1_to_bytes(uint8_t out1[48], const uint64_t arg1[6]) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint8_t x7; + uint64_t x8; + uint8_t x9; + uint64_t x10; + uint8_t x11; + uint64_t x12; + uint8_t x13; + uint64_t x14; + uint8_t x15; + uint64_t x16; + uint8_t x17; + uint64_t x18; + uint8_t x19; + uint8_t x20; + uint8_t x21; + uint64_t x22; + uint8_t x23; + uint64_t x24; + uint8_t x25; + uint64_t x26; + uint8_t x27; + uint64_t x28; + uint8_t x29; + uint64_t x30; + uint8_t x31; + uint64_t x32; + uint8_t x33; + uint8_t x34; + uint8_t x35; + uint64_t x36; + uint8_t x37; + uint64_t x38; + uint8_t x39; + uint64_t x40; + uint8_t x41; + uint64_t x42; + uint8_t x43; + uint64_t x44; + uint8_t x45; + uint64_t x46; + uint8_t x47; + uint8_t x48; + uint8_t x49; + uint64_t x50; + uint8_t x51; + uint64_t x52; + 
uint8_t x53; + uint64_t x54; + uint8_t x55; + uint64_t x56; + uint8_t x57; + uint64_t x58; + uint8_t x59; + uint64_t x60; + uint8_t x61; + uint8_t x62; + uint8_t x63; + uint64_t x64; + uint8_t x65; + uint64_t x66; + uint8_t x67; + uint64_t x68; + uint8_t x69; + uint64_t x70; + uint8_t x71; + uint64_t x72; + uint8_t x73; + uint64_t x74; + uint8_t x75; + uint8_t x76; + uint8_t x77; + uint64_t x78; + uint8_t x79; + uint64_t x80; + uint8_t x81; + uint64_t x82; + uint8_t x83; + uint64_t x84; + uint8_t x85; + uint64_t x86; + uint8_t x87; + uint64_t x88; + uint8_t x89; + uint8_t x90; + x1 = (arg1[5]); + x2 = (arg1[4]); + x3 = (arg1[3]); + x4 = (arg1[2]); + x5 = (arg1[1]); + x6 = (arg1[0]); + x7 = (uint8_t)(x6 & UINT8_C(0xff)); + x8 = (x6 >> 8); + x9 = (uint8_t)(x8 & UINT8_C(0xff)); + x10 = (x8 >> 8); + x11 = (uint8_t)(x10 & UINT8_C(0xff)); + x12 = (x10 >> 8); + x13 = (uint8_t)(x12 & UINT8_C(0xff)); + x14 = (x12 >> 8); + x15 = (uint8_t)(x14 & UINT8_C(0xff)); + x16 = (x14 >> 8); + x17 = (uint8_t)(x16 & UINT8_C(0xff)); + x18 = (x16 >> 8); + x19 = (uint8_t)(x18 & UINT8_C(0xff)); + x20 = (uint8_t)(x18 >> 8); + x21 = (uint8_t)(x5 & UINT8_C(0xff)); + x22 = (x5 >> 8); + x23 = (uint8_t)(x22 & UINT8_C(0xff)); + x24 = (x22 >> 8); + x25 = (uint8_t)(x24 & UINT8_C(0xff)); + x26 = (x24 >> 8); + x27 = (uint8_t)(x26 & UINT8_C(0xff)); + x28 = (x26 >> 8); + x29 = (uint8_t)(x28 & UINT8_C(0xff)); + x30 = (x28 >> 8); + x31 = (uint8_t)(x30 & UINT8_C(0xff)); + x32 = (x30 >> 8); + x33 = (uint8_t)(x32 & UINT8_C(0xff)); + x34 = (uint8_t)(x32 >> 8); + x35 = (uint8_t)(x4 & UINT8_C(0xff)); + x36 = (x4 >> 8); + x37 = (uint8_t)(x36 & UINT8_C(0xff)); + x38 = (x36 >> 8); + x39 = (uint8_t)(x38 & UINT8_C(0xff)); + x40 = (x38 >> 8); + x41 = (uint8_t)(x40 & UINT8_C(0xff)); + x42 = (x40 >> 8); + x43 = (uint8_t)(x42 & UINT8_C(0xff)); + x44 = (x42 >> 8); + x45 = (uint8_t)(x44 & UINT8_C(0xff)); + x46 = (x44 >> 8); + x47 = (uint8_t)(x46 & UINT8_C(0xff)); + x48 = (uint8_t)(x46 >> 8); + x49 = (uint8_t)(x3 & 
UINT8_C(0xff)); + x50 = (x3 >> 8); + x51 = (uint8_t)(x50 & UINT8_C(0xff)); + x52 = (x50 >> 8); + x53 = (uint8_t)(x52 & UINT8_C(0xff)); + x54 = (x52 >> 8); + x55 = (uint8_t)(x54 & UINT8_C(0xff)); + x56 = (x54 >> 8); + x57 = (uint8_t)(x56 & UINT8_C(0xff)); + x58 = (x56 >> 8); + x59 = (uint8_t)(x58 & UINT8_C(0xff)); + x60 = (x58 >> 8); + x61 = (uint8_t)(x60 & UINT8_C(0xff)); + x62 = (uint8_t)(x60 >> 8); + x63 = (uint8_t)(x2 & UINT8_C(0xff)); + x64 = (x2 >> 8); + x65 = (uint8_t)(x64 & UINT8_C(0xff)); + x66 = (x64 >> 8); + x67 = (uint8_t)(x66 & UINT8_C(0xff)); + x68 = (x66 >> 8); + x69 = (uint8_t)(x68 & UINT8_C(0xff)); + x70 = (x68 >> 8); + x71 = (uint8_t)(x70 & UINT8_C(0xff)); + x72 = (x70 >> 8); + x73 = (uint8_t)(x72 & UINT8_C(0xff)); + x74 = (x72 >> 8); + x75 = (uint8_t)(x74 & UINT8_C(0xff)); + x76 = (uint8_t)(x74 >> 8); + x77 = (uint8_t)(x1 & UINT8_C(0xff)); + x78 = (x1 >> 8); + x79 = (uint8_t)(x78 & UINT8_C(0xff)); + x80 = (x78 >> 8); + x81 = (uint8_t)(x80 & UINT8_C(0xff)); + x82 = (x80 >> 8); + x83 = (uint8_t)(x82 & UINT8_C(0xff)); + x84 = (x82 >> 8); + x85 = (uint8_t)(x84 & UINT8_C(0xff)); + x86 = (x84 >> 8); + x87 = (uint8_t)(x86 & UINT8_C(0xff)); + x88 = (x86 >> 8); + x89 = (uint8_t)(x88 & UINT8_C(0xff)); + x90 = (uint8_t)(x88 >> 8); + out1[0] = x7; + out1[1] = x9; + out1[2] = x11; + out1[3] = x13; + out1[4] = x15; + out1[5] = x17; + out1[6] = x19; + out1[7] = x20; + out1[8] = x21; + out1[9] = x23; + out1[10] = x25; + out1[11] = x27; + out1[12] = x29; + out1[13] = x31; + out1[14] = x33; + out1[15] = x34; + out1[16] = x35; + out1[17] = x37; + out1[18] = x39; + out1[19] = x41; + out1[20] = x43; + out1[21] = x45; + out1[22] = x47; + out1[23] = x48; + out1[24] = x49; + out1[25] = x51; + out1[26] = x53; + out1[27] = x55; + out1[28] = x57; + out1[29] = x59; + out1[30] = x61; + out1[31] = x62; + out1[32] = x63; + out1[33] = x65; + out1[34] = x67; + out1[35] = x69; + out1[36] = x71; + out1[37] = x73; + out1[38] = x75; + out1[39] = x76; + out1[40] = x77; + out1[41] = 
x79; + out1[42] = x81; + out1[43] = x83; + out1[44] = x85; + out1[45] = x87; + out1[46] = x89; + out1[47] = x90; +} + +/* + * The function fiat_secp384r1_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order. + * + * Preconditions: + * 0 ≤ bytes_eval arg1 < m + * Postconditions: + * eval out1 mod m = bytes_eval arg1 mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +static void +fiat_secp384r1_from_bytes(uint64_t out1[6], + const uint8_t arg1[48]) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint8_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint8_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; + uint64_t x21; + uint64_t x22; + uint64_t x23; + uint8_t x24; + uint64_t x25; + uint64_t x26; + uint64_t x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint8_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + 
uint64_t x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint8_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + uint64_t x45; + uint64_t x46; + uint64_t x47; + uint8_t x48; + uint64_t x49; + uint64_t x50; + uint64_t x51; + uint64_t x52; + uint64_t x53; + uint64_t x54; + uint64_t x55; + uint64_t x56; + uint64_t x57; + uint64_t x58; + uint64_t x59; + uint64_t x60; + uint64_t x61; + uint64_t x62; + uint64_t x63; + uint64_t x64; + uint64_t x65; + uint64_t x66; + uint64_t x67; + uint64_t x68; + uint64_t x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + uint64_t x74; + uint64_t x75; + uint64_t x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + uint64_t x81; + uint64_t x82; + uint64_t x83; + uint64_t x84; + uint64_t x85; + uint64_t x86; + uint64_t x87; + uint64_t x88; + uint64_t x89; + uint64_t x90; + x1 = ((uint64_t)(arg1[47]) << 56); + x2 = ((uint64_t)(arg1[46]) << 48); + x3 = ((uint64_t)(arg1[45]) << 40); + x4 = ((uint64_t)(arg1[44]) << 32); + x5 = ((uint64_t)(arg1[43]) << 24); + x6 = ((uint64_t)(arg1[42]) << 16); + x7 = ((uint64_t)(arg1[41]) << 8); + x8 = (arg1[40]); + x9 = ((uint64_t)(arg1[39]) << 56); + x10 = ((uint64_t)(arg1[38]) << 48); + x11 = ((uint64_t)(arg1[37]) << 40); + x12 = ((uint64_t)(arg1[36]) << 32); + x13 = ((uint64_t)(arg1[35]) << 24); + x14 = ((uint64_t)(arg1[34]) << 16); + x15 = ((uint64_t)(arg1[33]) << 8); + x16 = (arg1[32]); + x17 = ((uint64_t)(arg1[31]) << 56); + x18 = ((uint64_t)(arg1[30]) << 48); + x19 = ((uint64_t)(arg1[29]) << 40); + x20 = ((uint64_t)(arg1[28]) << 32); + x21 = ((uint64_t)(arg1[27]) << 24); + x22 = ((uint64_t)(arg1[26]) << 16); + x23 = ((uint64_t)(arg1[25]) << 8); + x24 = (arg1[24]); + x25 = ((uint64_t)(arg1[23]) << 56); + x26 = ((uint64_t)(arg1[22]) << 48); + x27 = ((uint64_t)(arg1[21]) << 40); + x28 = ((uint64_t)(arg1[20]) << 32); + x29 = ((uint64_t)(arg1[19]) << 24); + x30 = ((uint64_t)(arg1[18]) << 16); + x31 = ((uint64_t)(arg1[17]) << 8); + x32 = 
(arg1[16]); + x33 = ((uint64_t)(arg1[15]) << 56); + x34 = ((uint64_t)(arg1[14]) << 48); + x35 = ((uint64_t)(arg1[13]) << 40); + x36 = ((uint64_t)(arg1[12]) << 32); + x37 = ((uint64_t)(arg1[11]) << 24); + x38 = ((uint64_t)(arg1[10]) << 16); + x39 = ((uint64_t)(arg1[9]) << 8); + x40 = (arg1[8]); + x41 = ((uint64_t)(arg1[7]) << 56); + x42 = ((uint64_t)(arg1[6]) << 48); + x43 = ((uint64_t)(arg1[5]) << 40); + x44 = ((uint64_t)(arg1[4]) << 32); + x45 = ((uint64_t)(arg1[3]) << 24); + x46 = ((uint64_t)(arg1[2]) << 16); + x47 = ((uint64_t)(arg1[1]) << 8); + x48 = (arg1[0]); + x49 = (x47 + (uint64_t)x48); + x50 = (x46 + x49); + x51 = (x45 + x50); + x52 = (x44 + x51); + x53 = (x43 + x52); + x54 = (x42 + x53); + x55 = (x41 + x54); + x56 = (x39 + (uint64_t)x40); + x57 = (x38 + x56); + x58 = (x37 + x57); + x59 = (x36 + x58); + x60 = (x35 + x59); + x61 = (x34 + x60); + x62 = (x33 + x61); + x63 = (x31 + (uint64_t)x32); + x64 = (x30 + x63); + x65 = (x29 + x64); + x66 = (x28 + x65); + x67 = (x27 + x66); + x68 = (x26 + x67); + x69 = (x25 + x68); + x70 = (x23 + (uint64_t)x24); + x71 = (x22 + x70); + x72 = (x21 + x71); + x73 = (x20 + x72); + x74 = (x19 + x73); + x75 = (x18 + x74); + x76 = (x17 + x75); + x77 = (x15 + (uint64_t)x16); + x78 = (x14 + x77); + x79 = (x13 + x78); + x80 = (x12 + x79); + x81 = (x11 + x80); + x82 = (x10 + x81); + x83 = (x9 + x82); + x84 = (x7 + (uint64_t)x8); + x85 = (x6 + x84); + x86 = (x5 + x85); + x87 = (x4 + x86); + x88 = (x3 + x87); + x89 = (x2 + x88); + x90 = (x1 + x89); + out1[0] = x55; + out1[1] = x62; + out1[2] = x69; + out1[3] = x76; + out1[4] = x83; + out1[5] = x90; +} + +/* + * The function fiat_secp384r1_divstep computes a divstep. 
+ *
+ * Preconditions:
+ *   0 ≤ eval arg4 < m
+ *   0 ≤ eval arg5 < m
+ * Postconditions:
+ *   out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
+ *   twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
+ *   twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
+ *   eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
+ *   eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
+ *   0 ≤ eval out4 < m
+ *   0 ≤ eval out5 < m
+ *   0 ≤ eval out2 < m
+ *   0 ≤ eval out3 < m
+ *
+ * Input Bounds:
+ *   arg1: [0x0 ~> 0xffffffffffffffff]
+ *   arg2: seven limbs, each [0x0 ~> 0xffffffffffffffff]
+ *   arg3: seven limbs, each [0x0 ~> 0xffffffffffffffff]
+ *   arg4: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ *   arg5: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ *   out1: [0x0 ~> 0xffffffffffffffff]
+ *   out2: seven limbs, each [0x0 ~> 0xffffffffffffffff]
+ *   out3: seven limbs, each [0x0 ~> 0xffffffffffffffff]
+ *   out4: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ *   out5: six limbs, each [0x0 ~> 0xffffffffffffffff]
+ *
+ * Every input is consumed into locals before the first output is stored, and
+ * the outputs are stored in the order out1, out2, out3, out4, out5.
+ */
+static void
+fiat_secp384r1_divstep(
+    uint64_t *out1, uint64_t out2[7], uint64_t out3[7], uint64_t out4[6],
+    uint64_t out5[6], uint64_t arg1, const uint64_t arg2[7],
+    const uint64_t arg3[7], const uint64_t arg4[6], const uint64_t arg5[6])
+{
+    /* Limbs subtracted by both final reductions, least significant first. */
+    static const uint64_t modulus[6] = {
+        UINT64_C(0x00000000ffffffff), UINT64_C(0xffffffff00000000),
+        UINT64_C(0xfffffffffffffffe), UINT64_C(0xffffffffffffffff),
+        UINT64_C(0xffffffffffffffff), UINT64_C(0xffffffffffffffff)
+    };
+    uint64_t neg_delta;
+    uint64_t delta_sel;
+    uint64_t delta_next;
+    uint64_t f_sel[7];
+    uint64_t f_neg[7];
+    uint64_t g_sel[7];
+    uint64_t g_add[7];
+    uint64_t g_sum[7];
+    uint64_t g_half[7];
+    uint64_t v_sel[6];
+    uint64_t v_dbl[6];
+    uint64_t v_red[6];
+    uint64_t v_neg[6];
+    uint64_t v_negm[6];
+    uint64_t r_sel[6];
+    uint64_t r_add[6];
+    uint64_t r_sum[6];
+    uint64_t r_red[6];
+    uint64_t v_out[6];
+    uint64_t r_out[6];
+    uint64_t wrap_mask;
+    uint64_t discard;
+    fiat_secp384r1_uint1 swap;
+    fiat_secp384r1_uint1 odd;
+    fiat_secp384r1_uint1 carry;
+    fiat_secp384r1_uint1 borrow;
+    fiat_secp384r1_uint1 v_keep;
+    fiat_secp384r1_uint1 r_keep;
+    unsigned int i;
+
+    /* neg_delta = ~arg1 + 1; its top bit is combined with the low bit of arg3. */
+    fiat_secp384r1_addcarryx_u64(&neg_delta, &carry, 0x0, (~arg1), 0x1);
+    swap = (fiat_secp384r1_uint1)((fiat_secp384r1_uint1)(neg_delta >> 63) &
+                                  (fiat_secp384r1_uint1)((arg3[0]) & 0x1));
+    fiat_secp384r1_cmovznz_u64(&delta_sel, swap, arg1, neg_delta);
+
+    /* f_sel: arg2 when swap is clear, arg3 when it is set. */
+    for (i = 0; i < 7; i++) {
+        fiat_secp384r1_cmovznz_u64(&f_sel[i], swap, (arg2[i]), (arg3[i]));
+    }
+
+    /* f_neg = ~arg2 + 1 carried across all seven limbs. */
+    fiat_secp384r1_addcarryx_u64(&f_neg[0], &carry, 0x0, 0x1, (~(arg2[0])));
+    for (i = 1; i < 7; i++) {
+        fiat_secp384r1_addcarryx_u64(&f_neg[i], &carry, carry, 0x0,
+                                     (~(arg2[i])));
+    }
+
+    /* g_sel: arg3 when swap is clear, the negated arg2 when it is set. */
+    for (i = 0; i < 7; i++) {
+        fiat_secp384r1_cmovznz_u64(&g_sel[i], swap, (arg3[i]), f_neg[i]);
+    }
+
+    /* v_sel: arg4 when swap is clear, arg5 when it is set. */
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_cmovznz_u64(&v_sel[i], swap, (arg4[i]), (arg5[i]));
+    }
+
+    /* v_dbl = v_sel + v_sel; the carry out of the top limb is left in carry. */
+    carry = 0x0;
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_addcarryx_u64(&v_dbl[i], &carry, carry, v_sel[i],
+                                     v_sel[i]);
+    }
+
+    /* v_red = v_dbl - modulus; v_keep is the borrow out of the extended word. */
+    borrow = 0x0;
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_subborrowx_u64(&v_red[i], &borrow, borrow, v_dbl[i],
+                                      modulus[i]);
+    }
+    fiat_secp384r1_subborrowx_u64(&discard, &v_keep, borrow, carry, 0x0);
+
+    /* v_neg = 0 - arg4; a final borrow selects an all-ones mask. */
+    borrow = 0x0;
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_subborrowx_u64(&v_neg[i], &borrow, borrow, 0x0,
+                                      (arg4[i]));
+    }
+    fiat_secp384r1_cmovznz_u64(&wrap_mask, borrow, 0x0,
+                               UINT64_C(0xffffffffffffffff));
+
+    /* v_negm = v_neg + (modulus masked by wrap_mask). */
+    carry = 0x0;
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_addcarryx_u64(&v_negm[i], &carry, carry, v_neg[i],
+                                     (wrap_mask & modulus[i]));
+    }
+
+    /* r_sel: arg5 when swap is clear, the negated arg4 when it is set. */
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_cmovznz_u64(&r_sel[i], swap, (arg5[i]), v_negm[i]);
+    }
+
+    /* odd is the low bit of g_sel; it gates both additions below. */
+    odd = (fiat_secp384r1_uint1)(g_sel[0] & 0x1);
+
+    /* g_sum = g_sel + (odd ? f_sel : 0); the top carry is dropped. */
+    for (i = 0; i < 7; i++) {
+        fiat_secp384r1_cmovznz_u64(&g_add[i], odd, 0x0, f_sel[i]);
+    }
+    carry = 0x0;
+    for (i = 0; i < 7; i++) {
+        fiat_secp384r1_addcarryx_u64(&g_sum[i], &carry, carry, g_sel[i],
+                                     g_add[i]);
+    }
+
+    /* r_sum = r_sel + (odd ? v_sel : 0); the top carry is left in carry. */
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_cmovznz_u64(&r_add[i], odd, 0x0, v_sel[i]);
+    }
+    carry = 0x0;
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_addcarryx_u64(&r_sum[i], &carry, carry, r_sel[i],
+                                     r_add[i]);
+    }
+
+    /* r_red = r_sum - modulus; r_keep is the borrow out of the extended word. */
+    borrow = 0x0;
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_subborrowx_u64(&r_red[i], &borrow, borrow, r_sum[i],
+                                      modulus[i]);
+    }
+    fiat_secp384r1_subborrowx_u64(&discard, &r_keep, borrow, carry, 0x0);
+
+    /* delta_next = delta_sel + 1. */
+    fiat_secp384r1_addcarryx_u64(&delta_next, &carry, 0x0, delta_sel, 0x1);
+
+    /* Shift g_sum right by one bit; the top limb keeps its sign bit. */
+    for (i = 0; i < 6; i++) {
+        g_half[i] = ((g_sum[i] >> 1) |
+                     ((g_sum[i + 1] << 63) & UINT64_C(0xffffffffffffffff)));
+    }
+    g_half[6] = ((g_sum[6] & UINT64_C(0x8000000000000000)) | (g_sum[6] >> 1));
+
+    /* Keep the unreduced value only when the trial subtraction borrowed. */
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_cmovznz_u64(&v_out[i], v_keep, v_red[i], v_dbl[i]);
+    }
+    for (i = 0; i < 6; i++) {
+        fiat_secp384r1_cmovznz_u64(&r_out[i], r_keep, r_red[i], r_sum[i]);
+    }
+
+    *out1 = delta_next;
+    for (i = 0; i < 7; i++) {
+        out2[i] = f_sel[i];
+    }
+    for (i = 0; i < 7; i++) {
+        out3[i] = g_half[i];
+    }
+    for (i = 0; i < 6; i++) {
+        out4[i] = v_out[i];
+    }
+    for (i = 0; i < 6; i++) {
+        out5[i] = r_out[i];
+    }
+}
+ +/* END verbatim fiat code */ + +/* curve-related constants */ + +static const limb_t const_one[6] = { + UINT64_C(0xFFFFFFFF00000001), UINT64_C(0x00000000FFFFFFFF), + UINT64_C(0x0000000000000001), UINT64_C(0x0000000000000000), + UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000) +}; + +static const limb_t const_b[6] = { + UINT64_C(0x081188719D412DCC), UINT64_C(0xF729ADD87A4C32EC), + UINT64_C(0x77F2209B1920022E), UINT64_C(0xE3374BEE94938AE2), + UINT64_C(0xB62B21F41F022094), UINT64_C(0xCD08114B604FBFF9) +}; + +static const limb_t const_divstep[6] = { + UINT64_C(0xFFFFC80000005000), UINT64_C(0xFFFFB3FFFFFF83FF), + UINT64_C(0xFFFFF7FFFFFFFFFF), UINT64_C(0xFFFFEBFFFFFFEFFF), + UINT64_C(0x00000BFFFFFFF3FF), UINT64_C(0x0000500000003000) +}; + +static const limb_t const_psat[6] = { + UINT64_C(0x00000000FFFFFFFF), UINT64_C(0xFFFFFFFF00000000), + UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF) +}; + +/* LUT for scalar multiplication by comb interleaving */ +static const pt_aff_t lut_cmb[21][16] = { + { + { { UINT64_C(0x3DD0756649C0B528), UINT64_C(0x20E378E2A0D6CE38), + UINT64_C(0x879C3AFC541B4D6E), UINT64_C(0x6454868459A30EFF), +
UINT64_C(0x812FF723614EDE2B), UINT64_C(0x4D3AADC2299E1513) }, + { UINT64_C(0x23043DAD4B03A4FE), UINT64_C(0xA1BFA8BF7BB4A9AC), + UINT64_C(0x8BADE7562E83B050), UINT64_C(0xC6C3521968F4FFD9), + UINT64_C(0xDD8002263969A840), UINT64_C(0x2B78ABC25A15C5E9) } }, + { { UINT64_C(0x05E4DBE6C1DC4073), UINT64_C(0xC54EA9FFF04F779C), + UINT64_C(0x6B2034E9A170CCF0), UINT64_C(0x3A48D732D51C6C3E), + UINT64_C(0xE36F7E2D263AA470), UINT64_C(0xD283FE68E7C1C3AC) }, + { UINT64_C(0x7E284821C04EE157), UINT64_C(0x92D789A77AE0E36D), + UINT64_C(0x132663C04EF67446), UINT64_C(0x68012D5AD2E1D0B4), + UINT64_C(0xF6DB68B15102B339), UINT64_C(0x465465FC983292AF) } }, + { { UINT64_C(0xBB595EBA68F1F0DF), UINT64_C(0xC185C0CBCC873466), + UINT64_C(0x7F1EB1B5293C703B), UINT64_C(0x60DB2CF5AACC05E6), + UINT64_C(0xC676B987E2E8E4C6), UINT64_C(0xE1BB26B11D178FFB) }, + { UINT64_C(0x2B694BA07073FA21), UINT64_C(0x22C16E2E72F34566), + UINT64_C(0x80B61B3101C35B99), UINT64_C(0x4B237FAF982C0411), + UINT64_C(0xE6C5944024DE236D), UINT64_C(0x4DB1C9D6E209E4A3) } }, + { { UINT64_C(0xDF13B9D17D69222B), UINT64_C(0x4CE6415F874774B1), + UINT64_C(0x731EDCF8211FAA95), UINT64_C(0x5F4215D1659753ED), + UINT64_C(0xF893DB589DB2DF55), UINT64_C(0x932C9F811C89025B) }, + { UINT64_C(0x0996B2207706A61E), UINT64_C(0x135349D5A8641C79), + UINT64_C(0x65AAD76F50130844), UINT64_C(0x0FF37C0401FFF780), + UINT64_C(0xF57F238E693B0706), UINT64_C(0xD90A16B6AF6C9B3E) } }, + { { UINT64_C(0x2F5D200E2353B92F), UINT64_C(0xE35D87293FD7E4F9), + UINT64_C(0x26094833A96D745D), UINT64_C(0xDC351DC13CBFFF3F), + UINT64_C(0x26D464C6DAD54D6A), UINT64_C(0x5CAB1D1D53636C6A) }, + { UINT64_C(0xF2813072B18EC0B0), UINT64_C(0x3777E270D742AA2F), + UINT64_C(0x27F061C7033CA7C2), UINT64_C(0xA6ECACCC68EAD0D8), + UINT64_C(0x7D9429F4EE69A754), UINT64_C(0xE770633431E8F5C6) } }, + { { UINT64_C(0xC7708B19B68B8C7D), UINT64_C(0x4532077C44377ABA), + UINT64_C(0x0DCC67706CDAD64F), UINT64_C(0x01B8BF56147B6602), + UINT64_C(0xF8D89885F0561D79), UINT64_C(0x9C19E9FC7BA9C437) }, + { 
UINT64_C(0x764EB146BDC4BA25), UINT64_C(0x604FE46BAC144B83), + UINT64_C(0x3CE813298A77E780), UINT64_C(0x2E070F36FE9E682E), + UINT64_C(0x41821D0C3A53287A), UINT64_C(0x9AA62F9F3533F918) } }, + { { UINT64_C(0x9B7AEB7E75CCBDFB), UINT64_C(0xB25E28C5F6749A95), + UINT64_C(0x8A7A8E4633B7D4AE), UINT64_C(0xDB5203A8D9C1BD56), + UINT64_C(0xD2657265ED22DF97), UINT64_C(0xB51C56E18CF23C94) }, + { UINT64_C(0xF4D394596C3D812D), UINT64_C(0xD8E88F1A87CAE0C2), + UINT64_C(0x789A2A48CF4D0FE3), UINT64_C(0xB7FEAC2DFEC38D60), + UINT64_C(0x81FDBD1C3B490EC3), UINT64_C(0x4617ADB7CC6979E1) } }, + { { UINT64_C(0x446AD8884709F4A9), UINT64_C(0x2B7210E2EC3DABD8), + UINT64_C(0x83CCF19550E07B34), UINT64_C(0x59500917789B3075), + UINT64_C(0x0FC01FD4EB085993), UINT64_C(0xFB62D26F4903026B) }, + { UINT64_C(0x2309CC9D6FE989BB), UINT64_C(0x61609CBD144BD586), + UINT64_C(0x4B23D3A0DE06610C), UINT64_C(0xDDDC2866D898F470), + UINT64_C(0x8733FC41400C5797), UINT64_C(0x5A68C6FED0BC2716) } }, + { { UINT64_C(0x8903E1304B4A3CD0), UINT64_C(0x3EA4EA4C8FF1F43E), + UINT64_C(0xE6FC3F2AF655A10D), UINT64_C(0x7BE3737D524FFEFC), + UINT64_C(0x9F6928555330455E), UINT64_C(0x524F166EE475CE70) }, + { UINT64_C(0x3FCC69CD6C12F055), UINT64_C(0x4E23B6FFD5B9C0DA), + UINT64_C(0x49CE6993336BF183), UINT64_C(0xF87D6D854A54504A), + UINT64_C(0x25EB5DF1B3C2677A), UINT64_C(0xAC37986F55B164C9) } }, + { { UINT64_C(0x82A2ED4ABAA84C08), UINT64_C(0x22C4CC5F41A8C912), + UINT64_C(0xCA109C3B154AAD5E), UINT64_C(0x23891298FC38538E), + UINT64_C(0xB3B6639C539802AE), UINT64_C(0xFA0F1F450390D706) }, + { UINT64_C(0x46B78E5DB0DC21D0), UINT64_C(0xA8C72D3CC3DA2EAC), + UINT64_C(0x9170B3786FF2F643), UINT64_C(0x3F5A799BB67F30C3), + UINT64_C(0x15D1DC778264B672), UINT64_C(0xA1D47B23E9577764) } }, + { { UINT64_C(0x08265E510422CE2F), UINT64_C(0x88E0D496DD2F9E21), + UINT64_C(0x30128AA06177F75D), UINT64_C(0x2E59AB62BD9EBE69), + UINT64_C(0x1B1A0F6C5DF0E537), UINT64_C(0xAB16C626DAC012B5) }, + { UINT64_C(0x8014214B008C5DE7), UINT64_C(0xAA740A9E38F17BEA), + 
UINT64_C(0x262EBB498A149098), UINT64_C(0xB454111E8527CD59), + UINT64_C(0x266AD15AACEA5817), UINT64_C(0x21824F411353CCBA) } }, + { { UINT64_C(0xD1B4E74D12E3683B), UINT64_C(0x990ED20B569B8EF6), + UINT64_C(0xB9D3DD25429C0A18), UINT64_C(0x1C75B8AB2A351783), + UINT64_C(0x61E4CA2B905432F0), UINT64_C(0x80826A69EEA8F224) }, + { UINT64_C(0x7FC33A6BEC52ABAD), UINT64_C(0x0BCCA3F0A65E4813), + UINT64_C(0x7AD8A132A527CEBE), UINT64_C(0xF0138950EAF22C7E), + UINT64_C(0x282D2437566718C1), UINT64_C(0x9DFCCB0DE2212559) } }, + { { UINT64_C(0x1E93722758CE3B83), UINT64_C(0xBB280DFA3CB3FB36), + UINT64_C(0x57D0F3D2E2BE174A), UINT64_C(0x9BD51B99208ABE1E), + UINT64_C(0x3809AB50DE248024), UINT64_C(0xC29C6E2CA5BB7331) }, + { UINT64_C(0x9944FD2E61124F05), UINT64_C(0x83CCBC4E9009E391), + UINT64_C(0x01628F059424A3CC), UINT64_C(0xD6A2F51DEA8E4344), + UINT64_C(0xDA3E1A3D4CEBC96E), UINT64_C(0x1FE6FB42E97809DC) } }, + { { UINT64_C(0xA04482D2467D66E4), UINT64_C(0xCF1912934D78291D), + UINT64_C(0x8E0D4168482396F9), UINT64_C(0x7228E2D5D18F14D0), + UINT64_C(0x2F7E8D509C6A58FE), UINT64_C(0xE8CA780E373E5AEC) }, + { UINT64_C(0x42AAD1D61B68E9F8), UINT64_C(0x58A6D7F569E2F8F4), + UINT64_C(0xD779ADFE31DA1BEA), UINT64_C(0x7D26540638C85A85), + UINT64_C(0x67E67195D44D3CDF), UINT64_C(0x17820A0BC5134ED7) } }, + { { UINT64_C(0x019D6AC5D3021470), UINT64_C(0x25846B66780443D6), + UINT64_C(0xCE3C15ED55C97647), UINT64_C(0x3DC22D490E3FEB0F), + UINT64_C(0x2065B7CBA7DF26E4), UINT64_C(0xC8B00AE8187CEA1F) }, + { UINT64_C(0x1A5284A0865DDED3), UINT64_C(0x293C164920C83DE2), + UINT64_C(0xAB178D26CCE851B3), UINT64_C(0x8E6DB10B404505FB), + UINT64_C(0xF6F57E7190C82033), UINT64_C(0x1D2A1C015977F16C) } }, + { { UINT64_C(0xA39C89317C8906A4), UINT64_C(0xB6E7ECDD9E821EE6), + UINT64_C(0x2ECF8340F0DF4FE6), UINT64_C(0xD42F7DC953C14965), + UINT64_C(0x1AFB51A3E3BA8285), UINT64_C(0x6C07C4040A3305D1) }, + { UINT64_C(0xDAB83288127FC1DA), UINT64_C(0xBC0A699B374C4B08), + UINT64_C(0x402A9BAB42EB20DD), UINT64_C(0xD7DD464F045A7A1C), + 
UINT64_C(0x5B3D0D6D36BEECC4), UINT64_C(0x475A3E756398A19D) } }, + }, + { + { { UINT64_C(0x31BDB48372876AE8), UINT64_C(0xE3325D98961ED1BF), + UINT64_C(0x18C042469B6FC64D), UINT64_C(0x0DCC15FA15786B8C), + UINT64_C(0x81ACDB068E63DA4A), UINT64_C(0xD3A4B643DADA70FB) }, + { UINT64_C(0x46361AFEDEA424EB), UINT64_C(0xDC2D2CAE89B92970), + UINT64_C(0xF389B61B615694E6), UINT64_C(0x7036DEF1872951D2), + UINT64_C(0x40FD3BDAD93BADC7), UINT64_C(0x45AB6321380A68D3) } }, + { { UINT64_C(0x23C1F74481A2703A), UINT64_C(0x1A5D075CB9859136), + UINT64_C(0xA4F82C9D5AFD1BFD), UINT64_C(0xA3D1E9A4F89D76FE), + UINT64_C(0x964F705075702F80), UINT64_C(0x182BF349F56C089D) }, + { UINT64_C(0xE205FA8FBE0DA6E1), UINT64_C(0x32905EB90A40F8F3), + UINT64_C(0x331A1004356D4395), UINT64_C(0x58B78901FDBBDFDE), + UINT64_C(0xA52A15979BA00E71), UINT64_C(0xE0092E1F55497A30) } }, + { { UINT64_C(0x5562A85670EE8F39), UINT64_C(0x86B0C11764E52A9C), + UINT64_C(0xC19F317409C75B8C), UINT64_C(0x21C7CC3124923F80), + UINT64_C(0xE63FE47F8F5B291E), UINT64_C(0x3D6D3C050DC08B05) }, + { UINT64_C(0x58AE455EEE0C39A1), UINT64_C(0x78BEA4310AD97942), + UINT64_C(0x42C7C97F3EE3989C), UINT64_C(0xC1B03AF5F38759AE), + UINT64_C(0x1A673C75BCF46899), UINT64_C(0x4831B7D38D508C7D) } }, + { { UINT64_C(0x76512D1BC552E354), UINT64_C(0x2B7EB6DF273020FD), + UINT64_C(0xD1C73AA8025A5F25), UINT64_C(0x2ABA19295CBD2A40), + UINT64_C(0xB53CADC3C88D61C6), UINT64_C(0x7E66A95E098290F3) }, + { UINT64_C(0x72800ECBAF4C5073), UINT64_C(0x81F2725E9DC63FAF), + UINT64_C(0x14BF92A7282BA9D1), UINT64_C(0x90629672BD5F1BB2), + UINT64_C(0x362F68EBA97C6C96), UINT64_C(0xB1D3BB8B7EA9D601) } }, + { { UINT64_C(0x73878F7FA9C94429), UINT64_C(0xB35C3BC8456CA6D8), + UINT64_C(0xD96F0B3CF721923A), UINT64_C(0x28D8F06CE6D44FA1), + UINT64_C(0x94EFDCDCD5CD671A), UINT64_C(0x0299AB933F97D481) }, + { UINT64_C(0xB7CED6EA2FD1D324), UINT64_C(0xBD6832087E932EC2), + UINT64_C(0x24ED31FBCB755A6E), UINT64_C(0xA636098EE48781D2), + UINT64_C(0x8687C63CF0A4F297), UINT64_C(0xBB52344007478526) } }, + { { 
UINT64_C(0x2E5F741934124B56), UINT64_C(0x1F223AE14B3F02CA), + UINT64_C(0x6345B427E8336C7E), UINT64_C(0x92123E16F5D0E3D0), + UINT64_C(0xDAF0D14D45E79F3A), UINT64_C(0x6ACA67656F3BD0C6) }, + { UINT64_C(0xF6169FAB403813F4), UINT64_C(0x31DC39C0334A4C59), + UINT64_C(0x74C46753D589866D), UINT64_C(0x5741511D984C6A5D), + UINT64_C(0xF263128797FED2D3), UINT64_C(0x5687CA1B11614886) } }, + { { UINT64_C(0x076D902A33836D4B), UINT64_C(0xEC6C5C4324AFB557), + UINT64_C(0xA0FE2D1CA0516A0F), UINT64_C(0x6FB8D73700D22ECC), + UINT64_C(0xF1DE9077DAF1D7B3), UINT64_C(0xE4695F77D4C0C1EB) }, + { UINT64_C(0x5F0FD8A8B4375573), UINT64_C(0x762383595E50944F), + UINT64_C(0x65EA2F28635CD76F), UINT64_C(0x0854776925FDE7B0), + UINT64_C(0xB2345A2E51944304), UINT64_C(0x86EFA2F7A16C980D) } }, + { { UINT64_C(0x4CCBE2D0BF4D1D63), UINT64_C(0x32E33401397366D5), + UINT64_C(0xC83AFDDE71BDA2CE), UINT64_C(0x8DACE2AC478ED9E6), + UINT64_C(0x3AC6A559763FDD9E), UINT64_C(0x0FFDB04CB398558F) }, + { UINT64_C(0x6C1B99B2AFB9D6B8), UINT64_C(0x572BA39C27F815DD), + UINT64_C(0x9DE73EE70DBCF842), UINT64_C(0x2A3ED58929267B88), + UINT64_C(0xD46A7FD315EBBBB3), UINT64_C(0xD1D01863E29400C7) } }, + { { UINT64_C(0x8FB101D1E1F89EC5), UINT64_C(0xB87A1F53F8508042), + UINT64_C(0x28C8DB240ED7BEEF), UINT64_C(0x3940F845ACE8660A), + UINT64_C(0x4EACB619C6D453FD), UINT64_C(0x2E044C982BAD6160) }, + { UINT64_C(0x8792854880B16C02), UINT64_C(0xF0D4BEB3C0A9EB64), + UINT64_C(0xD785B4AFC183C195), UINT64_C(0x23AAB0E65E6C46EA), + UINT64_C(0x30F7E104A930FECA), UINT64_C(0x6A1A7B8BD55C10FB) } }, + { { UINT64_C(0xDA74EAEBDBFED1AA), UINT64_C(0xC8A59223DF0B025C), + UINT64_C(0x7EF7DC85D5B627F7), UINT64_C(0x02A13AE1197D7624), + UINT64_C(0x119E9BE12F785A9B), UINT64_C(0xC0B7572F00D6B219) }, + { UINT64_C(0x9B1E51266D4CAF30), UINT64_C(0xA16A51170A840BD1), + UINT64_C(0x5BE17B910E9CCF43), UINT64_C(0x5BDBEDDD69CF2C9C), + UINT64_C(0x9FFBFBCF4CF4F289), UINT64_C(0xE1A621836C355CE9) } }, + { { UINT64_C(0x056199D9A7B2FCCF), UINT64_C(0x51F2E7B6CE1D784E), + 
UINT64_C(0xA1D09C47339E2FF0), UINT64_C(0xC8E64890B836D0A9), + UINT64_C(0x2F781DCBC0D07EBE), UINT64_C(0x5CF3C2AD3ACF934C) }, + { UINT64_C(0xE55DB190A17E26AE), UINT64_C(0xC9C61E1F91245513), + UINT64_C(0x83D7E6CF61998C15), UINT64_C(0x4DB33C85E41D38E3), + UINT64_C(0x74D5F91DC2FEE43D), UINT64_C(0x7EBBDB4536BBC826) } }, + { { UINT64_C(0xE20EC7E9CB655A9D), UINT64_C(0x4977EB925C47D421), + UINT64_C(0xA237E12C3B9D72FA), UINT64_C(0xCAAEDBC1CBF7B145), + UINT64_C(0x5200F5B23B77AAA3), UINT64_C(0x32EDED55BDBE5380) }, + { UINT64_C(0x74E38A40E7C9B80A), UINT64_C(0x3A3F0CF8AB6DE911), + UINT64_C(0x56DCDD7AAD16AAF0), UINT64_C(0x3D2924498E861D5E), + UINT64_C(0xD6C61878985733E2), UINT64_C(0x2401FE7D6AA6CD5B) } }, + { { UINT64_C(0xABB3DC75B42E3686), UINT64_C(0xAE712419B4C57E61), + UINT64_C(0x2C565F72B21B009B), UINT64_C(0xA5F1DA2E710C3699), + UINT64_C(0x771099A0A5EBA59A), UINT64_C(0x4DA88F4AC10017A0) }, + { UINT64_C(0x987FFFD31927B56D), UINT64_C(0xB98CB8ECC4E33478), + UINT64_C(0xB224A971C2248166), UINT64_C(0x5470F554DE1DC794), + UINT64_C(0xD747CC24E31FF983), UINT64_C(0xB91745E9B5B22DAE) } }, + { { UINT64_C(0x6CCBFED072F34420), UINT64_C(0x95045E4DA53039D2), + UINT64_C(0x3B6C11545A793944), UINT64_C(0xAA114145DDB6B799), + UINT64_C(0xABC15CA4252B7637), UINT64_C(0x5745A35BA5744634) }, + { UINT64_C(0x05DC6BDEDA596FC0), UINT64_C(0xCD52C18CA8020881), + UINT64_C(0x03FA9F47D296BAD0), UINT64_C(0xD8E2C1297268E139), + UINT64_C(0x58C1A98D9EC450B0), UINT64_C(0x909638DADE48B20D) } }, + { { UINT64_C(0x7AFC30D49B7F8311), UINT64_C(0x82A0042242368EA3), + UINT64_C(0xBFF951986F5F9865), UINT64_C(0x9B24F612FC0A070F), + UINT64_C(0x22C06CF2620F489D), UINT64_C(0x3C7ED052780F7DBB) }, + { UINT64_C(0xDB87AB1834DAFE9B), UINT64_C(0x20C03B409C4BBCA1), + UINT64_C(0x5D718CF059A42341), UINT64_C(0x9863170669E84538), + UINT64_C(0x5557192BD27D64E1), UINT64_C(0x08B4EC52DA822766) } }, + { { UINT64_C(0xB2D986F6D66C1A59), UINT64_C(0x927DEB1678E0E423), + UINT64_C(0x9E673CDE49C3DEDC), UINT64_C(0xFA362D84F7ECB6CF), + 
UINT64_C(0x078E5F401BA17340), UINT64_C(0x934CA5D11F4E489C) }, + { UINT64_C(0xC03C073164EEF493), UINT64_C(0x631A353BD7931A7E), + UINT64_C(0x8E7CC3BB65DD74F1), UINT64_C(0xD55864C5702676A5), + UINT64_C(0x6D306AC4439F04BD), UINT64_C(0x58544F672BAFED57) } }, + }, + { + { { UINT64_C(0xB083BA6AEC074AEA), UINT64_C(0x46FAC5EF7F0B505B), + UINT64_C(0x95367A21FC82DC03), UINT64_C(0x227BE26A9D3679D8), + UINT64_C(0xC70F6D6C7E9724C0), UINT64_C(0xCD68C757F9EBEC0F) }, + { UINT64_C(0x29DDE03E8FF321B2), UINT64_C(0xF84AD7BB031939DC), + UINT64_C(0xDAF590C90F602F4B), UINT64_C(0x17C5288849722BC4), + UINT64_C(0xA8DF99F0089B22B6), UINT64_C(0xC21BC5D4E59B9B90) } }, + { { UINT64_C(0x4936C6A08A31973F), UINT64_C(0x54D442FA83B8C205), + UINT64_C(0x03AEE8B45714F2C6), UINT64_C(0x139BD6923F5AC25A), + UINT64_C(0x6A2E42BAB5B33794), UINT64_C(0x50FA11643FF7BBA9) }, + { UINT64_C(0xB61D8643F7E2C099), UINT64_C(0x2366C993BD5C6637), + UINT64_C(0x62110E1472EB77FA), UINT64_C(0x3D5B96F13B99C635), + UINT64_C(0x956ECF64F674C9F2), UINT64_C(0xC56F7E51EF2BA250) } }, + { { UINT64_C(0x246FFCB6FF602C1B), UINT64_C(0x1E1A1D746E1258E0), + UINT64_C(0xB4B43AE2250E6676), UINT64_C(0x95C1B5F0924CE5FA), + UINT64_C(0x2555795BEBD8C776), UINT64_C(0x4C1E03DCACD9D9D0) }, + { UINT64_C(0xE1D74AA69CE90C61), UINT64_C(0xA88C0769A9C4B9F9), + UINT64_C(0xDF74DF2795AF56DE), UINT64_C(0x24B10C5FB331B6F4), + UINT64_C(0xB0A6DF9A6559E137), UINT64_C(0x6ACC1B8FC06637F2) } }, + { { UINT64_C(0xBD8C086834B4E381), UINT64_C(0x278CACC730DFF271), + UINT64_C(0x87ED12DE02459389), UINT64_C(0x3F7D98FFDEF840B6), + UINT64_C(0x71EEE0CB5F0B56E1), UINT64_C(0x462B5C9BD8D9BE87) }, + { UINT64_C(0xE6B50B5A98094C0F), UINT64_C(0x26F3B274508C67CE), + UINT64_C(0x418B1BD17CB1F992), UINT64_C(0x607818ED4FF11827), + UINT64_C(0xE630D93A9B042C63), UINT64_C(0x38B9EFF38C779AE3) } }, + { { UINT64_C(0xE8767D36729C5431), UINT64_C(0xA8BD07C0BB94642C), + UINT64_C(0x0C11FC8E58F2E5B2), UINT64_C(0xD8912D48547533FE), + UINT64_C(0xAAE14F5E230D91FB), UINT64_C(0xC122051A676DFBA0) }, + { 
UINT64_C(0x9ED4501F5EA93078), UINT64_C(0x2758515CBD4BEE0A), + UINT64_C(0x97733C6C94D21F52), UINT64_C(0x139BCD6D4AD306A2), + UINT64_C(0x0AAECBDC298123CC), UINT64_C(0x102B8A311CB7C7C9) } }, + { { UINT64_C(0x22A28E59FAF46675), UINT64_C(0x1075730810A31E7D), + UINT64_C(0xC7EEAC842B4C2F4F), UINT64_C(0xBA370148B5EF5184), + UINT64_C(0x4A5A28668732E055), UINT64_C(0x14B8DCDCB887C36F) }, + { UINT64_C(0xDBA8C85C433F093D), UINT64_C(0x73DF549D1C9A201C), + UINT64_C(0x69AA0D7B70F927D8), UINT64_C(0xFA3A8685D7D2493A), + UINT64_C(0x6F48A2550A7F4013), UINT64_C(0xD20C8BF9DD393067) } }, + { { UINT64_C(0x4EC874EA81625E78), UINT64_C(0x8B8D8B5A3FBE9267), + UINT64_C(0xA3D9D1649421EC2F), UINT64_C(0x490E92D9880EA295), + UINT64_C(0x745D1EDCD8F3B6DA), UINT64_C(0x0116628B8F18BA03) }, + { UINT64_C(0x0FF6BCE0834EADCE), UINT64_C(0x464697F2000827F7), + UINT64_C(0x08DCCF84498D724E), UINT64_C(0x7896D3651E88304C), + UINT64_C(0xE63EBCCE135E3622), UINT64_C(0xFB942E8EDC007521) } }, + { { UINT64_C(0xBB155A66A3688621), UINT64_C(0xED2FD7CDF91B52A3), + UINT64_C(0x52798F5DEA20CB88), UINT64_C(0x069CE105373F7DD8), + UINT64_C(0xF9392EC78CA78F6B), UINT64_C(0xB3013E256B335169) }, + { UINT64_C(0x1D92F8006B11715C), UINT64_C(0xADD4050EFF9DC464), + UINT64_C(0x2AC226598465B84A), UINT64_C(0x2729D646465B2BD6), + UINT64_C(0x6202344AE4EFF9DD), UINT64_C(0x51F3198FCD9B90B9) } }, + { { UINT64_C(0x17CE54EFE5F0AE1D), UINT64_C(0x984E8204B09852AF), + UINT64_C(0x3365B37AC4B27A71), UINT64_C(0x720E3152A00E0A9C), + UINT64_C(0x3692F70D925BD606), UINT64_C(0xBE6E699D7BC7E9AB) }, + { UINT64_C(0xD75C041F4C89A3C0), UINT64_C(0x8B9F592D8DC100C0), + UINT64_C(0x30750F3AAD228F71), UINT64_C(0x1B9ECF84E8B17A11), + UINT64_C(0xDF2025620FBFA8A2), UINT64_C(0x45C811FCAA1B6D67) } }, + { { UINT64_C(0xEC5B84B71A5151F8), UINT64_C(0x118E59E8550AB2D2), + UINT64_C(0x2CCDEDA4049BD735), UINT64_C(0xC99CBA719CD62F0F), + UINT64_C(0x69B8040A62C9E4F8), UINT64_C(0x16F1A31A110B8283) }, + { UINT64_C(0x53F6380298E908A3), UINT64_C(0x308CB6EFD862F9DE), + 
UINT64_C(0xE185DAD8A521A95A), UINT64_C(0x4D8FE9A4097F75CA), + UINT64_C(0xD1ECCEC71CA07D53), UINT64_C(0x13DFA1DC0DB07E83) } }, + { { UINT64_C(0xDDAF9DC60F591A76), UINT64_C(0xE1A6D7CC1685F412), + UINT64_C(0x153DE557002B6E8D), UINT64_C(0x730C38BCC6DA37D9), + UINT64_C(0xAE1806220914B597), UINT64_C(0x84F98103DD8C3A0A) }, + { UINT64_C(0x369C53988DA205B0), UINT64_C(0xA3D95B813888A720), + UINT64_C(0x1F3F8BBFE10E2806), UINT64_C(0x48663DF54530D1F3), + UINT64_C(0x320523B43E377713), UINT64_C(0xE8B1A575C7894814) } }, + { { UINT64_C(0x330668712EE8EA07), UINT64_C(0xC6FB4EC560DA199D), + UINT64_C(0x33231860F4370A05), UINT64_C(0x7ABECE72C6DE4E26), + UINT64_C(0xDE8D4BD8EBDECE7A), UINT64_C(0xC90EE6571CBE93C7) }, + { UINT64_C(0x0246751B85AC2509), UINT64_C(0xD0EF142C30380245), + UINT64_C(0x086DF9C47C76E39C), UINT64_C(0x68F1304FB789FB56), + UINT64_C(0x23E4CB98A5E4BD56), UINT64_C(0x69A4C63C64663DCA) } }, + { { UINT64_C(0x6C72B6AF7CB34E63), UINT64_C(0x073C40CD6DFC23FE), + UINT64_C(0xBDEEE7A1C936693A), UINT64_C(0xBC858E806EFAD378), + UINT64_C(0xEAD719FFF5BE55D4), UINT64_C(0xC8C3238F04552F5F) }, + { UINT64_C(0x0952C068928D5784), UINT64_C(0x89DFDF2294C58F2B), + UINT64_C(0x332DEDF367502C50), UINT64_C(0x3ED2FA3AAC0BE258), + UINT64_C(0xAEDC9B8A7C5C8244), UINT64_C(0x43A761B9DC0EA34F) } }, + { { UINT64_C(0x8FD683A2CC5E21A5), UINT64_C(0x5F444C6EFBA2BB68), + UINT64_C(0x709ACD0EAF05586D), UINT64_C(0x8EFA54D2DE8FB348), + UINT64_C(0x35276B7134CFE29E), UINT64_C(0x77A06FCD941EAC8C) }, + { UINT64_C(0x5815792D928322DD), UINT64_C(0x82FF356B67F7CB59), + UINT64_C(0x71E40A78304980F4), UINT64_C(0xC8645C273667D021), + UINT64_C(0xE785741CAEBAE28F), UINT64_C(0xB2C1BC7553ECAC37) } }, + { { UINT64_C(0x633EB24F1D0A74DB), UINT64_C(0xF1F55E56FA752512), + UINT64_C(0x75FECA688EFE11DE), UINT64_C(0xC80FD91CE6BF19EC), + UINT64_C(0xAD0BAFEC2A14C908), UINT64_C(0x4E1C4ACAADE4031F) }, + { UINT64_C(0x463A815B1EB1549A), UINT64_C(0x5AD4253C668F1298), + UINT64_C(0x5CB3866238A37151), UINT64_C(0x34BB1CCFAFF16B96), + 
UINT64_C(0xDCA93B13EE731AB0), UINT64_C(0x9F3CE5CC9BE01A0B) } }, + { { UINT64_C(0x75DB5723A110D331), UINT64_C(0x67C66F6A7123D89F), + UINT64_C(0x27ABBD4B4009D570), UINT64_C(0xACDA6F84C73451BC), + UINT64_C(0xE4B9A23905575ACF), UINT64_C(0x3C2DB7EFAB2D3D6C) }, + { UINT64_C(0x01CCDD0829115145), UINT64_C(0x9E0602FE57B5814A), + UINT64_C(0x679B35C287862838), UINT64_C(0x0277DC4C38AD598D), + UINT64_C(0xEF80A2136D896DD4), UINT64_C(0xC8812213E7B9047B) } }, + }, + { + { { UINT64_C(0xAC6DBDF6EDC9CE62), UINT64_C(0xA58F5B440F9C006E), + UINT64_C(0x16694DE3DC28E1B0), UINT64_C(0x2D039CF2A6647711), + UINT64_C(0xA13BBE6FC5B08B4B), UINT64_C(0xE44DA93010EBD8CE) }, + { UINT64_C(0xCD47208719649A16), UINT64_C(0xE18F4E44683E5DF1), + UINT64_C(0xB3F66303929BFA28), UINT64_C(0x7C378E43818249BF), + UINT64_C(0x76068C80847F7CD9), UINT64_C(0xEE3DB6D1987EBA16) } }, + { { UINT64_C(0xCBBD8576C42A2F52), UINT64_C(0x9ACC6F709D2B06BB), + UINT64_C(0xE5CB56202E6B72A4), UINT64_C(0x5738EA0E7C024443), + UINT64_C(0x8ED06170B55368F3), UINT64_C(0xE54C99BB1AEED44F) }, + { UINT64_C(0x3D90A6B2E2E0D8B2), UINT64_C(0x21718977CF7B2856), + UINT64_C(0x089093DCC5612AEC), UINT64_C(0xC272EF6F99C1BACC), + UINT64_C(0x47DB3B43DC43EAAD), UINT64_C(0x730F30E40832D891) } }, + { { UINT64_C(0x9FFE55630C7FECDB), UINT64_C(0x55CC67B6F88101E5), + UINT64_C(0x3039F981CBEFA3C7), UINT64_C(0x2AB06883667BFD64), + UINT64_C(0x9007A2574340E3DF), UINT64_C(0x1AC3F3FA5A3A49CA) }, + { UINT64_C(0x9C7BE629C97E20FD), UINT64_C(0xF61823D3A3DAE003), + UINT64_C(0xFFE7FF39E7380DBA), UINT64_C(0x620BB9B59FACC3B8), + UINT64_C(0x2DDCB8CD31AE422C), UINT64_C(0x1DE3BCFAD12C3C43) } }, + { { UINT64_C(0x8C074946D6E0F9A9), UINT64_C(0x662FA99551C3B05B), + UINT64_C(0x6CDAE96904BB2048), UINT64_C(0x6DEC9594D6DC8B60), + UINT64_C(0x8D26586954438BBC), UINT64_C(0x88E983E31B0E95A5) }, + { UINT64_C(0x8189F11460CBF838), UINT64_C(0x77190697771DC46B), + UINT64_C(0x775775A227F8EC1A), UINT64_C(0x7A125240607E3739), + UINT64_C(0xAFAE84E74F793E4E), UINT64_C(0x44FA17F35BF5BAF4) } }, + { { 
UINT64_C(0xA21E69A5D03AC439), UINT64_C(0x2069C5FC88AA8094), + UINT64_C(0xB041EEA78C08F206), UINT64_C(0x55B9D4613D65B8ED), + UINT64_C(0x951EA25CD392C7C4), UINT64_C(0x4B9A1CEC9D166232) }, + { UINT64_C(0xC184FCD8FCF931A4), UINT64_C(0xBA59AD44063AD374), + UINT64_C(0x1868AD2A1AA9796F), UINT64_C(0x38A34018DFF29832), + UINT64_C(0x01FC880103DF8070), UINT64_C(0x1282CCE048DD334A) } }, + { { UINT64_C(0x76AA955726D8503C), UINT64_C(0xBE962B636BC3E3D0), + UINT64_C(0xF5CA93E597DE8841), UINT64_C(0x1561B05EAF3F2C16), + UINT64_C(0x34BE00AAD34BFF98), UINT64_C(0xEA21E6E9D23D2925) }, + { UINT64_C(0x55713230394C3AFB), UINT64_C(0xEAF0529BD6C8BECA), + UINT64_C(0xFF38A743202B9A11), UINT64_C(0xA13E39FC6D3A398B), + UINT64_C(0x8CBD644B86E2615A), UINT64_C(0x92063988191057EC) } }, + { { UINT64_C(0x787835CE13F89146), UINT64_C(0x7FCD42CC69446C3F), + UINT64_C(0x0DA2AA98840E679D), UINT64_C(0x44F2052318779A1B), + UINT64_C(0xE3A3B34FEFBF5935), UINT64_C(0xA5D2CFD0B9947B70) }, + { UINT64_C(0xAE2AF4EF27F4E16F), UINT64_C(0xA7FA70D2B9D21322), + UINT64_C(0x68084919B3FD566B), UINT64_C(0xF04D71C8D7AAD6AB), + UINT64_C(0xDBEA21E410BC4260), UINT64_C(0xAA7DC6658D949B42) } }, + { { UINT64_C(0xD8E958A06CCB8213), UINT64_C(0x118D9DB991900B54), + UINT64_C(0x09BB9D4985E8CED6), UINT64_C(0x410E9FB524019281), + UINT64_C(0x3B31B4E16D74C86E), UINT64_C(0x52BC0252020BB77D) }, + { UINT64_C(0x5616A26F27092CE4), UINT64_C(0x67774DBCA08F65CD), + UINT64_C(0x560AD494C08BD569), UINT64_C(0xBE26DA36AD498783), + UINT64_C(0x0276C8AB7F019C91), UINT64_C(0x09843ADA5248266E) } }, + { { UINT64_C(0xA0AE88A77D963CF2), UINT64_C(0x91EF8986D0E84920), + UINT64_C(0xC7EFE344F8C58104), UINT64_C(0x0A25D9FDECA20773), + UINT64_C(0x9D989FAA00D8F1D5), UINT64_C(0x4204C8CEC8B06264) }, + { UINT64_C(0x717C12E0BE1A2796), UINT64_C(0x1FA4BA8CC190C728), + UINT64_C(0xA245CA8D8C8A59BA), UINT64_C(0xE3C374757672B935), + UINT64_C(0x083D5E402E4D6375), UINT64_C(0x0B8D5AB35455E16E) } }, + { { UINT64_C(0x1DB17DBFEED765D4), UINT64_C(0xBBC9B1BEA5DDB965), + 
UINT64_C(0x1948F76DDFC12ABC), UINT64_C(0x2C2714E5134EF489), + UINT64_C(0x60CE2EE8741C600F), UINT64_C(0x32396F22F80E6E63) }, + { UINT64_C(0x421DAC7522537F59), UINT64_C(0x58FB73C649475DF5), + UINT64_C(0x0ABF28856F18F1C7), UINT64_C(0x364744689A398D16), + UINT64_C(0x87A661A7BF673B87), UINT64_C(0x3E80698F73819E17) } }, + { { UINT64_C(0xDFE4979353784CC4), UINT64_C(0x4280EAB0486D508F), + UINT64_C(0x119593FFE534F5A4), UINT64_C(0x98AEFADD9F63242F), + UINT64_C(0x9AE6A24AC4829CAE), UINT64_C(0xF2373CA558E8BA80) }, + { UINT64_C(0x4017AF7E51765FB3), UINT64_C(0xD1E40F7CAF4AEC4B), + UINT64_C(0x87372C7A0898E3BC), UINT64_C(0x688982B285452CA9), + UINT64_C(0x71E0B4BFB1E50BCA), UINT64_C(0x21FD2DBFF70E714A) } }, + { { UINT64_C(0xEE6E8820FB78DDAC), UINT64_C(0x0BAED29C063892CD), + UINT64_C(0x5F33049C28C0588D), UINT64_C(0x90C2515E18DBC432), + UINT64_C(0xB8A1B1433B4CB0BD), UINT64_C(0x0AB5C0C968103043) }, + { UINT64_C(0xF3788FA04005EC40), UINT64_C(0x82571C99039EE115), + UINT64_C(0xEE8FCED593260BED), UINT64_C(0x5A9BAF7910836D18), + UINT64_C(0x7C258B09C46AA4F6), UINT64_C(0x46ECC5E837F53D31) } }, + { { UINT64_C(0xFA32C0DCBFE0DD98), UINT64_C(0x66EFAFC4962B1066), + UINT64_C(0xBA81D33E64BDF5EB), UINT64_C(0x36C28536FC7FC512), + UINT64_C(0x0C95176BE0B4FA97), UINT64_C(0x47DDE29B3B9BC64A) }, + { UINT64_C(0x08D986FD5C173B36), UINT64_C(0x46D84B526CF3F28C), + UINT64_C(0x6F6ED6C3F026BDB9), UINT64_C(0xAC90668B68206DC5), + UINT64_C(0xE8ED5D98ECBE4E70), UINT64_C(0xCFFF61DDDC1A6974) } }, + { { UINT64_C(0xFF5C3A2977B1A5C1), UINT64_C(0x10C27E4A0DDF995D), + UINT64_C(0xCB745F77E23363E3), UINT64_C(0xD765DF6F32F399A3), + UINT64_C(0xF0CA0C2F8A99E109), UINT64_C(0xC3A6BFB71E025CA0) }, + { UINT64_C(0x830B2C0A4F9D9FA5), UINT64_C(0xAE914CACBD1A84E5), + UINT64_C(0x30B35ED8A4FEBCC1), UINT64_C(0xCB902B4684CFBF2E), + UINT64_C(0x0BD4762825FC6375), UINT64_C(0xA858A53C85509D04) } }, + { { UINT64_C(0x8B995D0C552E0A3F), UINT64_C(0xEDBD4E9417BE9FF7), + UINT64_C(0x3432E83995085178), UINT64_C(0x0FE5C18180C256F5), + 
UINT64_C(0x05A64EA8EBF9597C), UINT64_C(0x6ED44BB13F80371F) }, + { UINT64_C(0x6A29A05EFE4C12EE), UINT64_C(0x3E436A43E0BB83B3), + UINT64_C(0x38365D9A74D72921), UINT64_C(0x3F5EE823C38E1ED7), + UINT64_C(0x09A53213E8FA063F), UINT64_C(0x1E7FE47AB435E713) } }, + { { UINT64_C(0xE4D9BC94FDDD17F3), UINT64_C(0xC74B8FEDC1016C20), + UINT64_C(0x095DE39BB49C060E), UINT64_C(0xDBCC67958AC0DF00), + UINT64_C(0x4CF6BAEB1C34F4DF), UINT64_C(0x72C55C21E8390170) }, + { UINT64_C(0x4F17BFD2F6C48E79), UINT64_C(0x18BF4DA0017A80BA), + UINT64_C(0xCF51D829BCF4B138), UINT64_C(0x598AEE5FF48F8B0D), + UINT64_C(0x83FAEE5620F10809), UINT64_C(0x4615D4DC779F0850) } }, + }, + { + { { UINT64_C(0x22313DEE5852B59B), UINT64_C(0x6F56C8E8B6A0B37F), + UINT64_C(0x43D6EEAEA76EC380), UINT64_C(0xA16551360275AD36), + UINT64_C(0xE5C1B65ADF095BDA), UINT64_C(0xBD1FFA8D367C44B0) }, + { UINT64_C(0xE2B419C26B48AF2B), UINT64_C(0x57BBBD973DA194C8), + UINT64_C(0xB5FBE51FA2BAFF05), UINT64_C(0xA0594D706269B5D0), + UINT64_C(0x0B07B70523E8D667), UINT64_C(0xAE1976B563E016E7) } }, + { { UINT64_C(0x2FDE4893FBECAAAE), UINT64_C(0x444346DE30332229), + UINT64_C(0x157B8A5B09456ED5), UINT64_C(0x73606A7925797C6C), + UINT64_C(0xA9D0F47C33C14C06), UINT64_C(0x7BC8962CFAF971CA) }, + { UINT64_C(0x6E763C5165909DFD), UINT64_C(0x1BBBE41B14A9BF42), + UINT64_C(0xD95B7ECBC49E9EFC), UINT64_C(0x0C317927B38F2B59), + UINT64_C(0x97912B53B3C397DB), UINT64_C(0xCB3879AA45C7ABC7) } }, + { { UINT64_C(0xCD81BDCF24359B81), UINT64_C(0x6FD326E2DB4C321C), + UINT64_C(0x4CB0228BF8EBE39C), UINT64_C(0x496A9DCEB2CDD852), + UINT64_C(0x0F115A1AD0E9B3AF), UINT64_C(0xAA08BF36D8EEEF8A) }, + { UINT64_C(0x5232A51506E5E739), UINT64_C(0x21FAE9D58407A551), + UINT64_C(0x289D18B08994B4E8), UINT64_C(0xB4E346A809097A52), + UINT64_C(0xC641510F324621D0), UINT64_C(0xC567FD4A95A41AB8) } }, + { { UINT64_C(0x261578C7D57C8DE9), UINT64_C(0xB9BC491F3836C5C8), + UINT64_C(0x993266B414C8038F), UINT64_C(0xBACAD755FAA7CC39), + UINT64_C(0x418C4DEFD69B7E27), UINT64_C(0x53FDC5CDAE751533) }, + { 
UINT64_C(0x6F3BD329C3EEA63A), UINT64_C(0xA7A22091E53DD29E), + UINT64_C(0xB7164F73DC4C54EC), UINT64_C(0xCA66290D44D3D74E), + UINT64_C(0xF77C62424C9EA511), UINT64_C(0x34337F551F714C49) } }, + { { UINT64_C(0x5ED2B216A64B6C4B), UINT64_C(0x1C38794F3AAE640D), + UINT64_C(0x30BBAEE08905794F), UINT64_C(0x0D9EE41EC8699CFB), + UINT64_C(0xAF38DAF2CF7B7C29), UINT64_C(0x0D6A05CA43E53513) }, + { UINT64_C(0xBE96C6442606AB56), UINT64_C(0x13E7A072E9EB9734), + UINT64_C(0xF96694455FF50CD7), UINT64_C(0x68EF26B547DA6F1D), + UINT64_C(0xF002873823687CB7), UINT64_C(0x5ED9C8766217C1CE) } }, + { { UINT64_C(0x423BA5130A3A9691), UINT64_C(0xF421B1E7B3179296), + UINT64_C(0x6B51BCDB1A871E1B), UINT64_C(0x6E3BB5B5464E4300), + UINT64_C(0x24171E2EFC6C54CC), UINT64_C(0xA9DFA947D3E58DC2) }, + { UINT64_C(0x175B33099DE9CFA7), UINT64_C(0x707B25292D1015DA), + UINT64_C(0xCBB95F17993EA65A), UINT64_C(0x935150630447450D), + UINT64_C(0x0F47B2051B2753C9), UINT64_C(0x4A0BAB14E7D427CF) } }, + { { UINT64_C(0xA39DEF39B5AA7CA1), UINT64_C(0x591CB173C47C33DF), + UINT64_C(0xA09DAC796BBAB872), UINT64_C(0x3EF9D7CF7208BA2F), + UINT64_C(0x3CC189317A0A34FC), UINT64_C(0xAE31C62BBCC3380F) }, + { UINT64_C(0xD72A67940287C0B4), UINT64_C(0x3373382C68E334F1), + UINT64_C(0xD0310CA8BD20C6A6), UINT64_C(0xA2734B8742C033FD), + UINT64_C(0xA5D390F18DCE4509), UINT64_C(0xFC84E74B3E1AFCB5) } }, + { { UINT64_C(0xB028334DF2CD8A9C), UINT64_C(0xB8719291570F76F6), + UINT64_C(0x662A386E01065A2D), UINT64_C(0xDF1634CB53D940AE), + UINT64_C(0x625A7B838F5B41F9), UINT64_C(0xA033E4FEEE6AA1B4) }, + { UINT64_C(0x51E9D4631E42BABB), UINT64_C(0x660BC2E40D388468), + UINT64_C(0x3F702189FCBB114A), UINT64_C(0x6B46FE35B414CA78), + UINT64_C(0x328F6CF24A57316B), UINT64_C(0x917423B5381AD156) } }, + { { UINT64_C(0xAC19306E5373A607), UINT64_C(0x471DF8E3191D0969), + UINT64_C(0x380ADE35B9720D83), UINT64_C(0x7423FDF548F1FD5C), + UINT64_C(0x8B090C9F49CABC95), UINT64_C(0xB768E8CDC9842F2F) }, + { UINT64_C(0x399F456DE56162D6), UINT64_C(0xBB6BA2404F326791), + 
UINT64_C(0x8F4FBA3B342590BE), UINT64_C(0x053986B93DFB6B3E), + UINT64_C(0xBB6739F1190C7425), UINT64_C(0x32D4A55332F7E95F) } }, + { { UINT64_C(0x0205A0EC0DDBFB21), UINT64_C(0x3010327D33AC3407), + UINT64_C(0xCF2F4DB33348999B), UINT64_C(0x660DB9F41551604A), + UINT64_C(0xC346C69A5D38D335), UINT64_C(0x64AAB3D338882479) }, + { UINT64_C(0xA096B5E76AE44403), UINT64_C(0x6B4C9571645F76CD), + UINT64_C(0x72E1CD5F4711120F), UINT64_C(0x93EC42ACF27CC3E1), + UINT64_C(0x2D18D004A72ABB12), UINT64_C(0x232E9568C9841A04) } }, + { { UINT64_C(0xFF01DB223CC7F908), UINT64_C(0x9F214F8FD13CDD3B), + UINT64_C(0x38DADBB7E0B014B5), UINT64_C(0x2C548CCC94245C95), + UINT64_C(0x714BE331809AFCE3), UINT64_C(0xBCC644109BFE957E) }, + { UINT64_C(0xC21C2D215B957F80), UINT64_C(0xBA2D4FDCBB8A4C42), + UINT64_C(0xFA6CD4AF74817CEC), UINT64_C(0x9E7FB523C528EAD6), + UINT64_C(0xAED781FF7714B10E), UINT64_C(0xB52BB59294F04455) } }, + { { UINT64_C(0xA578BD69868CC68B), UINT64_C(0xA40FDC8D603F2C08), + UINT64_C(0x53D79BD12D81B042), UINT64_C(0x1B136AF3A7587EAB), + UINT64_C(0x1ED4F939868A16DB), UINT64_C(0x775A61FBD0B98273) }, + { UINT64_C(0xBA5C12A6E56BEF8C), UINT64_C(0xF926CE52DDDC8595), + UINT64_C(0xA13F5C8F586FE1F8), UINT64_C(0xEAC9F7F2060DBB54), + UINT64_C(0x70C0AC3A51AF4342), UINT64_C(0xC16E303C79CDA450) } }, + { { UINT64_C(0xD0DADD6C8113F4EA), UINT64_C(0xF14E392207BDF09F), + UINT64_C(0x3FE5E9C2AA7D877C), UINT64_C(0x9EA95C1948779264), + UINT64_C(0xE93F65A74FCB8344), UINT64_C(0x9F40837E76D925A4) }, + { UINT64_C(0x0EA6DA3F8271FFC7), UINT64_C(0x557FA529CC8F9B19), + UINT64_C(0x2613DBF178E6DDFD), UINT64_C(0x7A7523B836B1E954), + UINT64_C(0x20EB3168406A87FB), UINT64_C(0x64C21C1403ABA56A) } }, + { { UINT64_C(0xE86C9C2DC032DD5F), UINT64_C(0x158CEB8E86F16A21), + UINT64_C(0x0279FF5368326AF1), UINT64_C(0x1FFE2E2B59F12BA5), + UINT64_C(0xD75A46DB86826D45), UINT64_C(0xE19B48411E33E6AC) }, + { UINT64_C(0x5F0CC5240E52991C), UINT64_C(0x645871F98B116286), + UINT64_C(0xAB3B4B1EFCAEC5D3), UINT64_C(0x994C8DF051D0F698), + 
UINT64_C(0x06F890AFE5D13040), UINT64_C(0x72D9DC235F96C7C2) } }, + { { UINT64_C(0x7C018DEEE7886A80), UINT64_C(0xFA2093308786E4A3), + UINT64_C(0xCEC8E2A3A4415CA1), UINT64_C(0x5C736FC1CC83CC60), + UINT64_C(0xFEF9788CF00C259F), UINT64_C(0xED5C01CBDD29A6AD) }, + { UINT64_C(0x87834A033E20825B), UINT64_C(0x13B1239D123F9358), + UINT64_C(0x7E8869D0FBC286C1), UINT64_C(0xC4AB5AA324CE8609), + UINT64_C(0x38716BEEB6349208), UINT64_C(0x0BDF4F99B322AE21) } }, + { { UINT64_C(0x6B97A2BF53E3494B), UINT64_C(0xA8AA05C570F7A13E), + UINT64_C(0x209709C2F1305B51), UINT64_C(0x57B31888DAB76F2C), + UINT64_C(0x75B2ECD7AA2A406A), UINT64_C(0x88801A00A35374A4) }, + { UINT64_C(0xE1458D1C45C0471B), UINT64_C(0x5760E306322C1AB0), + UINT64_C(0x789A0AF1AD6AB0A6), UINT64_C(0x74398DE1F458B9CE), + UINT64_C(0x1652FF9F32E0C65F), UINT64_C(0xFAF1F9D5FFFB3A52) } }, + }, + { + { { UINT64_C(0xA05C751CD1D1B007), UINT64_C(0x016C213B0213E478), + UINT64_C(0x9C56E26CF4C98FEE), UINT64_C(0x6084F8B9E7B3A7C7), + UINT64_C(0xA0B042F6DECC1646), UINT64_C(0x4A6F3C1AFBF3A0BC) }, + { UINT64_C(0x94524C2C51C9F909), UINT64_C(0xF3B3AD403A6D3748), + UINT64_C(0x18792D6E7CE1F9F5), UINT64_C(0x8EBC2FD7FC0C34FA), + UINT64_C(0x032A9F41780A1693), UINT64_C(0x34F9801E56A60019) } }, + { { UINT64_C(0xB398290CF0DB3751), UINT64_C(0x01170580BA42C976), + UINT64_C(0x3E71AA2956560B89), UINT64_C(0x80817AAC50E6647B), + UINT64_C(0x35C833ADA0BE42DA), UINT64_C(0xFA3C6148F1BABA4E) }, + { UINT64_C(0xC57BE645CD8F6253), UINT64_C(0x77CEE46BC657AD0D), + UINT64_C(0x830077310DEFD908), UINT64_C(0x92FE9BCE899CBA56), + UINT64_C(0x48450EC4BCEFFB5A), UINT64_C(0xE615148DF2F5F4BF) } }, + { { UINT64_C(0xF55EDABB90B86166), UINT64_C(0x27F7D784075430A2), + UINT64_C(0xF53E822B9BF17161), UINT64_C(0x4A5B3B93AFE808DC), + UINT64_C(0x590BBBDED7272F55), UINT64_C(0x233D63FAEAEA79A1) }, + { UINT64_C(0xD7042BEAFE1EBA07), UINT64_C(0xD2B9AEA010750D7E), + UINT64_C(0xD8D1E69031078AA5), UINT64_C(0x9E837F187E37BC8B), + UINT64_C(0x9558FF4F85008975), UINT64_C(0x93EDB837421FE867) } }, + { { 
UINT64_C(0xAA6489DF83D55B5A), UINT64_C(0xEA092E4986BF27F7), + UINT64_C(0x4D8943A95FA2EFEC), UINT64_C(0xC9BAAE53720E1A8C), + UINT64_C(0xC055444B95A4F8A3), UINT64_C(0x93BD01E8A7C1206B) }, + { UINT64_C(0xD97765B6714A27DF), UINT64_C(0xD622D954193F1B16), + UINT64_C(0x115CC35AF1503B15), UINT64_C(0x1DD5359FA9FA21F8), + UINT64_C(0x197C32996DFED1F1), UINT64_C(0xDEE8B7C9F77F2679) } }, + { { UINT64_C(0x5405179F394FD855), UINT64_C(0xC9D6E24449FDFB33), + UINT64_C(0x70EBCAB4BD903393), UINT64_C(0x0D3A3899A2C56780), + UINT64_C(0x012C7256683D1A0A), UINT64_C(0xC688FC8880A48F3B) }, + { UINT64_C(0x180957546F7DF527), UINT64_C(0x9E339B4B71315D16), + UINT64_C(0x90560C28A956BB12), UINT64_C(0x2BECEA60D42EEE8D), + UINT64_C(0x82AEB9A750632653), UINT64_C(0xED34353EDFA5CD6A) } }, + { { UINT64_C(0x82154D2C91AECCE4), UINT64_C(0x312C60705041887F), + UINT64_C(0xECF589F3FB9FBD71), UINT64_C(0x67660A7DB524BDE4), + UINT64_C(0xE99B029D724ACF23), UINT64_C(0xDF06E4AF6D1CD891) }, + { UINT64_C(0x07806CB580EE304D), UINT64_C(0x0C70BB9F7443A8F8), + UINT64_C(0x01EC341408B0830A), UINT64_C(0xFD7B63C35A81510B), + UINT64_C(0xE90A0A39453B5F93), UINT64_C(0xAB700F8F9BC71725) } }, + { { UINT64_C(0x9401AEC2B9F00793), UINT64_C(0x064EC4F4B997F0BF), + UINT64_C(0xDC0CC1FD849240C8), UINT64_C(0x39A75F37B6E92D72), + UINT64_C(0xAA43CA5D0224A4AB), UINT64_C(0x9C4D632554614C47) }, + { UINT64_C(0x1767366FC6709DA3), UINT64_C(0xA6B482D123479232), + UINT64_C(0x54DC6DDC84D63E85), UINT64_C(0x0ACCB5ADC99D3B9E), + UINT64_C(0x211716BBE8AA3ABF), UINT64_C(0xD0FE25AD69EC6406) } }, + { { UINT64_C(0x0D5C1769DF85C705), UINT64_C(0x7086C93DA409DCD1), + UINT64_C(0x9710839D0E8D75D8), UINT64_C(0x17B7DB75EBDD4177), + UINT64_C(0xAF69EB58F649A809), UINT64_C(0x6EF19EA28A84E220) }, + { UINT64_C(0x36EB5C6665C278B2), UINT64_C(0xD2A1512881EA9D65), + UINT64_C(0x4FCBA840769300AD), UINT64_C(0xC2052CCDC8E536E5), + UINT64_C(0x9CAEE014AC263B8F), UINT64_C(0x56F7ED7AF9239663) } }, + { { UINT64_C(0xF6FA251FAC9E09E1), UINT64_C(0xA3775605955A2853), + 
UINT64_C(0x977B8D21F2A4BD78), UINT64_C(0xF68AA7FF3E096410), + UINT64_C(0x01AB055265F88419), UINT64_C(0xC4C8D77EBB93F64E) }, + { UINT64_C(0x718251113451FE64), UINT64_C(0xFA0F905B46F9BAF0), + UINT64_C(0x79BE3BF3CA49EF1A), UINT64_C(0x831109B26CB02071), + UINT64_C(0x765F935FC4DDBFE5), UINT64_C(0x6F99CD1480E5A3BA) } }, + { { UINT64_C(0xD2E8DA04234F91FF), UINT64_C(0x4DED4D6D813867AA), + UINT64_C(0x3B50175DE0A0D945), UINT64_C(0x55AC74064EB78137), + UINT64_C(0xE9FA7F6EE1D47730), UINT64_C(0x2C1715315CBF2176) }, + { UINT64_C(0xA521788F2BE7A47D), UINT64_C(0x95B15A273FCF1AB3), + UINT64_C(0xAADA6401F28A946A), UINT64_C(0x628B2EF48B4E898B), + UINT64_C(0x0E6F46296D6592CC), UINT64_C(0x997C7094A723CADD) } }, + { { UINT64_C(0x878BCE116AFE80C6), UINT64_C(0xA89ABC9D007BBA38), + UINT64_C(0xB0C1F87BA7CC267F), UINT64_C(0x86D33B9D5104FF04), + UINT64_C(0xB0504B1B2EF1BA42), UINT64_C(0x21693048B2827E88) }, + { UINT64_C(0x11F1CCD579CFCD14), UINT64_C(0x59C09FFA94AD227E), + UINT64_C(0x95A4ADCB3EA91ACF), UINT64_C(0x1346238BB4370BAA), + UINT64_C(0xB099D2023E1367B0), UINT64_C(0xCF5BBDE690F23CEA) } }, + { { UINT64_C(0x453299BBBCB3BE5E), UINT64_C(0x123C588E38E9FF97), + UINT64_C(0x8C115DD9F6A2E521), UINT64_C(0x6E333C11FF7D4B98), + UINT64_C(0x9DD061E5DA73E736), UINT64_C(0xC6AB7B3A5CA53056) }, + { UINT64_C(0xF1EF3EE35B30A76B), UINT64_C(0xADD6B44A961BA11F), + UINT64_C(0x7BB00B752CA6E030), UINT64_C(0x270272E82FE270AD), + UINT64_C(0x23BC6F4F241A9239), UINT64_C(0x88581E130BB94A94) } }, + { { UINT64_C(0xBD225A6924EEF67F), UINT64_C(0x7CFD96140412CEB7), + UINT64_C(0xF6DE167999AC298E), UINT64_C(0xB20FD895ED6C3571), + UINT64_C(0x03C73B7861836C56), UINT64_C(0xEE3C3A16ABA6CB34) }, + { UINT64_C(0x9E8C56674138408A), UINT64_C(0xEC25FCB12DD6EBDF), + UINT64_C(0xC54C33FDDBBDF6E3), UINT64_C(0x93E0913B4A3C9DD4), + UINT64_C(0x66D7D13535EDEED4), UINT64_C(0xD29A36C4453FB66E) } }, + { { UINT64_C(0x7F192F039F1943AF), UINT64_C(0x6488163F4E0B5FB0), + UINT64_C(0x66A45C6953599226), UINT64_C(0x924E2E439AD15A73), + 
UINT64_C(0x8B553DB742A99D76), UINT64_C(0x4BC6B53B0451F521) }, + { UINT64_C(0xC029B5EF101F8AD6), UINT64_C(0x6A4DA71CC507EED9), + UINT64_C(0x3ADFAEC030BB22F3), UINT64_C(0x81BCAF7AB514F85B), + UINT64_C(0x2E1E6EFF5A7E60D3), UINT64_C(0x5270ABC0AE39D42F) } }, + { { UINT64_C(0x86D56DEB3901F0F8), UINT64_C(0x1D0BC792EED5F650), + UINT64_C(0x1A2DDFD8CA1114A3), UINT64_C(0x94ABF4B1F1DD316D), + UINT64_C(0xF72179E43D9F18EF), UINT64_C(0x52A0921E9AA2CABF) }, + { UINT64_C(0xECDA9E27A7452883), UINT64_C(0x7E90850AAFD771B4), + UINT64_C(0xD40F87EA9CC0465C), UINT64_C(0x8CFCB60A865CDA36), + UINT64_C(0x3DBEC2CC7C650942), UINT64_C(0x071A4EE7E718CA9D) } }, + { { UINT64_C(0x73C0E4FF276AC5F3), UINT64_C(0xE7BA5A6ABDB97EA1), + UINT64_C(0x638CA54EC5808398), UINT64_C(0x8258DC82413855E5), + UINT64_C(0x35DDD2E957F07614), UINT64_C(0xF98DD6921DC13BF9) }, + { UINT64_C(0x3A4C0088F16DCD84), UINT64_C(0xF192EADD833D83F9), + UINT64_C(0x3C26C931A6D61D29), UINT64_C(0x589FDD52DE0AD7A1), + UINT64_C(0x7CD83DD20442D37F), UINT64_C(0x1E47E777403ECBFC) } }, + }, + { + { { UINT64_C(0x2AF8ED8170D4D7BC), UINT64_C(0xABC3E15FB632435C), + UINT64_C(0x4C0E726F78219356), UINT64_C(0x8C1962A1B87254C4), + UINT64_C(0x30796A71C9E7691A), UINT64_C(0xD453EF19A75A12EE) }, + { UINT64_C(0x535F42C213AE4964), UINT64_C(0x86831C3C0DA9586A), + UINT64_C(0xB7F1EF35E39A7A58), UINT64_C(0xA2789AE2D459B91A), + UINT64_C(0xEADBCA7F02FD429D), UINT64_C(0x94F215D465290F57) } }, + { { UINT64_C(0x37ED2BE51CFB79AC), UINT64_C(0x801946F3E7AF84C3), + UINT64_C(0xB061AD8AE77C2F00), UINT64_C(0xE87E1A9A44DE16A8), + UINT64_C(0xDF4F57C87EE490FF), UINT64_C(0x4E793B49005993ED) }, + { UINT64_C(0xE1036387BCCB593F), UINT64_C(0xF174941195E09B80), + UINT64_C(0x59CB20D15AB42F91), UINT64_C(0xA738A18DAC0FF033), + UINT64_C(0xDA501A2E2AC1E7F4), UINT64_C(0x1B67EDA084D8A6E0) } }, + { { UINT64_C(0x1D27EFCE1080E90B), UINT64_C(0xA28152463FD01DC6), + UINT64_C(0x99A3FB83CAA26D18), UINT64_C(0xD27E6133B82BABBE), + UINT64_C(0x61030DFDD783DD60), UINT64_C(0x295A291373C78CB8) }, + { 
UINT64_C(0x8707A2CF68BE6A92), UINT64_C(0xC9C2FB98EEB3474A), + UINT64_C(0x7C3FD412A2B176B8), UINT64_C(0xD5B52E2FC7202101), + UINT64_C(0x24A63030F0A6D536), UINT64_C(0x05842DE304648EC0) } }, + { { UINT64_C(0x67477CDC30577AC9), UINT64_C(0x51DD9775244F92A8), + UINT64_C(0x31FD60B9917EEC66), UINT64_C(0xACD95BD4D66C5C1D), + UINT64_C(0x2E0551F3BF9508BA), UINT64_C(0x121168E1688CB243) }, + { UINT64_C(0x8C0397404540D230), UINT64_C(0xC4ED3CF6009ECDF9), + UINT64_C(0x191825E144DB62AF), UINT64_C(0x3EE8ACABC4A030DA), + UINT64_C(0x8AB154A894081504), UINT64_C(0x1FE09E4B486C9CD0) } }, + { { UINT64_C(0x512F82F9D113450B), UINT64_C(0x5878C9012DBC9197), + UINT64_C(0xDB87412BE13F355B), UINT64_C(0x0A0A4A9B935B8A5E), + UINT64_C(0x818587BDF25A5351), UINT64_C(0xE807931031E3D9C7) }, + { UINT64_C(0x8B1D47C7611BC1B1), UINT64_C(0x51722B5872A823F2), + UINT64_C(0x6F97EE8A53B36B3E), UINT64_C(0x6E085AAC946DD453), + UINT64_C(0x2EC5057DE65E6533), UINT64_C(0xF82D9D714BB18801) } }, + { { UINT64_C(0xAD81FA938BA5AA8E), UINT64_C(0x723E628E8F7AA69E), + UINT64_C(0x0BA7C2DEEF35937C), UINT64_C(0x83A43EC56DECFB40), + UINT64_C(0xF520F849E60C4F2D), UINT64_C(0x8260E8AE457E3B5E) }, + { UINT64_C(0x7CE874F0BF1D9ED7), UINT64_C(0x5FDE35537F1A5466), + UINT64_C(0x5A63777C0C162DBB), UINT64_C(0x0FD04F8CDAD87289), + UINT64_C(0xCA2D9E0E640761D5), UINT64_C(0x4615CFF838501ADB) } }, + { { UINT64_C(0x9422789B110B4A25), UINT64_C(0x5C26779F70AD8CC1), + UINT64_C(0x4EE6A748EC4F1E14), UINT64_C(0xFB584A0D5C7AB5E0), + UINT64_C(0xED1DCB0BFB21EE66), UINT64_C(0xDBED1F0011C6863C) }, + { UINT64_C(0xD2969269B1B1D187), UINT64_C(0xF7D0C3F2AFE964E6), + UINT64_C(0xE05EE93F12BB865E), UINT64_C(0x1AFB7BEEED79118E), + UINT64_C(0x220AF1380F0FE453), UINT64_C(0x1463AA1A52782AB9) } }, + { { UINT64_C(0x7C139D56D7DBE5F9), UINT64_C(0xFC16E6110B83685B), + UINT64_C(0xFA723C029018463C), UINT64_C(0xC472458C840BF5D7), + UINT64_C(0x4D8093590AF07591), UINT64_C(0x418D88303308DFD9) }, + { UINT64_C(0x9B381E040C365AE3), UINT64_C(0x3780BF33F8190FD1), + 
UINT64_C(0x45397418DD03E854), UINT64_C(0xA95D030F4E51E491), + UINT64_C(0x87C8C686E3286CEA), UINT64_C(0x01C773BF900B5F83) } }, + { { UINT64_C(0xDABE347578673B02), UINT64_C(0x4F0F25CEF6E7395E), + UINT64_C(0x3117ABB9D181AD45), UINT64_C(0x4B559F88AA13DE0B), + UINT64_C(0xFD8EFE78EA7C9745), UINT64_C(0x080600475DD21682) }, + { UINT64_C(0xC0F5DE4BD4C86FFC), UINT64_C(0x4BB14B1EF21AB6A2), + UINT64_C(0xACB53A6CF50C1D12), UINT64_C(0x46AAC4505CC9162E), + UINT64_C(0x049C51E02DE240B6), UINT64_C(0xBB2DC016E383C3B0) } }, + { { UINT64_C(0xA3C56AD28E438C92), UINT64_C(0x7C43F98FB2CEAF1A), + UINT64_C(0x397C44F7E2150778), UINT64_C(0x48D17AB771A24131), + UINT64_C(0xCC5138631E2ACDA9), UINT64_C(0x2C76A55EF0C9BAC9) }, + { UINT64_C(0x4D74CDCE7EA4BB7B), UINT64_C(0x834BD5BFB1B3C2BA), + UINT64_C(0x46E2911ECCC310A4), UINT64_C(0xD3DE84AA0FC1BF13), + UINT64_C(0x27F2892F80A03AD3), UINT64_C(0x85B476203BD2F08B) } }, + { { UINT64_C(0xAB1CB818567AF533), UINT64_C(0x273B4537BAC2705A), + UINT64_C(0x133066C422C84AB6), UINT64_C(0xC3590DE64830BFC1), + UINT64_C(0xEA2978695E4742D0), UINT64_C(0xF6D8C6944F3164C0) }, + { UINT64_C(0x09E85F3DC1249588), UINT64_C(0x6C2BB05D4EC64DF7), + UINT64_C(0xD267115E8B78000F), UINT64_C(0x07C5D7AEC7E4A316), + UINT64_C(0xCB1187BA4619E5BD), UINT64_C(0x57B1D4EFA43F7EEE) } }, + { { UINT64_C(0x3618891FC8176A96), UINT64_C(0x62C4B084E5808B97), + UINT64_C(0xDE5585464DD95D6E), UINT64_C(0x27A8133E730B2EA4), + UINT64_C(0xE07CEEC36AF318A0), UINT64_C(0x0ACC1286CE24FD2C) }, + { UINT64_C(0x8A48FE4ADD4D307C), UINT64_C(0x71A9BA9C18CDE0DA), + UINT64_C(0x655E2B66D5D79747), UINT64_C(0x409FE856A79AEDC7), + UINT64_C(0xC5A9F244D287E5CF), UINT64_C(0xCCE103844E82EC39) } }, + { { UINT64_C(0x00675BA7F25D364C), UINT64_C(0x7A7F162968D36BDF), + UINT64_C(0x35EC468AA9E23F29), UINT64_C(0xF797AC502D926E6C), + UINT64_C(0x639BA4534B4F4376), UINT64_C(0xD71B430F51FF9519) }, + { UINT64_C(0xB8C439EC2CF5635C), UINT64_C(0x0CE4C8D181980393), + UINT64_C(0x4C5362A964123B15), UINT64_C(0x6E0421E0FFDCF096), + 
UINT64_C(0x624A855F10D1F914), UINT64_C(0x7D8F3AB7614DCD29) } }, + { { UINT64_C(0xD9219ADAB3493CE0), UINT64_C(0x971B243A52F09AE5), + UINT64_C(0xC16C9BF8E24E3674), UINT64_C(0x026D408DCE68C7CD), + UINT64_C(0xF9B33DD9358209E3), UINT64_C(0x02D0595DF3B2A206) }, + { UINT64_C(0xBF99427160D15640), UINT64_C(0x6DA7A04E15B5466A), + UINT64_C(0x03AA4ED81CADB50D), UINT64_C(0x1548F029129A4253), + UINT64_C(0x41741F7EB842865A), UINT64_C(0x859FE0A4A3F88C98) } }, + { { UINT64_C(0x80DE085A05FD7553), UINT64_C(0x4A4AB91EB897566B), + UINT64_C(0x33BCD4752F1C173F), UINT64_C(0x4E238896C100C013), + UINT64_C(0x1C88500DD614B34B), UINT64_C(0x0401C5F6C3BA9E23) }, + { UINT64_C(0x8E8003C4D0AF0DE5), UINT64_C(0x19B1DFB59D0DCBB9), + UINT64_C(0x4A3640A9EBEF7AB6), UINT64_C(0xEDAFD65B959B15F6), + UINT64_C(0x8092EF7F7FB95821), UINT64_C(0xAB8DD52ECE2E45D1) } }, + { { UINT64_C(0xD1F2D6B8B9CFE6BF), UINT64_C(0x6358810B00073F6F), + UINT64_C(0x5FCE5993D712106E), UINT64_C(0x5EE6B2711C024C91), + UINT64_C(0xD0248FF5453DB663), UINT64_C(0xD6D81CB2ADB835E8) }, + { UINT64_C(0x8696CFECFDFCB4C7), UINT64_C(0x696B7FCB53BC9045), + UINT64_C(0xAB4D3807DDA56981), UINT64_C(0x2F9980521E4B943B), + UINT64_C(0x8AA76ADB166B7F18), UINT64_C(0x6393430152A2D7ED) } }, + }, + { + { { UINT64_C(0xBBCCCE39A368EFF6), UINT64_C(0xD8CAABDF8CEB5C43), + UINT64_C(0x9EAE35A5D2252FDA), UINT64_C(0xA8F4F20954E7DD49), + UINT64_C(0xA56D72A6295100FD), UINT64_C(0x20FC1FE856767727) }, + { UINT64_C(0xBF60B2480BBAA5AB), UINT64_C(0xA4F3CE5A313911F2), + UINT64_C(0xC2A67AD4B93DAB9C), UINT64_C(0x18CD0ED022D71F39), + UINT64_C(0x04380C425F304DB2), UINT64_C(0x26420CBB6729C821) } }, + { { UINT64_C(0x26BD07D6BDFBCAE8), UINT64_C(0x10B5173FDF01A80A), + UINT64_C(0xD831C5466798B96C), UINT64_C(0x1D6B41081D3F3859), + UINT64_C(0x501D38EC991B9EC7), UINT64_C(0x26319283D78431A9) }, + { UINT64_C(0x8B85BAF7118B343C), UINT64_C(0x4696CDDD58DEF7D0), + UINT64_C(0xEFC7C1107ACDCF58), UINT64_C(0xD9AF415C848D5842), + UINT64_C(0x6B5A06BC0AC7FDAC), UINT64_C(0x7D623E0DA344319B) } }, + { { 
UINT64_C(0x4C0D78060C9D3547), UINT64_C(0x993F048DCF2AED47), + UINT64_C(0x5217C453E4B57E22), UINT64_C(0xB4669E35F4172B28), + UINT64_C(0x509A3CD049F999F8), UINT64_C(0xD19F863287C69D41) }, + { UINT64_C(0xE14D01E84C8FDED0), UINT64_C(0x342880FDEAFD9E1C), + UINT64_C(0x0E17BFF270DC2BF0), UINT64_C(0x46560B7BC0186400), + UINT64_C(0xE28C7B9C49A4DD34), UINT64_C(0x182119160F325D06) } }, + { { UINT64_C(0x46D70888D7E02E18), UINT64_C(0x7C806954D9F11FD9), + UINT64_C(0xE4948FCA4FBEA271), UINT64_C(0x7D6C7765BD80A9DF), + UINT64_C(0x1B470EA6F3871C71), UINT64_C(0xD62DE2448330A570) }, + { UINT64_C(0xDAECDDC1C659C3A7), UINT64_C(0x8621E513077F7AFC), + UINT64_C(0x56C7CD84CAEEEF13), UINT64_C(0xC60C910FC685A356), + UINT64_C(0xE68BC5C59DD93DDC), UINT64_C(0xD904E89FFEB64895) } }, + { { UINT64_C(0x75D874FB8BA7917A), UINT64_C(0x18FA7F53FD043BD4), + UINT64_C(0x212A0AD71FC3979E), UINT64_C(0x5703A7D95D6EAC0E), + UINT64_C(0x222F7188017DEAD5), UINT64_C(0x1EC687B70F6C1817) }, + { UINT64_C(0x23412FC3238BACB6), UINT64_C(0xB85D70E954CED154), + UINT64_C(0xD4E06722BDA674D0), UINT64_C(0x3EA5F17836F5A0C2), + UINT64_C(0x7E7D79CFF5C6D2CA), UINT64_C(0x1FFF94643DBB3C73) } }, + { { UINT64_C(0x916E19D0F163E4A8), UINT64_C(0x1E6740E71489DF17), + UINT64_C(0x1EAF9723339F3A47), UINT64_C(0x22F0ED1A124B8DAD), + UINT64_C(0x39C9166C49C3DD04), UINT64_C(0x628E7FD4CE1E9ACC) }, + { UINT64_C(0x124DDF2740031676), UINT64_C(0x002569391EDDB9BE), + UINT64_C(0xD39E25E7D360B0DA), UINT64_C(0x6E3015A84AA6C4C9), + UINT64_C(0xC6A2F643623EDA09), UINT64_C(0xBEFF2D1250AA99FB) } }, + { { UINT64_C(0x1FEEF7CE93EE8089), UINT64_C(0xC6B180BC252DD7BD), + UINT64_C(0xA16FB20B1788F051), UINT64_C(0xD86FD392E046ED39), + UINT64_C(0xDA0A36119378CE1D), UINT64_C(0x121EF3E7A5F7A61D) }, + { UINT64_C(0x94D2206192D13CAE), UINT64_C(0x5076046A77C72E08), + UINT64_C(0xF18BC2337D2308B9), UINT64_C(0x004DB3C517F977B1), + UINT64_C(0xD05AE3990471C11D), UINT64_C(0x86A2A55785CD1726) } }, + { { UINT64_C(0xB8D9B28672107804), UINT64_C(0xB5A7C4133303B79B), + 
UINT64_C(0x927EEF785FA37DED), UINT64_C(0xA1C5CF1EAD67DABA), + UINT64_C(0xAA5E3FB27360E7C7), UINT64_C(0x8354E61A0A0C0993) }, + { UINT64_C(0x2EC73AF97F5458CC), UINT64_C(0xDE4CB48848474325), + UINT64_C(0x2DD134C77209BC69), UINT64_C(0xB70C5567451A2ABE), + UINT64_C(0x2CD1B2008E293018), UINT64_C(0x15F8DA7AD33C0D72) } }, + { { UINT64_C(0x5DC386D0A8790657), UINT64_C(0xA4FDF676BC4D88BB), + UINT64_C(0x1B21F38F48BC6C49), UINT64_C(0xCDCC7FAA543A7003), + UINT64_C(0xEA97E7AA8C9CF72C), UINT64_C(0xA6B883F450D938A8) }, + { UINT64_C(0x51936F3AA3A10F27), UINT64_C(0x0170785FDECC76BF), + UINT64_C(0x7539ECE1908C578A), UINT64_C(0x5D9C8A8E0F3E8C25), + UINT64_C(0x8681B43B9E4717A7), UINT64_C(0x94F42507A9D83E39) } }, + { { UINT64_C(0xBBE11CA8A55ADDE7), UINT64_C(0x39E6F5CF3BC0896B), + UINT64_C(0x1447314E1D2D8D94), UINT64_C(0x45B481255B012F8A), + UINT64_C(0x41AD23FA08AD5283), UINT64_C(0x837243E241D13774) }, + { UINT64_C(0x1FC0BD9DBADCAA46), UINT64_C(0x8DF164ED26E84CAE), + UINT64_C(0x8FF70EC041017176), UINT64_C(0x23AD4BCE5C848BA7), + UINT64_C(0x89246FDE97A19CBB), UINT64_C(0xA5EF987B78397991) } }, + { { UINT64_C(0x111AF1B74757964D), UINT64_C(0x1D25D351DDBBF258), + UINT64_C(0x4161E7767D2B06D6), UINT64_C(0x6EFD26911CAC0C5B), + UINT64_C(0x633B95DB211BFAEB), UINT64_C(0x9BEDFA5AE2BDF701) }, + { UINT64_C(0xADAC2B0B73E099C8), UINT64_C(0x436F0023BFB16BFF), + UINT64_C(0xB91B100230F55854), UINT64_C(0xAF6A2097F4C6C8B7), + UINT64_C(0x3FF65CED3AD7B3D9), UINT64_C(0x6FA2626F330E56DF) } }, + { { UINT64_C(0x3D28BF2DFFCCFD07), UINT64_C(0x0514F6FFD989603B), + UINT64_C(0xB95196295514787A), UINT64_C(0xA1848121C3DB4E9C), + UINT64_C(0x47FE2E392A3D4595), UINT64_C(0x506F5D8211B73ED4) }, + { UINT64_C(0xA2257AE7A600D8BB), UINT64_C(0xD659DBD10F9F122C), + UINT64_C(0xDB0FDC6764DF160F), UINT64_C(0xFF3793397CB19690), + UINT64_C(0xDF4366B898E72EC1), UINT64_C(0x97E72BECDF437EB8) } }, + { { UINT64_C(0x81DCEA271C81E5D9), UINT64_C(0x7E1B6CDA6717FC49), + UINT64_C(0xAA36B3B511EAE80D), UINT64_C(0x1306687C3CD7CBB3), + 
UINT64_C(0xED670235C4E89064), UINT64_C(0x9D3B000958A94760) }, + { UINT64_C(0x5A64E158E6A6333C), UINT64_C(0x1A8B4A3649453203), + UINT64_C(0xF1CAD7241F77CC21), UINT64_C(0x693EBB4B70518EF7), + UINT64_C(0xFB47BD810F39C91A), UINT64_C(0xCFE63DA2FA4BC64B) } }, + { { UINT64_C(0x82C1C684EAA66108), UINT64_C(0xE32262184CFE79FC), + UINT64_C(0x3F28B72B849C720E), UINT64_C(0x137FB3558FEE1CA8), + UINT64_C(0x4D18A9CDE4F90C4E), UINT64_C(0xC0344227CC3E46FA) }, + { UINT64_C(0x4FD5C08E79CDA392), UINT64_C(0x65DB20DB8ADC87B5), + UINT64_C(0x86F95D5B916C1B84), UINT64_C(0x7EDA387117BB2B7C), + UINT64_C(0x18CCF7E7669A533B), UINT64_C(0x5E92421CECAD0E06) } }, + { { UINT64_C(0x26063E124174B08B), UINT64_C(0xE621D9BE70DE8E4D), + UINT64_C(0xAEA0FD0F5ECDF350), UINT64_C(0x0D9F69E49C20E5C9), + UINT64_C(0xD3DADEB90BBE2918), UINT64_C(0xD7B9B5DB58AA2F71) }, + { UINT64_C(0x7A971DD73364CAF8), UINT64_C(0x702616A3C25D4BE4), + UINT64_C(0xA30F0FA1A9E30071), UINT64_C(0x98AB24385573BC69), + UINT64_C(0xCBC63CDF6FEC2E22), UINT64_C(0x965F90EDCC901B9B) } }, + { { UINT64_C(0xD53B592D71E15BB3), UINT64_C(0x1F03C0E98820E0D0), + UINT64_C(0xCE93947D3CCCB726), UINT64_C(0x2790FEE01D547590), + UINT64_C(0x4401D847C59CDD7A), UINT64_C(0x72D69120A926DD9D) }, + { UINT64_C(0x38B8F21D4229F289), UINT64_C(0x9F412E407FE978AF), + UINT64_C(0xAE07901BCDB59AF1), UINT64_C(0x1E6BE5EBD1D4715E), + UINT64_C(0x3715BD8B18C96BEF), UINT64_C(0x4B71F6E6E11B3798) } }, + }, + { + { { UINT64_C(0x11A8FDE5F0CE2DF4), UINT64_C(0xBC70CA3EFA8D26DF), + UINT64_C(0x6818C275C74DFE82), UINT64_C(0x2B0294AC38373A50), + UINT64_C(0x584C4061E8E5F88F), UINT64_C(0x1C05C1CA7342383A) }, + { UINT64_C(0x263895B3911430EC), UINT64_C(0xEF9B0032A5171453), + UINT64_C(0x144359DA84DA7F0C), UINT64_C(0x76E3095A924A09F2), + UINT64_C(0x612986E3D69AD835), UINT64_C(0x70E03ADA392122AF) } }, + { { UINT64_C(0xFEB707EE67AAD17B), UINT64_C(0xBB21B28783042995), + UINT64_C(0x26DE16459A0D32BA), UINT64_C(0x9A2FF38A1FFB9266), + UINT64_C(0x4E5AD96D8F578B4A), UINT64_C(0x26CC0655883E7443) }, + { 
UINT64_C(0x1D8EECAB2EE9367A), UINT64_C(0x42B84337881DE2F8), + UINT64_C(0xE49B2FAED758AE41), UINT64_C(0x6A9A22904A85D867), + UINT64_C(0x2FB89DCEE68CBA86), UINT64_C(0xBC2526357F09A982) } }, + { { UINT64_C(0xADC794368C61AAAC), UINT64_C(0x24C7FD135E926563), + UINT64_C(0xEF9FAAA40406C129), UINT64_C(0xF4E6388C8B658D3C), + UINT64_C(0x7262BEB41E435BAF), UINT64_C(0x3BF622CCFDAEAC99) }, + { UINT64_C(0xD359F7D84E1AEDDC), UINT64_C(0x05DC4F8CD78C17B7), + UINT64_C(0xB18CF03229498BA5), UINT64_C(0xC67388CA85BF35AD), + UINT64_C(0x8A7A6AA262AA4BC8), UINT64_C(0x0B8F458E72F4627A) } }, + { { UINT64_C(0x3FB812EEC68E4488), UINT64_C(0x53C5EAA460EF7281), + UINT64_C(0xE57241838FBEFBE4), UINT64_C(0x2B7D49F4A4B24A05), + UINT64_C(0x23B138D0710C0A43), UINT64_C(0x16A5B4C1A85EC1DB) }, + { UINT64_C(0x7CC1F3D7305FEB02), UINT64_C(0x52F7947D5B6C1B54), + UINT64_C(0x1BDA23128F56981C), UINT64_C(0x68663EAEB4080A01), + UINT64_C(0x8DD7BA7E9F999B7F), UINT64_C(0xD8768D19B686580C) } }, + { { UINT64_C(0xBCD0E0AD7AFDDA94), UINT64_C(0x95A0DBBE34A30687), + UINT64_C(0xBBE3C3DF8C5E2665), UINT64_C(0x742BECD8EBF2BC16), + UINT64_C(0x300CEB483FA163A6), UINT64_C(0x0C5D02EE4663354B) }, + { UINT64_C(0xE4FB9AD6B5E606A4), UINT64_C(0x93F507B8CF49FF95), + UINT64_C(0x9406A90C585C193B), UINT64_C(0xAD1440C14ECF9517), + UINT64_C(0x184CB4759CEA53F1), UINT64_C(0x6855C4748EF11302) } }, + { { UINT64_C(0x00ECB523EDCAFA52), UINT64_C(0x0DA0AE0E086F69D3), + UINT64_C(0xC384DE15C242F347), UINT64_C(0xFB050E6E848C12B7), + UINT64_C(0x22F6765464E015CE), UINT64_C(0xCBDC2A487CA122F2) }, + { UINT64_C(0xA940D973445FB02C), UINT64_C(0x00F31E783767D89D), + UINT64_C(0x2B65A237613DABDD), UINT64_C(0x2BE0AB05C875AE09), + UINT64_C(0xB22E54FDBA204F8E), UINT64_C(0x65E2029D0F7687B9) } }, + { { UINT64_C(0xFFD825381855A71C), UINT64_C(0x26A330B3438BD8D8), + UINT64_C(0x89628311F9D8C5F9), UINT64_C(0x8D5FB9CF953738A0), + UINT64_C(0xCB7159C9EDFCD4E5), UINT64_C(0xD64E52302064C7C2) }, + { UINT64_C(0xF858ED80689F3CFE), UINT64_C(0x4830E30956128B67), + 
UINT64_C(0x2E1692DAE0E90688), UINT64_C(0xAB818913CA9CC232), + UINT64_C(0xE2E30C23A5D229A6), UINT64_C(0xA544E8B10E740E23) } }, + { { UINT64_C(0x1C15E569DC61E6CC), UINT64_C(0x8FD7296758FC7800), + UINT64_C(0xE61E7DB737A9DFC5), UINT64_C(0x3F34A9C65AFD7822), + UINT64_C(0x0A11274219E80773), UINT64_C(0xA353460C4760FC58) }, + { UINT64_C(0x2FB7DEEBB3124C71), UINT64_C(0x484636272D4009CC), + UINT64_C(0x399D1933C3A10370), UINT64_C(0x7EB1945054388DBD), + UINT64_C(0x8ECCE6397C2A006A), UINT64_C(0x3D565DAF55C932A0) } }, + { { UINT64_C(0xCEF57A9FD9ADAE53), UINT64_C(0xE2EB27D7F83FD8CD), + UINT64_C(0x4AC8F7199BBD2DDE), UINT64_C(0x604283AAE91ABFB7), + UINT64_C(0xB6A4E11534799F87), UINT64_C(0x2B253224E4C2A8F3) }, + { UINT64_C(0xC34F8B92C8782294), UINT64_C(0xC74D697DFCC2CB6B), + UINT64_C(0xD990411BC2C84C46), UINT64_C(0x2807B5C631EA4955), + UINT64_C(0x14AE2B93B9EB27F5), UINT64_C(0xF0AE96A76163EDFA) } }, + { { UINT64_C(0xA7BDCBB442DB7180), UINT64_C(0xC9FAA41FEDCA752F), + UINT64_C(0x147F91B4E820F401), UINT64_C(0x1E6CEF86F5F2645F), + UINT64_C(0xB4AB4D7F31FE711D), UINT64_C(0xCE68FB3C743EF882) }, + { UINT64_C(0xB9D7D6823EF2FCFF), UINT64_C(0xF6893811020DCAFD), + UINT64_C(0x30D9A50CBF81E760), UINT64_C(0x7F247D06B9B87228), + UINT64_C(0x143D4FEC5F40CFC0), UINT64_C(0x21D78D73329B2A88) } }, + { { UINT64_C(0x06B3FF8AED3F2055), UINT64_C(0x50482C77522BE214), + UINT64_C(0x8DF69CD8DDF54620), UINT64_C(0x6D1DB204F78A1165), + UINT64_C(0x459AE4A29AFE6BF2), UINT64_C(0xC23A9FFD24AC871E) }, + { UINT64_C(0xB7FD22E389E85D81), UINT64_C(0x297F1F6B122E9978), + UINT64_C(0xAB283D66144BE1CE), UINT64_C(0xC1F90AC2C00C614E), + UINT64_C(0x5465576E3224CD09), UINT64_C(0x8E8D910D441B6059) } }, + { { UINT64_C(0xF73A060AAAA228BC), UINT64_C(0xCF1B078356EFF87D), + UINT64_C(0x11EF17C0A54C9133), UINT64_C(0x9E476B1576A4DAA5), + UINT64_C(0x5624FEAC8018FB92), UINT64_C(0x9826A0FCCFEEC1B9) }, + { UINT64_C(0xB732F7FE2DFE2046), UINT64_C(0x9260BD9F3B40DA6A), + UINT64_C(0xCC9F908F4F231773), UINT64_C(0x4827FEB9DAFC0D55), + 
UINT64_C(0x07D32E85538ACE95), UINT64_C(0xAD9F897CB8EDAF37) } }, + { { UINT64_C(0x2F75B82FE3415498), UINT64_C(0xF99CAC5FF1015F30), + UINT64_C(0x766408247D7F25DE), UINT64_C(0x714BC9CDEE74C047), + UINT64_C(0x70F847BF07448879), UINT64_C(0xA14481DE072165C0) }, + { UINT64_C(0x9BFA59E3DB1140A8), UINT64_C(0x7B9C7FF0FCD13502), + UINT64_C(0xF4D7538E68459ABF), UINT64_C(0xED93A791C8FC6AD2), + UINT64_C(0xA8BBE2A8B51BD9B2), UINT64_C(0x084B5A279FB34008) } }, + { { UINT64_C(0xB3BB9545EB138C84), UINT64_C(0x59C3489C3FC88BFD), + UINT64_C(0x3A97FF6385F53EC7), UINT64_C(0x40FDF5A60AA69C3D), + UINT64_C(0x0E8CCEC753D19668), UINT64_C(0x0AA72EF933FAA661) }, + { UINT64_C(0xF5C5A6CF9B1E684B), UINT64_C(0x630F937131A22EA1), + UINT64_C(0x06B2AAC2AC60F7EA), UINT64_C(0xB181CAE25BC37D80), + UINT64_C(0x4601A929247B13EA), UINT64_C(0x8A71C3865F739797) } }, + { { UINT64_C(0x545387B3AB134786), UINT64_C(0x3179BB061599B64A), + UINT64_C(0xB0A6198607593574), UINT64_C(0xC7E39B2163FA7C3B), + UINT64_C(0xA1173F8691585D13), UINT64_C(0x09D5CC8ECB9525CD) }, + { UINT64_C(0xAAD44FFD8F3A3451), UINT64_C(0x702B04F225820CC5), + UINT64_C(0xE90CAC491CB66C17), UINT64_C(0x40F6B547EE161DC4), + UINT64_C(0xC08BB8B41BA4AC4E), UINT64_C(0x7DC064FBAE5A6BC1) } }, + { { UINT64_C(0x90A5E8719D76DDC7), UINT64_C(0x39DC8FAEEDFC8E2E), + UINT64_C(0x98467A235B079C62), UINT64_C(0xE25E378505450C98), + UINT64_C(0x2FE23A4D96140083), UINT64_C(0x65CE3B9AE9900312) }, + { UINT64_C(0x1D87D0886B72B5D9), UINT64_C(0x72F53220FD9AFC82), + UINT64_C(0xC63C7C159E1F71FA), UINT64_C(0x90DF26EA8D449637), + UINT64_C(0x97089F40C1C2B215), UINT64_C(0x83AF266442317FAA) } }, + }, + { + { { UINT64_C(0xFA2DB51A8D688E31), UINT64_C(0x225B696CA09C88D4), + UINT64_C(0x9F88AF1D6059171F), UINT64_C(0x1C5FEA5E782A0993), + UINT64_C(0xE0FB15884EC710D3), UINT64_C(0xFAF372E5D32CE365) }, + { UINT64_C(0xD9F896AB26506F45), UINT64_C(0x8D3503388373C724), + UINT64_C(0x1B76992DCA6E7342), UINT64_C(0x76338FCA6FD0C08B), + UINT64_C(0xC3EA4C65A00F5C23), UINT64_C(0xDFAB29B3B316B35B) } }, + { { 
UINT64_C(0x84E5541F483AEBF9), UINT64_C(0x8ADFF7DC49165772), + UINT64_C(0xE0A43AD69BEAAD3C), UINT64_C(0x97DD1820F51C2714), + UINT64_C(0xAC2B4CB457EA5B0C), UINT64_C(0x87DBD011D11767CA) }, + { UINT64_C(0x18CCF36CBFC7957A), UINT64_C(0xD4A088411BC79227), + UINT64_C(0x9811CE43D8D292A8), UINT64_C(0x72C5FC68D58C4EE7), + UINT64_C(0x5BC0F0BED35C65A7), UINT64_C(0x0B446DBCCBBF9669) } }, + { { UINT64_C(0x7EBA3DA69CEE9BCE), UINT64_C(0x3E2C1248D5377750), + UINT64_C(0x8C917D982B93D8B2), UINT64_C(0xCA8FC6AC7CAD1F75), + UINT64_C(0x5F581F19A0FF150A), UINT64_C(0x872CC14AE08327FA) }, + { UINT64_C(0xC774F187E9333188), UINT64_C(0x528ED4AC497AF7E8), + UINT64_C(0xCE036E9B8AD72B10), UINT64_C(0x463F9EBB917986CF), + UINT64_C(0xBE5163281325CF9B), UINT64_C(0xD28D5C50DD7E5FEA) } }, + { { UINT64_C(0x714C1D1BDD58BBE3), UINT64_C(0x85BA01AE039AFD0F), + UINT64_C(0x7F23EA3A6951AC80), UINT64_C(0x5C599290AC00C837), + UINT64_C(0xF6EFA2B3BF24CC1B), UINT64_C(0x393D8E421E84462B) }, + { UINT64_C(0x9BDA627DF8B89453), UINT64_C(0xE66FFF2EB23E0D1B), + UINT64_C(0xD1EE7089C3B94EC2), UINT64_C(0xF75DBA6E3031699A), + UINT64_C(0x8FF75F79242B2453), UINT64_C(0xE721EDEB289BFED4) } }, + { { UINT64_C(0x083215A1C1390FA8), UINT64_C(0x901D686A6DCE8CE0), + UINT64_C(0x4AB1BA62837073FF), UINT64_C(0x10C287AA34BEABA5), + UINT64_C(0xB4931AF446985239), UINT64_C(0x07639899B053C4DC) }, + { UINT64_C(0x29E7F44DE721EECD), UINT64_C(0x6581718257B3FF48), + UINT64_C(0x198542E25054E2E0), UINT64_C(0x923C9E1584616DE8), + UINT64_C(0x2A9C15E1AD465BB9), UINT64_C(0xD8D4EFC716319245) } }, + { { UINT64_C(0x72DC79439961A674), UINT64_C(0x839A0A52A0E13668), + UINT64_C(0xD7A53FA9334945EA), UINT64_C(0xDB21DB77E7AA25DB), + UINT64_C(0xB6675A7D66E96DA3), UINT64_C(0x2C31C406E66F33C0) }, + { UINT64_C(0x45020B626EC7B9CB), UINT64_C(0xFF46E9CD0391F267), + UINT64_C(0x7DABD7440FA2F221), UINT64_C(0x9A32364B9D4A2A3E), + UINT64_C(0xF0F84AE852D2E47A), UINT64_C(0xD0B872BB888F488A) } }, + { { UINT64_C(0x531E4CEFC9790EEF), UINT64_C(0xF7B5735E2B8D1A58), + 
UINT64_C(0xB8882F1EEF568511), UINT64_C(0xAFB08D1C86A86DB3), + UINT64_C(0x88CB9DF2F54DE8C7), UINT64_C(0xA44234F19A683282) }, + { UINT64_C(0xBC1B3D3AA6E9AB2E), UINT64_C(0xEFA071FB87FC99EE), + UINT64_C(0xFA3C737DA102DC0F), UINT64_C(0xDF3248A6D6A0CBD2), + UINT64_C(0x6E62A4FF1ECC1BF4), UINT64_C(0xF718F940C8F1BC17) } }, + { { UINT64_C(0x2C8B0AAD4F63F026), UINT64_C(0x2AFF623850B253CC), + UINT64_C(0xCAB3E94210C4D122), UINT64_C(0x52B59F0407CD2816), + UINT64_C(0x22322803982C41FC), UINT64_C(0x38844E668CF50B19) }, + { UINT64_C(0x42A959F7BE3264CD), UINT64_C(0xBDDC24BD6C983524), + UINT64_C(0xA489EB0C462B8640), UINT64_C(0xB7C0509298029BE7), + UINT64_C(0xD5546B5FA1ADDC64), UINT64_C(0xE7CAC1FCA0C655AF) } }, + { { UINT64_C(0x1454719847636F97), UINT64_C(0x6FA67481EBCDCCFF), + UINT64_C(0xC164872F395D3258), UINT64_C(0xB8CECAFEEE6ACDBC), + UINT64_C(0x3FBFE5F3A933F180), UINT64_C(0xEC20CAC2898C3B1E) }, + { UINT64_C(0x6A031BEE87DA73F9), UINT64_C(0xD1E667D15C5AF46E), + UINT64_C(0xCB3DC1681DC6EEF9), UINT64_C(0x2DD1BD9433D310C0), + UINT64_C(0x0F78D4939207E438), UINT64_C(0xC233D544A99C0E75) } }, + { { UINT64_C(0x228F19F19E2A0113), UINT64_C(0x58495BE50E1A5D37), + UINT64_C(0x97E08F6938D7F364), UINT64_C(0x1EC3BA3E510759B0), + UINT64_C(0x3682F19AE03CD40D), UINT64_C(0xC87745D8F9E16D68) }, + { UINT64_C(0xFD527AB509A642EA), UINT64_C(0x6308EEBDF9C81F27), + UINT64_C(0xFA9F666C550C5D68), UINT64_C(0xDEBA436F584AB153), + UINT64_C(0x1D4861D35B63E939), UINT64_C(0x073BED9BC9850221) } }, + { { UINT64_C(0x802BCCF08B171246), UINT64_C(0xFFF7D15A733B072F), + UINT64_C(0xEA3862664CBFA4EF), UINT64_C(0x9E5B5073D635946B), + UINT64_C(0x16E9A979FA81BE95), UINT64_C(0x41E8716EB14F701F) }, + { UINT64_C(0x25782E0F101A6719), UINT64_C(0x442C4875C9D66959), + UINT64_C(0x52D845D92B85D153), UINT64_C(0xFF9251382E831117), + UINT64_C(0x01B700CC8E02434B), UINT64_C(0xD2DB7F8EEC0BAE3E) } }, + { { UINT64_C(0x1B225300966A4872), UINT64_C(0x40C149BE566F537B), + UINT64_C(0x3335F4D2CB680021), UINT64_C(0x773D0263778E5F5F), + 
UINT64_C(0x1D9B7602666FA9ED), UINT64_C(0x52490A102E6200CF) }, + { UINT64_C(0x8434C7DD961F290B), UINT64_C(0x773AC15664456446), + UINT64_C(0x5E2BB78947B712BB), UINT64_C(0xFD3BCBFDBE0974AD), + UINT64_C(0x71AE9351791AD5D8), UINT64_C(0x1EE738BA6F4E1400) } }, + { { UINT64_C(0x2FA428AB0BE8E26E), UINT64_C(0xFEFF0600BB4CF9FC), + UINT64_C(0x76F25CA9B2EA5FB0), UINT64_C(0xAB7FECF06835C5F4), + UINT64_C(0x649D077219D5F328), UINT64_C(0xABE7B895ACBCB12E) }, + { UINT64_C(0xF2D1031AD69B1EA8), UINT64_C(0x46065D5DC60B0BBB), + UINT64_C(0xB0908DC185D798FF), UINT64_C(0x4E2420F0D2C9B18A), + UINT64_C(0x6B3A9BDDD30432A2), UINT64_C(0x501C3383C9B134AD) } }, + { { UINT64_C(0x608F096798A21284), UINT64_C(0x5361BE86059CCEDE), + UINT64_C(0x3A40655CAFD87EF7), UINT64_C(0x03CF311759083AA2), + UINT64_C(0x57DB5F61B6C366D9), UINT64_C(0x29DC275B6DD0D232) }, + { UINT64_C(0xBDAB24DD8FA67501), UINT64_C(0x5928F77565D08C37), + UINT64_C(0x9448A856645D466A), UINT64_C(0x6E6B5E2EC0E927A5), + UINT64_C(0xE884D546E80C6871), UINT64_C(0x10C881C953A9A851) } }, + { { UINT64_C(0x355053749B627AA5), UINT64_C(0xE7CA1B577976677B), + UINT64_C(0x812397124976CE17), UINT64_C(0x96E9080B96DA31B9), + UINT64_C(0x458254ABCC64AA1F), UINT64_C(0xFEFF682148E674C9) }, + { UINT64_C(0x8772F37A021F1488), UINT64_C(0x2E274E18AB56345C), + UINT64_C(0x7C7BE61C29823B76), UINT64_C(0x275DB7B29EEFB39E), + UINT64_C(0x83B10ED4BF5CBCEF), UINT64_C(0x40D7F5B4518E5183) } }, + { { UINT64_C(0x315CCC01F960B41B), UINT64_C(0x90B417C91D99E722), + UINT64_C(0x84AFAA0D013463E0), UINT64_C(0xF133C5D813E6D9E1), + UINT64_C(0xD95C6ADC525B7430), UINT64_C(0x082C61AD7A25106A) }, + { UINT64_C(0xABC1966DBA1CE179), UINT64_C(0xE0578B77A5DB529A), + UINT64_C(0x10988C05EC84107D), UINT64_C(0xFCADE5D71B207F83), + UINT64_C(0x0BEB6FDBC5BA83DB), UINT64_C(0x1C39B86D57537E34) } }, + }, + { + { { UINT64_C(0x5B0B5D692A7AECED), UINT64_C(0x4C03450C01DC545F), + UINT64_C(0x72AD0A4A404A3458), UINT64_C(0x1DE8E2559F467B60), + UINT64_C(0xA4B3570590634809), UINT64_C(0x76F30205706F0178) }, + { 
UINT64_C(0x588D21AB4454F0E5), UINT64_C(0xD22DF54964134928), + UINT64_C(0xF4E7E73D241BCD90), UINT64_C(0xB8D8A1D22FACC7CC), + UINT64_C(0x483C35A71D25D2A0), UINT64_C(0x7F8D25451EF9F608) } }, + { { UINT64_C(0xCB51F03954EBC926), UINT64_C(0xE235D356B8D4A7BB), + UINT64_C(0x93C8FAFAB41FE1A6), UINT64_C(0x6297701DA719F254), + UINT64_C(0x6E9165BC644F5CDE), UINT64_C(0x6506329D0C11C542) }, + { UINT64_C(0xA2564809A92B4250), UINT64_C(0x0E9AC173889C2E3E), + UINT64_C(0x286A592622B1D1BE), UINT64_C(0x86A3D7526ECDD041), + UINT64_C(0x4B867E0A649F9524), UINT64_C(0x1FE7D95A0629CB0F) } }, + { { UINT64_C(0xF4F66843CA5BAF54), UINT64_C(0x298DB357EFE7DB78), + UINT64_C(0xF607E86E7365712F), UINT64_C(0xD58822988A822BC0), + UINT64_C(0x2CFBD63AC61299B3), UINT64_C(0x6F713D9B67167B1A) }, + { UINT64_C(0x750F673FDE0B077A), UINT64_C(0x07482708EE2178DA), + UINT64_C(0x5E6D5BD169123C75), UINT64_C(0x6A93D1B6EAB99B37), + UINT64_C(0x6EF4F7E68CAEC6A3), UINT64_C(0x7BE411D6CF3ED818) } }, + { { UINT64_C(0xF92B307363A0A7D2), UINT64_C(0x32DA431C881DC8CF), + UINT64_C(0xE51BD5EDC578E3A3), UINT64_C(0xEFDA70D29587FA22), + UINT64_C(0xCFEC17089B2EBA85), UINT64_C(0x6AB51A4BAF7BA530) }, + { UINT64_C(0x5AC155AE98174812), UINT64_C(0xCAF07A71CCB076E3), + UINT64_C(0x280E86C2C38718A7), UINT64_C(0x9D12DE73D63745B7), + UINT64_C(0x0E8EA855BF8A79AA), UINT64_C(0x5EB2BED8BD705BF7) } }, + { { UINT64_C(0x33FE9578AE16DE53), UINT64_C(0x3AE85EB510BEC902), + UINT64_C(0xC4F4965844AF850E), UINT64_C(0x6EA222B3087DD658), + UINT64_C(0xB255E6FDA51F1447), UINT64_C(0xB35E4997117E3F48) }, + { UINT64_C(0x562E813B05616CA1), UINT64_C(0xDF5925D68A61E156), + UINT64_C(0xB2FA8125571C728B), UINT64_C(0x00864805A2F2D1CF), + UINT64_C(0x2DC26F411BCCB6FF), UINT64_C(0xEBD5E09363AE37DD) } }, + { { UINT64_C(0xD2D68BB30A285611), UINT64_C(0x3EAE7596DC8378F2), + UINT64_C(0x2DC6CCC66CC688A3), UINT64_C(0xC45E5713011F5DFB), + UINT64_C(0x6B9C4F6C62D34487), UINT64_C(0xFAD6F0771FC65551) }, + { UINT64_C(0x5E3266E062B23B52), UINT64_C(0xF1DAF319E98F4715), + 
UINT64_C(0x064D12EA3ED0AE83), UINT64_C(0x5CCF9326564125CB), + UINT64_C(0x09057022C63C1E9F), UINT64_C(0x7171972CDC9B5D2E) } }, + { { UINT64_C(0x2364FD9AEABD21B2), UINT64_C(0x3CE5F4BB9174AD6D), + UINT64_C(0xA4D6D5D0B38688C0), UINT64_C(0x2292A2D26D87FD7D), + UINT64_C(0x2A7D1B534CA02E54), UINT64_C(0x7BEE6E7EB4185715) }, + { UINT64_C(0x73E546098FC63ACD), UINT64_C(0xF4D93A124064E09D), + UINT64_C(0xD20E157A2B92DAA5), UINT64_C(0x90D125DBC4B81A00), + UINT64_C(0xCB951C9E7682DE13), UINT64_C(0x1ABE58F427987545) } }, + { { UINT64_C(0x6D35164030C70C8D), UINT64_C(0x8047D811CE2361B8), + UINT64_C(0x3F8B3D4FDF8E2C81), UINT64_C(0x5D59547733FA1F6C), + UINT64_C(0xF769FE5AE29B8A91), UINT64_C(0x26F0E606D737B2A2) }, + { UINT64_C(0x70CBFA5DB8B31C6A), UINT64_C(0x0F883B4A863D3AEA), + UINT64_C(0x156A4479E386AE2F), UINT64_C(0xA17A2FCDADE8A684), + UINT64_C(0x78BDF958E2A7E335), UINT64_C(0xD1B4E6733B9E3041) } }, + { { UINT64_C(0x1EAF48EC449A6D11), UINT64_C(0x6B94B8E46D2FA7B9), + UINT64_C(0x1D75D269728E4C1B), UINT64_C(0x91123819DD304E2C), + UINT64_C(0x0B34CAE388804F4B), UINT64_C(0x2BA192FBC5495E9A) }, + { UINT64_C(0xC93FF6EFFF4D24BF), UINT64_C(0xF8C2C0B00342BA78), + UINT64_C(0x8041F769831EB94C), UINT64_C(0x353100747782985E), + UINT64_C(0xC755320B3AF84E83), UINT64_C(0x384B6D266F497E7F) } }, + { { UINT64_C(0xEF92CD5917E6BD17), UINT64_C(0xA087305BA426965C), + UINT64_C(0x13895CE7AC47F773), UINT64_C(0xB85F2A9FE0BB2867), + UINT64_C(0x2926E6AA7CD7C58E), UINT64_C(0xE544EDA6450459C5) }, + { UINT64_C(0x73DBC351B90A9849), UINT64_C(0x961183F6848EBE86), + UINT64_C(0xC45BB21080534712), UINT64_C(0x379D08D7A654D9A3), + UINT64_C(0x5B97CEF2BD3FFA9C), UINT64_C(0x0F469F34DDC2FCE5) } }, + { { UINT64_C(0x6D1461080642F38D), UINT64_C(0x055171A0D21EB887), + UINT64_C(0x28DFFAB4D0DCEB28), UINT64_C(0x0D0E631298DE9CCD), + UINT64_C(0x750A9156118C3C3F), UINT64_C(0x8C1F1390B049D799) }, + { UINT64_C(0xE4823858439607C5), UINT64_C(0x947E9BA05C111EAB), + UINT64_C(0x39C95616A355DF2E), UINT64_C(0xF5F6B98E10E54BDA), + 
UINT64_C(0xB0E0B33D142B876A), UINT64_C(0x71197D73EA18C90C) } }, + { { UINT64_C(0x36A5139DF52BE819), UINT64_C(0xF60DDF3429A45D2B), + UINT64_C(0x0727EFECE9220E34), UINT64_C(0x431D33864EF7F446), + UINT64_C(0xC3165A64FCC4962C), UINT64_C(0xB7D926E1D64362BB) }, + { UINT64_C(0x216BC61FD45F9350), UINT64_C(0xA974CB2FBBAED815), + UINT64_C(0x31DF342D86FB2F76), UINT64_C(0x3AB67E0501D78314), + UINT64_C(0x7AA951E0DEE33ED2), UINT64_C(0x318FBBBDCEC78D94) } }, + { { UINT64_C(0xAD7EFB65B8FE0204), UINT64_C(0x0432E1C5230AB7F7), + UINT64_C(0x7563A62D9C967400), UINT64_C(0xD88B9C743524D4FF), + UINT64_C(0x16A1991CF1A823E3), UINT64_C(0xCF2F9BFEFA6F0FFB) }, + { UINT64_C(0x55AAA946A50CA61F), UINT64_C(0x8CBBD3C8FED4CAB3), + UINT64_C(0x03A0FAB87651365A), UINT64_C(0x46B5234B62DC3913), + UINT64_C(0xFD875B28B558CBBD), UINT64_C(0xA48EC3AE11CEB361) } }, + { { UINT64_C(0x5DD131A1B3ADBD8B), UINT64_C(0xF9FBCA3A29B45EF8), + UINT64_C(0x022048669341EE18), UINT64_C(0x8D13B89583BF9618), + UINT64_C(0x0E395BAEE807459C), UINT64_C(0xB9C110CCB190E7DB) }, + { UINT64_C(0xA0DC345225D25063), UINT64_C(0x2FB78EC802371462), + UINT64_C(0xC3A9E7BB8975C2D5), UINT64_C(0x9466687285A78264), + UINT64_C(0x480D2CC28029AA92), UINT64_C(0x237086C75655726D) } }, + { { UINT64_C(0x197F14BB65EB9EEE), UINT64_C(0xFC93125C9F12E5FD), + UINT64_C(0x9C20BC538BFBAE5E), UINT64_C(0xB35E21544BC053BA), + UINT64_C(0xE5FA9CC721C3898E), UINT64_C(0x502D72FFD42F950F) }, + { UINT64_C(0x6812D38AD1EB8C31), UINT64_C(0x1F77F3F1080D30BB), + UINT64_C(0x18D128335A8B1E98), UINT64_C(0x7FD39FA9299196CE), + UINT64_C(0xFB8C9F11CF4ED6D6), UINT64_C(0x4C00F604D6363194) } }, + { { UINT64_C(0x5C8AFCF9FA2A21C2), UINT64_C(0x71CBF2821928D133), + UINT64_C(0x56BEF28E42B29506), UINT64_C(0xAFBA250C70323DE2), + UINT64_C(0x3FE208D17DED2C30), UINT64_C(0xBD2CD213CE9AA598) }, + { UINT64_C(0x52C5EC52CFEED070), UINT64_C(0x0A7223E7D3DA336B), + UINT64_C(0x7156A4EDCE156B46), UINT64_C(0x9AF6C499ED7E6159), + UINT64_C(0x9D7A679713C029AD), UINT64_C(0xE5B5C9249018DC77) } }, + }, + { + { { 
UINT64_C(0x3F2EFF53DE1E4E55), UINT64_C(0x6B749943E4D3ECC4), + UINT64_C(0xAF10B18A0DDE190D), UINT64_C(0xF491B98DA26B0409), + UINT64_C(0x66080782A2B1D944), UINT64_C(0x59277DC697E8C541) }, + { UINT64_C(0xFDBFC5F6006F18AA), UINT64_C(0x435D165BFADD8BE1), + UINT64_C(0x8E5D263857645EF4), UINT64_C(0x31BCFDA6A0258363), + UINT64_C(0xF5330AB8D35D2503), UINT64_C(0xB71369F0C7CAB285) } }, + { { UINT64_C(0xE6A19DCC40ACC5A8), UINT64_C(0x1C3A1FF1DBC6DBF8), + UINT64_C(0xB4D89B9FC6455613), UINT64_C(0x6CB0FE44A7390D0E), + UINT64_C(0xADE197A459EA135A), UINT64_C(0xDA6AA86520680982) }, + { UINT64_C(0x03DB9BE95A442C1B), UINT64_C(0x221A2D732BFB93F2), + UINT64_C(0x44DEE8D4753C196C), UINT64_C(0x59ADCC700B7C6FF5), + UINT64_C(0xC6260EC24CA1B142), UINT64_C(0x4C3CB5C646CBD4F2) } }, + { { UINT64_C(0x8A15D6FEA417111F), UINT64_C(0xFE4A16BD71D93FCC), + UINT64_C(0x7A7EE38C55BBE732), UINT64_C(0xEFF146A51FF94A9D), + UINT64_C(0xE572D13EDD585AB5), UINT64_C(0xD879790E06491A5D) }, + { UINT64_C(0x9C84E1C52A58CB2E), UINT64_C(0xD79D13746C938630), + UINT64_C(0xDB12CD9B385F06C7), UINT64_C(0x0C93EB977A7759C3), + UINT64_C(0xF1F5B0FE683BD706), UINT64_C(0x541E4F7285EC3D50) } }, + { { UINT64_C(0x9A0E153581833608), UINT64_C(0x5CCE871E6E2833AC), + UINT64_C(0xC17059EAFB29777C), UINT64_C(0x7E40E5FAE354CAFD), + UINT64_C(0x9CF594054D07C371), UINT64_C(0x64CE36B2A71C3945) }, + { UINT64_C(0x69309E9656CAF487), UINT64_C(0x3D719E9F1AE3454B), + UINT64_C(0xF2164070E25823B6), UINT64_C(0xEAD851BD0BC27359), + UINT64_C(0x3D21BFE8B0925094), UINT64_C(0xA783B1E934A97F4E) } }, + { { UINT64_C(0x406B0C269546491A), UINT64_C(0x9E5E15E2F293C4E5), + UINT64_C(0xC60D641315B164DB), UINT64_C(0x0DA46F530C75A78E), + UINT64_C(0x7C599BB7EA0C656B), UINT64_C(0x0F07A5121B1A8122) }, + { UINT64_C(0x14C7204A15172686), UINT64_C(0x8FAEDFF85165625D), + UINT64_C(0x20F260CE37AEDE40), UINT64_C(0xC81F771E8F357FFE), + UINT64_C(0x25499197B0912557), UINT64_C(0x736197DC4C739C74) } }, + { { UINT64_C(0x6151BAB1381B3462), UINT64_C(0x27E5A07843DBD344), + 
UINT64_C(0x2CB05BD6A1C3E9FB), UINT64_C(0x2A75976027CF2A11), + UINT64_C(0x0ADCF9DBFF43E702), UINT64_C(0x4BBF03E21F484146) }, + { UINT64_C(0x0E74997F55B6521A), UINT64_C(0x15629231ADE17086), + UINT64_C(0x7F143E867493FC58), UINT64_C(0x60869095AF8B9670), + UINT64_C(0x482CFCD77E524869), UINT64_C(0x9E8060C31D454756) } }, + { { UINT64_C(0xE495747AC88B4D3B), UINT64_C(0xB7559835AE8A948F), + UINT64_C(0x67EEF3A9DEB56853), UINT64_C(0x0E20E2699DEE5ADF), + UINT64_C(0x9031AF6761F0A1AA), UINT64_C(0x76669D32683402BC) }, + { UINT64_C(0x90BD231306718B16), UINT64_C(0xE1B22A21864EFDAC), + UINT64_C(0xE4FFE9096620089F), UINT64_C(0xB84C842E3428E2D9), + UINT64_C(0x0E28C880FE3871FC), UINT64_C(0x8932F6983F21C200) } }, + { { UINT64_C(0x603F00CE6C90EA5D), UINT64_C(0x6473930740A2F693), + UINT64_C(0xAF65148B2174E517), UINT64_C(0x162FC2CAF784AE74), + UINT64_C(0x0D9A88254D5F6458), UINT64_C(0x0C2D586143AACE93) }, + { UINT64_C(0xBF1EADDE9F73CBFC), UINT64_C(0xDE9C34C09C68BBCA), + UINT64_C(0x6D95602D67EF8A1A), UINT64_C(0x0AF2581BA791B241), + UINT64_C(0x14F7736112CAD604), UINT64_C(0x19F2354DE2ACD1AD) } }, + { { UINT64_C(0x272F78F60D60F263), UINT64_C(0xE7A8F4AF208FD785), + UINT64_C(0x10E191C636554F2C), UINT64_C(0x06D88551FD5CD0B3), + UINT64_C(0x29BF856857069C27), UINT64_C(0x3CE7ECD828AA6FAD) }, + { UINT64_C(0x7D8A92D0E9F1A1D8), UINT64_C(0xD40C7FF8D30B5725), + UINT64_C(0x16BE6CB2F54CAEB8), UINT64_C(0x14CA471A14CB0A91), + UINT64_C(0xD5FF15B802733CAE), UINT64_C(0xCAF88D87DAA76580) } }, + { { UINT64_C(0x39430E222C046592), UINT64_C(0x6CDAE81F1AD26706), + UINT64_C(0x8C102159A25D9106), UINT64_C(0x9A44057227CA9F30), + UINT64_C(0x8D34C43070287FBC), UINT64_C(0x9003A45529DB8AFA) }, + { UINT64_C(0x91364CC37FD971AD), UINT64_C(0x7B3AA0489C60EDB7), + UINT64_C(0x58B0E008526F4DD8), UINT64_C(0xB7674454D86D98AE), + UINT64_C(0xC25F4051B2B45747), UINT64_C(0x8243BF9CCC043E8F) } }, + { { UINT64_C(0xA89641C643A0C387), UINT64_C(0x6D92205C87B9AB17), + UINT64_C(0x37D691F4DAA0E102), UINT64_C(0xEB3E52D7CDE5312E), + 
UINT64_C(0x60D3C09916F518A2), UINT64_C(0x7854C0518A378EEB) }, + { UINT64_C(0x7359DB514BBCAAC5), UINT64_C(0xF5B1B68C1713F102), + UINT64_C(0xDAEAE645E4398DE5), UINT64_C(0x8C8ACB6CD1ABFB82), + UINT64_C(0x2E8B76C3136423E2), UINT64_C(0x509DCB2DA8BA015E) } }, + { { UINT64_C(0x2FF368159AD9C59C), UINT64_C(0xB189A4E8658E65B9), + UINT64_C(0x7D33DDBBEA786AD2), UINT64_C(0x96D0D648C0D2DC05), + UINT64_C(0x05E49256BFA03BE9), UINT64_C(0x0EA4E7A68BAF5A1C) }, + { UINT64_C(0x3DDCE0B09F9AD5A8), UINT64_C(0xF78091959E49C2CB), + UINT64_C(0xBFCEF29D21782C2F), UINT64_C(0xE57AD39FC41BFD97), + UINT64_C(0xC04B93E81355AD19), UINT64_C(0xAABC9E6E59440F9F) } }, + { { UINT64_C(0x7AA481035B6459DA), UINT64_C(0x83EF74770166E880), + UINT64_C(0x536182B1511CCE80), UINT64_C(0xAFDD2EEE73CA55AA), + UINT64_C(0xAB910D0DA8716143), UINT64_C(0x8BEAA42B83707250) }, + { UINT64_C(0x4BCCFD898DA2AB3D), UINT64_C(0x1DBF68A9EC6AA105), + UINT64_C(0x32CE610868EB42DA), UINT64_C(0x5C2C2C858EA62E37), + UINT64_C(0x1ED2791FCD3088A7), UINT64_C(0x496B4FEBFF05070C) } }, + { { UINT64_C(0x9FA9121A0AA629C5), UINT64_C(0xE286CFF157558BEC), + UINT64_C(0x4D9D657E59813A4D), UINT64_C(0xC4676A1626103519), + UINT64_C(0x616160B32BD4DF80), UINT64_C(0x26FB78CC30FBAE87) }, + { UINT64_C(0x096070138F0F66BD), UINT64_C(0xDD4E2D0C03D9B90D), + UINT64_C(0x5D3A8912600D1B12), UINT64_C(0xF76DD52F4308E126), + UINT64_C(0x97CC04099E4FCCA6), UINT64_C(0x0CFBE31104C4DF7B) } }, + { { UINT64_C(0x6CA62C1228437A23), UINT64_C(0x0DAF335340E7A003), + UINT64_C(0x1FD07DF0D20F8079), UINT64_C(0xEAE7969C3BBC9749), + UINT64_C(0x55861AFA9ECAD022), UINT64_C(0xEC41DAD91FBC3D4C) }, + { UINT64_C(0x1FE4CB40DA8B261B), UINT64_C(0xC2671AB6427C5C9D), + UINT64_C(0xDFCDA7B8261D4939), UINT64_C(0x9E7B802B2072C0B9), + UINT64_C(0x3AFEE900C7828CC2), UINT64_C(0x3488BF28F6DE987F) } }, + { { UINT64_C(0x33B9F2DE7BE1F89E), UINT64_C(0xD4E80821299B15C9), + UINT64_C(0x87A3067A0E13F37F), UINT64_C(0x6D4C09ED55FD239F), + UINT64_C(0x48B1042D92EF014F), UINT64_C(0xA382B2E0B385A759) }, + { 
UINT64_C(0xBF571BB07F6F84F8), UINT64_C(0x25AFFA370CE87F50), + UINT64_C(0x826906D3FE54F1BC), UINT64_C(0x6B0421F4C53AE76A), + UINT64_C(0x44F85A3A4855EB3C), UINT64_C(0xF49E21518D1F2B27) } }, + }, + { + { { UINT64_C(0xC0426B775E3C647B), UINT64_C(0xBFCBD9398CF05348), + UINT64_C(0x31D312E3172C0D3D), UINT64_C(0x5F49FDE6EE754737), + UINT64_C(0x895530F06DA7EE61), UINT64_C(0xCF281B0AE8B3A5FB) }, + { UINT64_C(0xFD14973541B8A543), UINT64_C(0x41A625A73080DD30), + UINT64_C(0xE2BAAE07653908CF), UINT64_C(0xC3D01436BA02A278), + UINT64_C(0xA0D0222E7B21B8F8), UINT64_C(0xFDC270E9D7EC1297) } }, + { { UINT64_C(0x06A67BD29F101E64), UINT64_C(0xCB6E0AC7E1733A4A), + UINT64_C(0xEE0B5D5197BC62D2), UINT64_C(0x52B1703924C51874), + UINT64_C(0xFED1F42382A1A0D5), UINT64_C(0x55D90569DB6270AC) }, + { UINT64_C(0x36BE4A9C5D73D533), UINT64_C(0xBE9266D6976ED4D5), + UINT64_C(0xC17436D3B8F8074B), UINT64_C(0x3BB4D399718545C6), + UINT64_C(0x8E1EA3555C757D21), UINT64_C(0xF7EDBC978C474366) } }, + { { UINT64_C(0xEC72C6506EA83242), UINT64_C(0xF7DE7BE51B2D237F), + UINT64_C(0x3C5E22001819EFB0), UINT64_C(0xDF5AB6D68CDDE870), + UINT64_C(0x75A44E9D92A87AEE), UINT64_C(0xBDDC46F4BCF77F19) }, + { UINT64_C(0x8191EFBD669B674D), UINT64_C(0x52884DF9ED71768F), + UINT64_C(0xE62BE58265CF242C), UINT64_C(0xAE99A3B180B1D17B), + UINT64_C(0x48CBB44692DE59A9), UINT64_C(0xD3C226CF2DCB3CE2) } }, + { { UINT64_C(0x9580CDFB9FD94EC4), UINT64_C(0xED273A6C28631AD9), + UINT64_C(0x5D3D5F77C327F3E7), UINT64_C(0x05D5339C35353C5F), + UINT64_C(0xC56FB5FE5C258EB1), UINT64_C(0xEFF8425EEDCE1F79) }, + { UINT64_C(0xAB7AA141CF83CF9C), UINT64_C(0xBD2A690A207D6D4F), + UINT64_C(0xE1241491458D9E52), UINT64_C(0xDD2448CCAA7F0F31), + UINT64_C(0xEC58D3C7F0FDA7AB), UINT64_C(0x7B6E122DC91BBA4D) } }, + { { UINT64_C(0x2A2DEDAFB1B48156), UINT64_C(0xA0A2C63ABB93DB87), + UINT64_C(0xC655907808ACD99E), UINT64_C(0x03EA42AFFE4AC331), + UINT64_C(0x43D2C14AEB180ED6), UINT64_C(0xC2F293DDB1156A1A) }, + { UINT64_C(0x1FAFABF5A9D81249), UINT64_C(0x39ADDEAD9A8EEE87), + 
UINT64_C(0x21E206F2119E2E92), UINT64_C(0xBC5DCC2ED74DCEB6), + UINT64_C(0x86647FA30A73A358), UINT64_C(0xEAD8BEA42F53F642) } }, + { { UINT64_C(0x636225F591C09091), UINT64_C(0xCCF5070A71BDCFDF), + UINT64_C(0x0EF8D625B9668EE2), UINT64_C(0x57BDF6CDB5E04E4F), + UINT64_C(0xFC6AB0A67C75EA43), UINT64_C(0xEB6B8AFBF7FD6EF3) }, + { UINT64_C(0x5B2AEEF02A3DF404), UINT64_C(0x31FD3B48B9823197), + UINT64_C(0x56226DB683A7EB23), UINT64_C(0x3772C21E5BB1ED2F), + UINT64_C(0x3E833624CD1ABA6A), UINT64_C(0xBAE58FFAAC672DAD) } }, + { { UINT64_C(0xCE92224D31BA1705), UINT64_C(0x022C6ED2F0197F63), + UINT64_C(0x21F18D99A4DC1113), UINT64_C(0x5CD04DE803616BF1), + UINT64_C(0x6F9006799FF12E08), UINT64_C(0xF59A331548E61DDF) }, + { UINT64_C(0x9474D42CB51BD024), UINT64_C(0x11A0A4139051E49D), + UINT64_C(0x79C92705DCE70EDB), UINT64_C(0x113CE27834198426), + UINT64_C(0x8978396FEA8616D2), UINT64_C(0x9A2A14D0EA894C36) } }, + { { UINT64_C(0x4F1E1254604F6E4A), UINT64_C(0x4513B0880187D585), + UINT64_C(0x9022F25719E0F482), UINT64_C(0x51FB2A80E2239DBF), + UINT64_C(0x49940D9E998ED9D5), UINT64_C(0x0583D2416C932C5D) }, + { UINT64_C(0x1188CEC8F25B73F7), UINT64_C(0xA28788CB3B3D06CD), + UINT64_C(0xDEA194ECA083DB5A), UINT64_C(0xD93A4F7E22DF4272), + UINT64_C(0x8D84E4BF6A009C49), UINT64_C(0x893D8DD93E3E4A9E) } }, + { { UINT64_C(0x35E909EA33D31160), UINT64_C(0x5020316857172F1E), + UINT64_C(0x2707FC4451F3D866), UINT64_C(0xEB9D2018D2442A5D), + UINT64_C(0x904D72095DBFE378), UINT64_C(0x6DB132A35F13CF77) }, + { UINT64_C(0x9D842BA67A3AF54B), UINT64_C(0x4E16EA195AA5B4F9), + UINT64_C(0x2BBA457CAF24228E), UINT64_C(0xCC04B3BB16F3C5FE), + UINT64_C(0xBAFAC51677E64944), UINT64_C(0x31580A34F08BCEE0) } }, + { { UINT64_C(0xC6808DEE20C30ACA), UINT64_C(0xDADD216FA3EA2056), + UINT64_C(0xD331394E7A4A9F9D), UINT64_C(0x9E0441AD424C4026), + UINT64_C(0xAEED102F0AEB5350), UINT64_C(0xC6697FBBD45B09DA) }, + { UINT64_C(0x52A2590EDEAC1496), UINT64_C(0x7142B831250B87AF), + UINT64_C(0xBEF2E68B6D0784A8), UINT64_C(0x5F62593AA5F71CEF), + 
UINT64_C(0x3B8F7616B5DA51A3), UINT64_C(0xC7A6FA0DB680F5FE) } }, + { { UINT64_C(0x36C21DE699C8227C), UINT64_C(0xBEE3E867C26813B1), + UINT64_C(0x9B05F2E6BDD91549), UINT64_C(0x34FF2B1FA7D1110F), + UINT64_C(0x8E6953B937F67FD0), UINT64_C(0x56C7F18BC3183E20) }, + { UINT64_C(0x48AF46DE9E2019ED), UINT64_C(0xDEAF972EF551BBBF), + UINT64_C(0x88EE38F8CC5E3EEF), UINT64_C(0xFB8D7A44392D6BAF), + UINT64_C(0x32293BFC0127187D), UINT64_C(0x7689E767E58647CC) } }, + { { UINT64_C(0x00CE901B52168013), UINT64_C(0xC6BF8E38837AAE71), + UINT64_C(0xD6F11EFA167677D8), UINT64_C(0xE53BB48586C8E5CF), + UINT64_C(0x671167CEC48E74AB), UINT64_C(0x8A40218C8AD720A7) }, + { UINT64_C(0x81E827A6E7C1191A), UINT64_C(0x54058F8DADDB153D), + UINT64_C(0x0BAF29250D950FA2), UINT64_C(0xC244674D576DDA13), + UINT64_C(0x8C4630AE41BCD13B), UINT64_C(0x6C2127BF5A077419) } }, + { { UINT64_C(0xCF977FD5A83C501F), UINT64_C(0xD7C6DF36B6AB176F), + UINT64_C(0x117F6331397BC6B5), UINT64_C(0x72A6078BF7A2D491), + UINT64_C(0xE5A2AAED5242FE2E), UINT64_C(0x88ECFFDCFEBDC212) }, + { UINT64_C(0xF2DBBF50CE33BA21), UINT64_C(0xE1343B76CEB19F07), + UINT64_C(0x1F32D4C9D2C28F71), UINT64_C(0x93FC64B418587685), + UINT64_C(0x39CEEF9BBA1F8BD1), UINT64_C(0x99C36A788D6D6BB0) } }, + { { UINT64_C(0x0D0638173E9561CF), UINT64_C(0x1D8646AA3D33704D), + UINT64_C(0x8C4513847A08BA33), UINT64_C(0x96446BD3E02D6624), + UINT64_C(0x749849F02D6F4166), UINT64_C(0xE364DA0114268BF0) }, + { UINT64_C(0x7CE4587E9AEBFCFD), UINT64_C(0xD468606456234393), + UINT64_C(0x00231D5116DF73B2), UINT64_C(0xF6A969B77279C78C), + UINT64_C(0x1FF1F6B66CB4117C), UINT64_C(0x30AEBC39D3EAB680) } }, + { { UINT64_C(0x5CC97E6493EF00B9), UINT64_C(0xDAE13841972345AE), + UINT64_C(0x858391844788F43C), UINT64_C(0xD0FF521EE2E6CF3E), + UINT64_C(0xAED14A5B4B707C86), UINT64_C(0x7EAAE4A6D2523CF7) }, + { UINT64_C(0x266472C5024C8AC6), UINT64_C(0xE47E1522C0170051), + UINT64_C(0x7B83DA6173826BAE), UINT64_C(0xE97E19F5CF543F0D), + UINT64_C(0x5D5248FA20BF38E2), UINT64_C(0x8A7C2F7DDF56A037) } }, + { { 
UINT64_C(0xB04659DD87B0526C), UINT64_C(0x593C604A2307565E), + UINT64_C(0x49E522257C630AB8), UINT64_C(0x24C1D0C6DCE9CD23), + UINT64_C(0x6FDB241C85177079), UINT64_C(0x5F521D19F250C351) }, + { UINT64_C(0xFB56134BA6FB61DF), UINT64_C(0xA4E70D69D75C07ED), + UINT64_C(0xB7A824487D8825A8), UINT64_C(0xA3AEA7D4DD64BBCC), + UINT64_C(0xD53E6E6C8692F539), UINT64_C(0x8DDDA83BF7AA4BC0) } }, + }, + { + { { UINT64_C(0x140A0F9FDD93D50A), UINT64_C(0x4799FFDE83B7ABAC), + UINT64_C(0x78FF7C2304A1F742), UINT64_C(0xC0568F51195BA34E), + UINT64_C(0xE97183603B7F78B4), UINT64_C(0x9CFD1FF1F9EFAA53) }, + { UINT64_C(0xE924D2C5BB06022E), UINT64_C(0x9987FA86FAA2AF6D), + UINT64_C(0x4B12E73F6EE37E0F), UINT64_C(0x1836FDFA5E5A1DDE), + UINT64_C(0x7F1B92259DCD6416), UINT64_C(0xCB2C1B4D677544D8) } }, + { { UINT64_C(0x0254486D9C213D95), UINT64_C(0x68A9DB56CB2F6E94), + UINT64_C(0xFB5858BA000F5491), UINT64_C(0x1315BDD934009FB6), + UINT64_C(0xB18A8E0AC42BDE30), UINT64_C(0xFDCF93D1F1070358) }, + { UINT64_C(0xBEB1DB753022937E), UINT64_C(0x9B9ECA7ACAC20DB4), + UINT64_C(0x152214D4E4122B20), UINT64_C(0xD3E673F2AABCCC7B), + UINT64_C(0x94C50F64AED07571), UINT64_C(0xD767059AE66B4F17) } }, + { { UINT64_C(0x40336B12DCD6D14B), UINT64_C(0xF6BCFF5DE3B4919C), + UINT64_C(0xC337048D9C841F0C), UINT64_C(0x4CE6D0251D617F50), + UINT64_C(0x00FEF2198117D379), UINT64_C(0x18B7C4E9F95BE243) }, + { UINT64_C(0x98DE119E38DF08FF), UINT64_C(0xDFD803BD8D772D20), + UINT64_C(0x94125B720F9678BD), UINT64_C(0xFC5B57CD334ACE30), + UINT64_C(0x09486527B7E86E04), UINT64_C(0xFE9F8BCC6E552039) } }, + { { UINT64_C(0x3B75C45BD6F5A10E), UINT64_C(0xFD4680F4C1C35F38), + UINT64_C(0x5450227DF8E0A113), UINT64_C(0x5E69F1AE73DDBA24), + UINT64_C(0x2007B80E57F24645), UINT64_C(0xC63695DC3D159741) }, + { UINT64_C(0xCBE54D294530F623), UINT64_C(0x986AD5732869586B), + UINT64_C(0xE19F70594CC39F73), UINT64_C(0x80F00AB32B1B8DA9), + UINT64_C(0xB765AAF973F68D26), UINT64_C(0xBC79A394E993F829) } }, + { { UINT64_C(0x9C441043F310D2A0), UINT64_C(0x2865EE58DC5EB106), + 
UINT64_C(0x71A959229CB8065C), UINT64_C(0x8EB3A733A052AF0F), + UINT64_C(0x56009F42B09D716E), UINT64_C(0xA7F923C5ABCBE6AD) }, + { UINT64_C(0x263B7669FA375C01), UINT64_C(0x641C47E521EF27A2), + UINT64_C(0xA89B474EB08FFD25), UINT64_C(0x5BE8EC3FF0A239F3), + UINT64_C(0x0E79957A242A6C5A), UINT64_C(0x1DFB26D00C6C75F5) } }, + { { UINT64_C(0x2FD97B9B9DFBF22A), UINT64_C(0xDEC16CC85643532D), + UINT64_C(0xDF0E6E3960FEE7C3), UINT64_C(0xD09AD7B6545860C8), + UINT64_C(0xCC16E98473FC3B7C), UINT64_C(0x6CE734C10D4E1555) }, + { UINT64_C(0xC6EFE68B4B5F6032), UINT64_C(0x3A64F34C14F54073), + UINT64_C(0x25DA689CAC44DC95), UINT64_C(0x990C477E5358AD8A), + UINT64_C(0x00E958A5F36DA7DE), UINT64_C(0x902B7360C9B6F161) } }, + { { UINT64_C(0x454AB42C9347B90A), UINT64_C(0xCAEBE64AA698B02B), + UINT64_C(0x119CDC69FB86FA40), UINT64_C(0x2E5CB7ADC3109281), + UINT64_C(0x67BB1EC5CD0C3D00), UINT64_C(0x5D430BC783F25BBF) }, + { UINT64_C(0x69FD84A85CDE0ABB), UINT64_C(0x69DA263E9816B688), + UINT64_C(0xE52D93DF0E53CBB8), UINT64_C(0x42CF6F25ADD2D5A7), + UINT64_C(0x227BA59DC87CA88F), UINT64_C(0x7A1CA876DA738554) } }, + { { UINT64_C(0x3FA5C1051CAC82C4), UINT64_C(0x23C760878A78C9BE), + UINT64_C(0xE98CDAD61C5CFA42), UINT64_C(0x09C302520A6C0421), + UINT64_C(0x149BAC7C42FC61B9), UINT64_C(0x3A1C22AC3004A3E2) }, + { UINT64_C(0xDE6B0D6E202C7FED), UINT64_C(0xB2457377E7E63052), + UINT64_C(0x31725FD43706B3EF), UINT64_C(0xE16A347D2B1AFDBF), + UINT64_C(0xBE4850C48C29CF66), UINT64_C(0x8F51CC4D2939F23C) } }, + { { UINT64_C(0x169E025B219AE6C1), UINT64_C(0x55FF526F116E1CA1), + UINT64_C(0x01B810A3B191F55D), UINT64_C(0x2D98127229588A69), + UINT64_C(0x53C9377048B92199), UINT64_C(0x8C7DD84E8A85236F) }, + { UINT64_C(0x293D48B6CAACF958), UINT64_C(0x1F084ACB43572B30), + UINT64_C(0x628BFA2DFAD91F28), UINT64_C(0x8D627B11829386AF), + UINT64_C(0x3EC1DD00D44A77BE), UINT64_C(0x8D3B0D08649AC7F0) } }, + { { UINT64_C(0x00A93DAA177513BF), UINT64_C(0x2EF0B96F42AD79E1), + UINT64_C(0x81F5AAF1A07129D9), UINT64_C(0xFC04B7EF923F2449), + 
UINT64_C(0x855DA79560CDB1B7), UINT64_C(0xB1EB5DABAD5D61D4) }, + { UINT64_C(0xD2CEF1AE353FD028), UINT64_C(0xC21D54399EE94847), + UINT64_C(0x9ED552BB0380C1A8), UINT64_C(0xB156FE7A2BAC328F), + UINT64_C(0xBB7E01967213C6A4), UINT64_C(0x36002A331701ED5B) } }, + { { UINT64_C(0x20B1632ADDC9EF4D), UINT64_C(0x2A35FF4C272D082B), + UINT64_C(0x30D39923F6CC9BD3), UINT64_C(0x6D879BC2E65C9D08), + UINT64_C(0xCE8274E16FA9983C), UINT64_C(0x652371E80EB7424F) }, + { UINT64_C(0x32B77503C5C35282), UINT64_C(0xD7306333C885A931), + UINT64_C(0x8A16D71972955AA8), UINT64_C(0x5548F1637D51F882), + UINT64_C(0xB311DC66BABA59EF), UINT64_C(0x773D54480DB8F627) } }, + { { UINT64_C(0x59B1B1347A62EB3B), UINT64_C(0x0F8CE157CCEEFB34), + UINT64_C(0x3FE842A8A798CB2B), UINT64_C(0xD01BC6260BF4161D), + UINT64_C(0x55EF6E554D016FDB), UINT64_C(0xCB561503B242B201) }, + { UINT64_C(0x076EBC73AF4199C1), UINT64_C(0x39DEDCBB697244F7), + UINT64_C(0x9D184733040162BC), UINT64_C(0x902992C17F6B5FA6), + UINT64_C(0xAD1DE754BB4952B5), UINT64_C(0x7ACF1B93A121F6C8) } }, + { { UINT64_C(0x7A56867C325C9B9A), UINT64_C(0x1A143999F3DC3D6A), + UINT64_C(0xCE10959003F5BCB8), UINT64_C(0x034E9035D6EEE5B7), + UINT64_C(0x2AFA81C8495DF1BC), UINT64_C(0x5EAB52DC08924D02) }, + { UINT64_C(0xEE6AA014AA181904), UINT64_C(0xE62DEF09310AD621), + UINT64_C(0x6C9792FCC7538A03), UINT64_C(0xA89D3E883E41D789), + UINT64_C(0xD60FA11C9F94AE83), UINT64_C(0x5E16A8C2E0D6234A) } }, + { { UINT64_C(0x87EC053DA9242F3B), UINT64_C(0x99544637F0E03545), + UINT64_C(0xEA0633FF6B7019E9), UINT64_C(0x8CB8AE0768DDDB5B), + UINT64_C(0x892E7C841A811AC7), UINT64_C(0xC7EF19EB73664249) }, + { UINT64_C(0xD1B5819ACD1489E3), UINT64_C(0xF9C80FB0DE45D24A), + UINT64_C(0x045C21A683BB7491), UINT64_C(0xA65325BE73F7A47D), + UINT64_C(0x08D09F0E9C394F0C), UINT64_C(0xE7FB21C6268D4F08) } }, + { { UINT64_C(0xC4CCAB956CA95C18), UINT64_C(0x563FFD56BC42E040), + UINT64_C(0xFA3C64D8E701C604), UINT64_C(0xC88D4426B0ABAFEE), + UINT64_C(0x1A353E5E8542E4C3), UINT64_C(0x9A2D8B7CED726186) }, + { 
UINT64_C(0xD61CE19042D097FA), UINT64_C(0x6A63E280799A748B), + UINT64_C(0x0F48D0633225486B), UINT64_C(0x848F8FE142A3C443), + UINT64_C(0x2CCDE2508493CEF4), UINT64_C(0x5450A50845E77E7C) } }, + { { UINT64_C(0xD0F4E24803112816), UINT64_C(0xFCAD9DDBCCBE9E16), + UINT64_C(0x177999BF5AE01EA0), UINT64_C(0xD20C78B9CE832DCE), + UINT64_C(0x3CC694FB50C8C646), UINT64_C(0x24D75968C93D4887) }, + { UINT64_C(0x9F06366A87BC08AF), UINT64_C(0x59FAB50E7FD0DF2A), + UINT64_C(0x5FFCC7F76C4CC234), UINT64_C(0x87198DD765F52D86), + UINT64_C(0x5B9C94B0A855DF04), UINT64_C(0xD8BA6C738A067AD7) } }, + }, + { + { { UINT64_C(0x9E9AF3151C4C9D90), UINT64_C(0x8665C5A9D12E0A89), + UINT64_C(0x204ABD9258286493), UINT64_C(0x79959889B2E09205), + UINT64_C(0x0C727A3DFE56B101), UINT64_C(0xF366244C8B657F26) }, + { UINT64_C(0xDE35D954CCA65BE2), UINT64_C(0x52EE1230B0FD41CE), + UINT64_C(0xFA03261F36019FEE), UINT64_C(0xAFDA42D966511D8F), + UINT64_C(0xF63211DD821148B9), UINT64_C(0x7B56AF7E6F13A3E1) } }, + { { UINT64_C(0x47FE47995913E184), UINT64_C(0x5BBE584C82145900), + UINT64_C(0xB76CFA8B9A867173), UINT64_C(0x9BC87BF0514BF471), + UINT64_C(0x37392DCE71DCF1FC), UINT64_C(0xEC3EFAE03AD1EFA8) }, + { UINT64_C(0xBBEA5A3414876451), UINT64_C(0x96E5F5436217090F), + UINT64_C(0x5B3D4ECD9B1665A9), UINT64_C(0xE7B0DF26E329DF22), + UINT64_C(0x18FB438E0BAA808D), UINT64_C(0x90757EBFDD516FAF) } }, + { { UINT64_C(0x1E6F9A95D5A98D68), UINT64_C(0x759EA7DF849DA828), + UINT64_C(0x365D56256E8B4198), UINT64_C(0xE1B9C53B7A4A53F9), + UINT64_C(0x55DC1D50E32B9B16), UINT64_C(0xA4657EBBBB6D5701) }, + { UINT64_C(0x4C270249EACC76E2), UINT64_C(0xBE49EC75162B1CC7), + UINT64_C(0x19A95B610689902B), UINT64_C(0xDD5706BFA4CFC5A8), + UINT64_C(0xD33BDB7314E5B424), UINT64_C(0x21311BD1E69EBA87) } }, + { { UINT64_C(0x75BA2F9B72A21ACC), UINT64_C(0x356688D4A28EDB4C), + UINT64_C(0x3C339E0B610D080F), UINT64_C(0x614AC29333A99C2F), + UINT64_C(0xA5E23AF2AA580AFF), UINT64_C(0xA6BCB860E1FDBA3A) }, + { UINT64_C(0xAA603365B43F9425), UINT64_C(0xAE8D7126F7EE4635), + 
UINT64_C(0xA2B2524456330A32), UINT64_C(0xC396B5BB9E025AA3), + UINT64_C(0xABBF77FAF8A0D5CF), UINT64_C(0xB322EE30EA31C83B) } }, + { { UINT64_C(0x048813847890E234), UINT64_C(0x387F1159672E70C6), + UINT64_C(0x1468A6147B307F75), UINT64_C(0x56335B52ED85EC96), + UINT64_C(0xDA1BB60FD45BCAE9), UINT64_C(0x4D94F3F0F9FAEADD) }, + { UINT64_C(0x6C6A7183FC78D86B), UINT64_C(0xA425B5C73018DEC6), + UINT64_C(0xB1549C332D877399), UINT64_C(0x6C41C50C92B2BC37), + UINT64_C(0x3A9F380C83EE0DDB), UINT64_C(0xDED5FEB6C4599E73) } }, + { { UINT64_C(0x14D34C210B7F8354), UINT64_C(0x1475A1CD9177CE45), + UINT64_C(0x9F5F764A9B926E4B), UINT64_C(0x77260D1E05DD21FE), + UINT64_C(0x3C882480C4B937F7), UINT64_C(0xC92DCD39722372F2) }, + { UINT64_C(0xF636A1BEEC6F657E), UINT64_C(0xB0E6C3121D30DD35), + UINT64_C(0xFE4B0528E4654EFE), UINT64_C(0x1C4A682021D230D2), + UINT64_C(0x615D2E4898FA45AB), UINT64_C(0x1F35D6D801FDBABF) } }, + { { UINT64_C(0xA636EEB83A7B10D1), UINT64_C(0x4E1AE352F4A29E73), + UINT64_C(0x01704F5FE6BB1EC7), UINT64_C(0x75C04F720EF020AE), + UINT64_C(0x448D8CEE5A31E6A6), UINT64_C(0xE40A9C29208F994B) }, + { UINT64_C(0x69E09A30FD8F9D5D), UINT64_C(0xE6A5F7EB449BAB7E), + UINT64_C(0xF25BC18A2AA1768B), UINT64_C(0x9449E4043C841234), + UINT64_C(0x7A3BF43E016A7BEF), UINT64_C(0xF25803E82A150B60) } }, + { { UINT64_C(0xE44A2A57B215F9E0), UINT64_C(0x38B34DCE19066F0A), + UINT64_C(0x8BB91DAD40BB1BFB), UINT64_C(0x64C9F775E67735FC), + UINT64_C(0xDE14241788D613CD), UINT64_C(0xC5014FF51901D88D) }, + { UINT64_C(0xA250341DF38116B0), UINT64_C(0xF96B9DD49D6CBCB2), + UINT64_C(0x15EC6C7276B3FAC2), UINT64_C(0x88F1952F8124C1E9), + UINT64_C(0x6B72F8EA975BE4F5), UINT64_C(0x23D288FF061F7530) } }, + { { UINT64_C(0xEBFE3E5FAFB96CE3), UINT64_C(0x2275EDFBB1979537), + UINT64_C(0xC37AB9E8C97BA741), UINT64_C(0x446E4B1063D7C626), + UINT64_C(0xB73E2DCED025EB02), UINT64_C(0x1F952B517669EEA7) }, + { UINT64_C(0xABDD00F66069A424), UINT64_C(0x1C0F9D9BDC298BFB), + UINT64_C(0x831B1FD3EB757B33), UINT64_C(0xD7DBE18359D60B32), + 
UINT64_C(0x663D1F369EF094B3), UINT64_C(0x1BD5732E67F7F11A) } }, + { { UINT64_C(0x3C7FB3F5C75D8892), UINT64_C(0x2CFF9A0CBA68DA69), + UINT64_C(0x76455E8B60EC740B), UINT64_C(0x4B8D67FF167B88F0), + UINT64_C(0xEDEC0C025A4186B1), UINT64_C(0x127C462DBEBF35AB) }, + { UINT64_C(0x9159C67E049430FC), UINT64_C(0x86B21DD2E7747320), + UINT64_C(0x0E0E01520CF27B89), UINT64_C(0x705F28F5CD1316B6), + UINT64_C(0x76751691BEAEA8A8), UINT64_C(0x4C73E282360C5B69) } }, + { { UINT64_C(0x46BCC0D5FD7B3D74), UINT64_C(0x6F13C20E0DC4F410), + UINT64_C(0x98A1AF7D72F11CDF), UINT64_C(0x6099FD837928881C), + UINT64_C(0x66976356371BB94B), UINT64_C(0x673FBA7219B945AB) }, + { UINT64_C(0xE4D8FA6EAED00700), UINT64_C(0xEA2313EC5C71A9F7), + UINT64_C(0xF9ED8268F99D4AEA), UINT64_C(0xADD8916442AB59C7), + UINT64_C(0xB37EB26F3F3A2D45), UINT64_C(0x0B39BD7AA924841E) } }, + { { UINT64_C(0xD811EB32E03CDBBB), UINT64_C(0x12055F1D7CC3610E), + UINT64_C(0x6B23A1A0A9046E3F), UINT64_C(0x4D7121229DD4A749), + UINT64_C(0xB0C2ACA1B1BF0AC3), UINT64_C(0x71EFF575C1B0432F) }, + { UINT64_C(0x6CD814922B44E285), UINT64_C(0x3088BD9CD87E8D20), + UINT64_C(0xACE218E5F567E8FA), UINT64_C(0xB3FA0424CF90CBBB), + UINT64_C(0xADBDA751770734D3), UINT64_C(0xBCD78BAD5AD6569A) } }, + { { UINT64_C(0xCADB31FA7F39641F), UINT64_C(0x3EF3E295825E5562), + UINT64_C(0x4893C633F4094C64), UINT64_C(0x52F685F18ADDF432), + UINT64_C(0x9FD887AB7FDC9373), UINT64_C(0x47A9ADA0E8680E8B) }, + { UINT64_C(0x579313B7F0CD44F6), UINT64_C(0xAC4B8668E188AE2E), + UINT64_C(0x648F43698FB145BD), UINT64_C(0xE0460AB374629E31), + UINT64_C(0xC25F28758FF2B05F), UINT64_C(0x4720C2B62D31EAEA) } }, + { { UINT64_C(0x4603CDF413D48F80), UINT64_C(0x9ADB50E2A49725DA), + UINT64_C(0x8CD3305065DF63F0), UINT64_C(0x58D8B3BBCD643003), + UINT64_C(0x170A4F4AB739826B), UINT64_C(0x857772B51EAD0E17) }, + { UINT64_C(0x01B78152E65320F1), UINT64_C(0xA6B4D845B7503FC0), + UINT64_C(0x0F5089B93DD50798), UINT64_C(0x488F200F5690B6BE), + UINT64_C(0x220B4ADF9E096F36), UINT64_C(0x474D7C9F8CE5BC7C) } }, + { { 
UINT64_C(0xFED8C058C745F8C9), UINT64_C(0xB683179E291262D1), + UINT64_C(0x26ABD367D15EE88C), UINT64_C(0x29E8EED3F60A6249), + UINT64_C(0xED6008BB1E02D6E1), UINT64_C(0xD82ECF4CA6B12B8D) }, + { UINT64_C(0x9929D021AAE4FA22), UINT64_C(0xBE4DEF14336A1AB3), + UINT64_C(0x529B7E098C80A312), UINT64_C(0xB059188DEE0EB0CE), + UINT64_C(0x1E42979A16DEAB7F), UINT64_C(0x2411034984EE9477) } }, + { { UINT64_C(0xD65246852BE579CC), UINT64_C(0x849316F1C456FDED), + UINT64_C(0xC51B7DA42D1B67DA), UINT64_C(0xC25B539E41BC6D6A), + UINT64_C(0xE3B7CCA3A9BF8BED), UINT64_C(0x813EF18C045C15E4) }, + { UINT64_C(0x5F3789A1697982C4), UINT64_C(0x4C1253698C435566), + UINT64_C(0x00A7AE6EDC0A92C6), UINT64_C(0x1ABC929B2F64A053), + UINT64_C(0xF4925C4C38666B44), UINT64_C(0xA81044B00F3DE7F6) } }, + }, + { + { { UINT64_C(0xBCC88422C2EC3731), UINT64_C(0x78A3E4D410DC4EC2), + UINT64_C(0x745DA1EF2571D6B1), UINT64_C(0xF01C2921739A956E), + UINT64_C(0xEFFD8065E4BFFC16), UINT64_C(0x6EFE62A1F36FE72C) }, + { UINT64_C(0xF49E90D20F4629A4), UINT64_C(0xADD1DCC78CE646F4), + UINT64_C(0xCB78B583B7240D91), UINT64_C(0x2E1A7C3C03F8387F), + UINT64_C(0x16566C223200F2D9), UINT64_C(0x2361B14BAAF80A84) } }, + { { UINT64_C(0xDB1CFFD2B5733309), UINT64_C(0x24BC250B0F9DD939), + UINT64_C(0xA4181E5AA3C1DB85), UINT64_C(0xE5183E51AC55D391), + UINT64_C(0x2793D5EFEFD270D0), UINT64_C(0x7D56F63DC0631546) }, + { UINT64_C(0xECB40A590C1EE59D), UINT64_C(0xE613A9E4BB5BFA2C), + UINT64_C(0xA89B14AB6C5830F9), UINT64_C(0x4DC477DCA03F201E), + UINT64_C(0x5604F5DAC88C54F6), UINT64_C(0xD49264DC2ACFC66E) } }, + { { UINT64_C(0x283DD7F01C4DFA95), UINT64_C(0xB898CC2C62C0B160), + UINT64_C(0xBA08C095870282AA), UINT64_C(0xB02B00D8F4E36324), + UINT64_C(0x53AADDC0604CECF2), UINT64_C(0xF1F927D384DDD24E) }, + { UINT64_C(0x34BC00A0E2ABC9E1), UINT64_C(0x2DA1227D60289F88), + UINT64_C(0x5228EAAACEF68F74), UINT64_C(0x40A790D23C029351), + UINT64_C(0xE0E9AF5C8442E3B7), UINT64_C(0xA3214142A9F141E0) } }, + { { UINT64_C(0x72F4949EF9A58E3D), UINT64_C(0x738C700BA48660A6), + 
UINT64_C(0x71B04726092A5805), UINT64_C(0xAD5C3C110F5CDB72), + UINT64_C(0xD4951F9E554BFC49), UINT64_C(0xEE594EE56131EBE7) }, + { UINT64_C(0x37DA59F33C1AF0A9), UINT64_C(0xD7AFC73BCB040A63), + UINT64_C(0xD020962A4D89FA65), UINT64_C(0x2610C61E71D824F5), + UINT64_C(0x9C917DA73C050E31), UINT64_C(0x3840F92FE6E7EBFB) } }, + { { UINT64_C(0x50FBD7FE8D8B8CED), UINT64_C(0xC7282F7547D240AE), + UINT64_C(0x79646A471930FF73), UINT64_C(0x2E0BAC4E2F7F5A77), + UINT64_C(0x0EE44FA526127E0B), UINT64_C(0x678881B782BC2AA7) }, + { UINT64_C(0xB9E5D38467F5F497), UINT64_C(0x8F94A7D4A9B7106B), + UINT64_C(0xBF7E0B079D329F68), UINT64_C(0x169B93EA45D192FB), + UINT64_C(0xCCAA946720DBE8C0), UINT64_C(0xD4513A50938F9574) } }, + { { UINT64_C(0x841C96B4054CB874), UINT64_C(0xD75B1AF1A3C26834), + UINT64_C(0x7237169DEE6575F0), UINT64_C(0xD71FC7E50322AADC), + UINT64_C(0xD7A23F1E949E3A8E), UINT64_C(0x77E2D102DD31D8C7) }, + { UINT64_C(0x5AD69D09D10F5A1F), UINT64_C(0x526C9CB4B99D9A0B), + UINT64_C(0x521BB10B972B237D), UINT64_C(0x1E4CD42FA326F342), + UINT64_C(0x5BB6DB27F0F126CA), UINT64_C(0x587AF22CA4A515AD) } }, + { { UINT64_C(0x1123A531B12E542F), UINT64_C(0x1D01A64DB9EB2811), + UINT64_C(0xA4A3515BF2D70F87), UINT64_C(0xFA205234B4BD0270), + UINT64_C(0x74B818305EDA26B9), UINT64_C(0x9305D6E656578E75) }, + { UINT64_C(0xF38E69DE9F11BE19), UINT64_C(0x1E2A5C2344DBE89F), + UINT64_C(0x1077E7BCFD286654), UINT64_C(0xD36698940FCA4741), + UINT64_C(0x893BF904278F8497), UINT64_C(0xD6AC5F83EB3E14F4) } }, + { { UINT64_C(0x327B9DAB488F5F74), UINT64_C(0x2B44F4B8CAB7364F), + UINT64_C(0xB4A6D22D19B6C6BD), UINT64_C(0xA087E613FC77CD3E), + UINT64_C(0x4558E327B0B49BC7), UINT64_C(0x188805BECD835D35) }, + { UINT64_C(0x592F293CC1DC1007), UINT64_C(0xFAEE660F6AF02B44), + UINT64_C(0x5BFBB3BF904035F2), UINT64_C(0xD7C9AE6079C07E70), + UINT64_C(0xC5287DD4234896C2), UINT64_C(0xC4CE4523CB0E4121) } }, + { { UINT64_C(0x3626B40658344831), UINT64_C(0xABCCE3568E55C984), + UINT64_C(0x495CC81C77241602), UINT64_C(0x4FB796766D70DF8F), + 
UINT64_C(0x6354B37C5B071DCA), UINT64_C(0x2CAD80A48C0FC0AD) }, + { UINT64_C(0x18AADD51F68739B4), UINT64_C(0x1BFBB17747F09C6C), + UINT64_C(0x9355EA19A8FD51C4), UINT64_C(0x3D512A84EE58DB7B), + UINT64_C(0x70842AFDE9237640), UINT64_C(0x36F515CAACAF858D) } }, + { { UINT64_C(0x3DDEC7C47E768B23), UINT64_C(0x97E13C53036D43ED), + UINT64_C(0x871E59253A39AB5F), UINT64_C(0x9AF292DE07E68E2B), + UINT64_C(0x411583494A40112E), UINT64_C(0xCDBB46AF3D4D97E6) }, + { UINT64_C(0x2F8912933C0EBE40), UINT64_C(0x696C7EEE3EBAD1E5), + UINT64_C(0x8A5F3B6933B50D99), UINT64_C(0xB7BC48407ED47DDE), + UINT64_C(0x3A6F8E6C1E6706D8), UINT64_C(0x6A1479433D84BB8F) } }, + { { UINT64_C(0xEC3A9C78603AE8D1), UINT64_C(0xBFE07E37228C29E5), + UINT64_C(0xB0385C5B396DBC2B), UINT64_C(0x7C14FE83DF85F41F), + UINT64_C(0xE2E64676ADFD463E), UINT64_C(0x5BEF10AA8BF9F23D) }, + { UINT64_C(0xFA83EA0DF6BAB6DA), UINT64_C(0xCD0C8BA5966BF7E3), + UINT64_C(0xD62216B498501C2E), UINT64_C(0xB7F298A4C3E69F2D), + UINT64_C(0x42CEF13B9C8740F4), UINT64_C(0xBB317E520DD64307) } }, + { { UINT64_C(0x22B6245C3FFEE775), UINT64_C(0x5C3F60BEB37CE7AA), + UINT64_C(0xDE195D40E1FEC0DF), UINT64_C(0x3BFAFBC5A0A82074), + UINT64_C(0xC36EC86AC72CA86A), UINT64_C(0x5606285113FD43EA) }, + { UINT64_C(0x8686BE808E0B03A4), UINT64_C(0xC3BD1F93D540D440), + UINT64_C(0x13E4EBC0BF96CEC5), UINT64_C(0xE8E239849190C844), + UINT64_C(0x183593A600844802), UINT64_C(0x467168794D206878) } }, + { { UINT64_C(0x358F394DB6F63D19), UINT64_C(0xA75D48496B052194), + UINT64_C(0x584035905C8D7975), UINT64_C(0x86DC9B6B6CBFBD77), + UINT64_C(0x2DB04D77647A51E5), UINT64_C(0x5E9A5B02F8950D88) }, + { UINT64_C(0xCE69A7E5017168B0), UINT64_C(0x94630FACC4843AD3), + UINT64_C(0xB3B9D7361EFC44FF), UINT64_C(0xE729E9B6B14D7F93), + UINT64_C(0xA071FC60E0ED0ABC), UINT64_C(0xFC1A99718C8D9B83) } }, + { { UINT64_C(0x49686031D138E975), UINT64_C(0x648640385A8EF0D1), + UINT64_C(0x32679713E7F7DE49), UINT64_C(0x5913234929D1CD1D), + UINT64_C(0x849AA23A20BE9ED2), UINT64_C(0x15D303E1284B3F33) }, + { 
UINT64_C(0x37309475B63F9FE9), UINT64_C(0x327BAC8B45B7256A), + UINT64_C(0x291CD227D17FC5D3), UINT64_C(0x8291D8CDA973EDF1), + UINT64_C(0xF3843562437ABA09), UINT64_C(0x33FFB704271D0785) } }, + { { UINT64_C(0x5248D6E447E11E5E), UINT64_C(0x0F66FC3C269C7ED3), + UINT64_C(0x18C0D2B9903E346E), UINT64_C(0xD81D9D974BEAE1B8), + UINT64_C(0x610326B0FC30FDF3), UINT64_C(0x2B13687019A7DFCD) }, + { UINT64_C(0xEC75F70AB9527676), UINT64_C(0x90829F5129A3D897), + UINT64_C(0x92FE180997980302), UINT64_C(0xA3F2498E68474991), + UINT64_C(0x6A66307B0F22BBAD), UINT64_C(0x32014B9120378557) } }, + { { UINT64_C(0x72CD7D553CD98610), UINT64_C(0xC3D560B074504ADF), + UINT64_C(0x23F0A982CEBB5D5D), UINT64_C(0x1431C15BB839DDB8), + UINT64_C(0x7E207CD8CEB72207), UINT64_C(0x28E0A848E7EFB28D) }, + { UINT64_C(0xD22561FE1BD96F6E), UINT64_C(0x04812C1862A8236B), + UINT64_C(0xA0BF2334975491FA), UINT64_C(0x294F42A6435DF87F), + UINT64_C(0x2772B783A5D6F4F6), UINT64_C(0x348F92ED2724F853) } }, + }, + { + { { UINT64_C(0xC20FB9111A42E5E7), UINT64_C(0x075A678B81D12863), + UINT64_C(0x12BCBC6A5CC0AA89), UINT64_C(0x5279C6AB4FB9F01E), + UINT64_C(0xBC8E178911AE1B89), UINT64_C(0xAE74A706C290003C) }, + { UINT64_C(0x9949D6EC79DF3F45), UINT64_C(0xBA18E26296C8D37F), + UINT64_C(0x68DE6EE2DD2275BF), UINT64_C(0xA9E4FFF8C419F1D5), + UINT64_C(0xBC759CA4A52B5A40), UINT64_C(0xFF18CBD863B0996D) } }, + { { UINT64_C(0x73C57FDED7DD47E5), UINT64_C(0xB0FE5479D49A7F5D), + UINT64_C(0xD25C71F1CFB9821E), UINT64_C(0x9427E209CF6A1D68), + UINT64_C(0xBF3C3916ACD24E64), UINT64_C(0x7E9F5583BDA7B8B5) }, + { UINT64_C(0xE7C5F7C8CF971E11), UINT64_C(0xEC16D5D73C7F035E), + UINT64_C(0x818DC472E66B277C), UINT64_C(0x4413FD47B2816F1E), + UINT64_C(0x40F262AF48383C6D), UINT64_C(0xFB0575844F190537) } }, + { { UINT64_C(0x487EDC0708962F6B), UINT64_C(0x6002F1E7190A7E55), + UINT64_C(0x7FC62BEA10FDBA0C), UINT64_C(0xC836BBC52C3DBF33), + UINT64_C(0x4FDFB5C34F7D2A46), UINT64_C(0x824654DEDCA0DF71) }, + { UINT64_C(0x30A076760C23902B), UINT64_C(0x7F1EBB9377FBBF37), + 
UINT64_C(0xD307D49DFACC13DB), UINT64_C(0x148D673AAE1A261A), + UINT64_C(0xE008F95B52D98650), UINT64_C(0xC76144409F558FDE) } }, + { { UINT64_C(0x17CD6AF69CB16650), UINT64_C(0x86CC27C169F4EEBE), + UINT64_C(0x7E495B1D78822432), UINT64_C(0xFED338E31B974525), + UINT64_C(0x527743D386F3CE21), UINT64_C(0x87948AD3B515C896) }, + { UINT64_C(0x9FDE7039B17F2FB8), UINT64_C(0xA2FA9A5FD9B89D96), + UINT64_C(0x5D46600B36FF74DC), UINT64_C(0x8EA74B048302C3C9), + UINT64_C(0xD560F570F744B5EB), UINT64_C(0xC921023BFE762402) } }, + { { UINT64_C(0xA35AB657FFF4C8ED), UINT64_C(0x017C61248A5FABD7), + UINT64_C(0x5646302509ACDA28), UINT64_C(0x6038D36114CF238A), + UINT64_C(0x1428B1B6AF1B9F07), UINT64_C(0x5827FF447482E95C) }, + { UINT64_C(0xCB997E18780FF362), UINT64_C(0x2B89D702E0BCAC1E), + UINT64_C(0xC632A0B5A837DDC8), UINT64_C(0xF3EFCF1F59762647), + UINT64_C(0xE9BA309A38B0D60A), UINT64_C(0x05DEABDD20B5FB37) } }, + { { UINT64_C(0xD44E5DBACB8AF047), UINT64_C(0x15400CB4943CFE82), + UINT64_C(0xDBD695759DF88B67), UINT64_C(0x8299DB2BB2405A7D), + UINT64_C(0x46E3BF770B1D80CD), UINT64_C(0xC50CF66CE82BA3D9) }, + { UINT64_C(0xB2910A07F2F747A9), UINT64_C(0xF6B669DB5ADC89C1), + UINT64_C(0x3B5EF1A09052B081), UINT64_C(0x0F5D5ED3B594ACE2), + UINT64_C(0xDA30B8D5D5F01320), UINT64_C(0x0D688C5EAAFCD58F) } }, + { { UINT64_C(0x5EEE3A312A161074), UINT64_C(0x6BAAAE56EFE2BE37), + UINT64_C(0xF9787F61E3D78698), UINT64_C(0xC6836B2650630A30), + UINT64_C(0x7445B85D1445DEF1), UINT64_C(0xD72016A2D568A6A5) }, + { UINT64_C(0x9DD6F533E355614F), UINT64_C(0x637E7E5F91E04588), + UINT64_C(0x42E142F3B9FB1391), UINT64_C(0x0D07C05C41AFE5DA), + UINT64_C(0xD7CD25C81394EDF1), UINT64_C(0xEBE6A0FCB99288EE) } }, + { { UINT64_C(0xB8E63B7BBABBAD86), UINT64_C(0x63226A9F90D66766), + UINT64_C(0x263818365CF26666), UINT64_C(0xCCBD142D4CADD0BF), + UINT64_C(0xA070965E9AC29470), UINT64_C(0x6BDCA26025FF23ED) }, + { UINT64_C(0xD4E00FD487DCA7B3), UINT64_C(0xA50978339E0E8734), + UINT64_C(0xF73F162E048173A4), UINT64_C(0xD23F91969C3C2FA2), + 
UINT64_C(0x9AB98B45E4AC397A), UINT64_C(0x2BAA0300543F2D4B) } }, + { { UINT64_C(0xBBBE15E7C658C445), UINT64_C(0xB8CBCB20C28941D1), + UINT64_C(0x65549BE2027D6540), UINT64_C(0xEBBCA8021E8EF4F4), + UINT64_C(0x18214B4BD2ACA397), UINT64_C(0xCBEC7DE2E31784A3) }, + { UINT64_C(0x96F0533F0116FDF3), UINT64_C(0x68911C905C8F5EE1), + UINT64_C(0x7DE9A3AED568603A), UINT64_C(0x3F56C52C6A3AD7B7), + UINT64_C(0x5BE9AFCA670B4D0E), UINT64_C(0x628BFEEE375DFE2F) } }, + { { UINT64_C(0x97DAE81BDD4ADDB3), UINT64_C(0x12D2CF4E8704761B), + UINT64_C(0x5E820B403247788D), UINT64_C(0x82234B620051CA80), + UINT64_C(0x0C62704D6CB5EA74), UINT64_C(0xDE56042023941593) }, + { UINT64_C(0xB3912A3CF1B04145), UINT64_C(0xE3967CD7AF93688D), + UINT64_C(0x2E2DCD2F58DABB4B), UINT64_C(0x6564836F0E303911), + UINT64_C(0x1F10F19BECE07C5C), UINT64_C(0xB47F07EED8919126) } }, + { { UINT64_C(0xE3545085E9A2EEC9), UINT64_C(0x81866A972C8E51FE), + UINT64_C(0xD2BA7DB550027243), UINT64_C(0x29DAEAB54AE87DE4), + UINT64_C(0x5EF3D4B8684F9497), UINT64_C(0xE2DACE3B9D5D6873) }, + { UINT64_C(0xF012C951FFD29C9C), UINT64_C(0x48289445ADBADA14), + UINT64_C(0x8751F50D89558C49), UINT64_C(0x75511A4F99E35BEE), + UINT64_C(0xEF802D6E7D59AA5F), UINT64_C(0x14FCAD65A2A795E2) } }, + { { UINT64_C(0xC8EB00E808CB8F2C), UINT64_C(0x686075322B45BD86), + UINT64_C(0x7A29B45959969713), UINT64_C(0x5FA15B9BD684201B), + UINT64_C(0x1A853190B9E538EE), UINT64_C(0x4150605CD573D043) }, + { UINT64_C(0xEF011D3BEB9FBB68), UINT64_C(0x6727998266AE32B6), + UINT64_C(0x861B86EA445DE5EC), UINT64_C(0x62837D18A34A50E1), + UINT64_C(0x228C006ABF5F0663), UINT64_C(0xE007FDE7396DB36A) } }, + { { UINT64_C(0xDEE4F8815A916A55), UINT64_C(0x20DC0370F39C82CB), + UINT64_C(0xD9A7161540F09821), UINT64_C(0xD50AD8BFF7273492), + UINT64_C(0xA06F7D1232E7C4BF), UINT64_C(0xFA0F61544C5CEA36) }, + { UINT64_C(0xF4FD9BED5FC49CFE), UINT64_C(0xD8CB45D1C9291678), + UINT64_C(0x94DB86CC7B92C9F2), UINT64_C(0x09CA5F3873C81169), + UINT64_C(0x109F40B0AEED06F0), UINT64_C(0x9F0360B214DCAA0A) } }, + { { 
UINT64_C(0x4189B70DE12AD3E7), UINT64_C(0x5208ADB210B06607), + UINT64_C(0xEBD8E2A2EE8497FA), UINT64_C(0x61B1BD67E04F2ECB), + UINT64_C(0x0E2DDA724F3F5F99), UINT64_C(0xD5D96740F747B16D) }, + { UINT64_C(0x308A48F6A6BF397F), UINT64_C(0x7021C3E523A93595), + UINT64_C(0xF10B022936470AA0), UINT64_C(0x7761E8EC4E03295B), + UINT64_C(0x16EFEF5807339770), UINT64_C(0x0D55D2DD5DA5DAA2) } }, + { { UINT64_C(0x915EA6A38A22F87A), UINT64_C(0x191151C12E5A088E), + UINT64_C(0x190252F17F1D5CBE), UINT64_C(0xE43F59C33B0EC99B), + UINT64_C(0xBE8588D4FF2A6135), UINT64_C(0x103877CC2ECB4B9F) }, + { UINT64_C(0x8F4147E5023CF92B), UINT64_C(0xC24384CC0CC2085B), + UINT64_C(0x6A2DB4A2D082D311), UINT64_C(0x06283811ED7BA9AE), + UINT64_C(0xE9A3F5322A8E1592), UINT64_C(0xAC20F0F45A59E894) } }, + { { UINT64_C(0x788CAA5274AAB4B1), UINT64_C(0xEB84ABA12FEAFC7E), + UINT64_C(0x31DA71DAAC04FF77), UINT64_C(0x39D12EB924E4D0BF), + UINT64_C(0x4F2F292F87A34EF8), UINT64_C(0x9B324372A237A8ED) }, + { UINT64_C(0xBB2D04B12EE3A82D), UINT64_C(0xED4FF367D18D36B2), + UINT64_C(0x99D231EEA6EA0138), UINT64_C(0x7C2D4F064F92E04A), + UINT64_C(0x78A82AB2CA272FD0), UINT64_C(0x7EC41340AB8CDC32) } }, + }, + { + { { UINT64_C(0xD23658C8D2E15A8C), UINT64_C(0x23F93DF716BA28CA), + UINT64_C(0x6DAB10EC082210F1), UINT64_C(0xFB1ADD91BFC36490), + UINT64_C(0xEDA8B02F9A4F2D14), UINT64_C(0x9060318C56560443) }, + { UINT64_C(0x6C01479E64711AB2), UINT64_C(0x41446FC7E337EB85), + UINT64_C(0x4DCF3C1D71888397), UINT64_C(0x87A9C04E13C34FD2), + UINT64_C(0xFE0E08EC510C15AC), UINT64_C(0xFC0D0413C0F495D2) } }, + { { UINT64_C(0xEB05C516156636C2), UINT64_C(0x2F613ABA090E93FC), + UINT64_C(0xCFD573CD489576F5), UINT64_C(0xE6535380535A8D57), + UINT64_C(0x13947314671436C4), UINT64_C(0x1172FB0C5F0A122D) }, + { UINT64_C(0xAECC7EC1C12F58F6), UINT64_C(0xFE42F9578E41AFD2), + UINT64_C(0xDF96F6523D4221AA), UINT64_C(0xFEF5649F2851996B), + UINT64_C(0x46FB9F26D5CFB67E), UINT64_C(0xB047BFC7EF5C4052) } }, + { { UINT64_C(0x5CBDC442F4484374), UINT64_C(0x6B156957F92452EF), + 
UINT64_C(0x58A26886C118D02A), UINT64_C(0x87FF74E675AAF276), + UINT64_C(0xB133BE95F65F6EC1), UINT64_C(0xA89B62844B1B8D32) }, + { UINT64_C(0xDD8A8EF309C81004), UINT64_C(0x7F8225DB0CF21991), + UINT64_C(0xD525A6DB26623FAF), UINT64_C(0xF2368D40BAE15453), + UINT64_C(0x55D6A84D84F89FC9), UINT64_C(0xAF38358A86021A3E) } }, + { { UINT64_C(0xBD048BDCFF52E280), UINT64_C(0x8A51D0B2526A1795), + UINT64_C(0x40AAA758A985AC0F), UINT64_C(0x6039BCDCF2C7ACE9), + UINT64_C(0x712092CC6AEC347D), UINT64_C(0x7976D0906B5ACAB7) }, + { UINT64_C(0x1EBCF80D6EED9617), UINT64_C(0xB3A63149B0F404A4), + UINT64_C(0x3FDD3D1AD0B610EF), UINT64_C(0xDD3F6F9498C28AC7), + UINT64_C(0x650B77943A59750F), UINT64_C(0xEC59BAB12D3991AC) } }, + { { UINT64_C(0x01F40E882E552766), UINT64_C(0x1FE3D50966F5354F), + UINT64_C(0x0E46D006B3A8EA7F), UINT64_C(0xF75AB629F831CD6A), + UINT64_C(0xDAD808D791465119), UINT64_C(0x442405AF17EF9B10) }, + { UINT64_C(0xD5FE0A96672BDFCB), UINT64_C(0xA9DFA422355DBDEC), + UINT64_C(0xFDB79AA179B25636), UINT64_C(0xE7F26FFDEECE8AEC), + UINT64_C(0xB59255507EDD5AA2), UINT64_C(0x2C8F6FF08EB3A6C2) } }, + { { UINT64_C(0x88887756757D6136), UINT64_C(0xAD9AC18388B92E72), + UINT64_C(0x92CB2FC48785D3EB), UINT64_C(0xD1A542FE9319764B), + UINT64_C(0xAF4CC78F626A62F8), UINT64_C(0x7F3F5FC926BFFAAE) }, + { UINT64_C(0x0A203D4340AE2231), UINT64_C(0xA8BFD9E0387898E8), + UINT64_C(0x1A0C379C474B7DDD), UINT64_C(0x03855E0A34FD49EA), + UINT64_C(0x02B26223B3EF4AE1), UINT64_C(0x804BD8CFE399E0A3) } }, + { { UINT64_C(0x11A9F3D0DE865713), UINT64_C(0x81E36B6BBDE98821), + UINT64_C(0x324996C86AA891D0), UINT64_C(0x7B95BDC1395682B5), + UINT64_C(0x47BF2219C1600563), UINT64_C(0x7A473F50643E38B4) }, + { UINT64_C(0x0911F50AF5738288), UINT64_C(0xDF947A706F9C415B), + UINT64_C(0xBDB994F267A067F6), UINT64_C(0x3F4BEC1B88BE96CD), + UINT64_C(0x9820E931E56DD6D9), UINT64_C(0xB138F14F0A80F419) } }, + { { UINT64_C(0xA11A1A8F0429077A), UINT64_C(0x2BB1E33D10351C68), + UINT64_C(0x3C25ABFE89459A27), UINT64_C(0x2D0091B86B8AC774), + 
UINT64_C(0xDAFC78533B2415D9), UINT64_C(0xDE713CF19201680D) }, + { UINT64_C(0x8E5F445D68889D57), UINT64_C(0x608B209C60EABF5B), + UINT64_C(0x10EC0ACCF9CFA408), UINT64_C(0xD5256B9D4D1EE754), + UINT64_C(0xFF866BAB0AA6C18D), UINT64_C(0x9D196DB8ACB90A45) } }, + { { UINT64_C(0xA46D76A9B9B081B2), UINT64_C(0xFC743A1062163C25), + UINT64_C(0xCD2A5C8D7761C392), UINT64_C(0x39BDDE0BBE808583), + UINT64_C(0x7C416021B98E4DFE), UINT64_C(0xF930E56365913A44) }, + { UINT64_C(0xC3555F7E7585CF3C), UINT64_C(0xC737E3833D6333D5), + UINT64_C(0x5B60DBA4B430B03D), UINT64_C(0x42B715EBE7555404), + UINT64_C(0x571BDF5B7C7796E3), UINT64_C(0x33DC62C66DB6331F) } }, + { { UINT64_C(0x3FB9CCB0E61DEE59), UINT64_C(0xC5185F2318B14DB9), + UINT64_C(0x1B2ADC4F845EF36C), UINT64_C(0x195D5B505C1A33AB), + UINT64_C(0x8CEA528E421F59D2), UINT64_C(0x7DFCCECFD2931CEA) }, + { UINT64_C(0x51FFA1D58CF7E3F7), UINT64_C(0xF01B7886BDC9FB43), + UINT64_C(0xD65AB610261A0D35), UINT64_C(0x84BCBAFD7574A554), + UINT64_C(0x4B119956FAD70208), UINT64_C(0xDDC329C24FAB5243) } }, + { { UINT64_C(0x1A08AA579CE92177), UINT64_C(0x3395E557DC2B5C36), + UINT64_C(0xFDFE7041394ED04E), UINT64_C(0xB797EB24C6DFCDDE), + UINT64_C(0x284A6B2ACB9DE5D6), UINT64_C(0xE0BD95C807222765) }, + { UINT64_C(0x114A951B9FE678A7), UINT64_C(0xE7ECD0BD9E4954EC), + UINT64_C(0x7D4096FE79F0B8A9), UINT64_C(0xBDB26E9A09724FE2), + UINT64_C(0x08741AD8F787AF95), UINT64_C(0x2BF9727224045AD8) } }, + { { UINT64_C(0xAB1FEDD9A9451D57), UINT64_C(0xDF4D91DF483E38C9), + UINT64_C(0x2D54D31124E9CF8E), UINT64_C(0x9C2A5AF87A22EEB6), + UINT64_C(0xBD9861EF0A43F123), UINT64_C(0x581EA6A238A18B7B) }, + { UINT64_C(0xAF339C85296470A3), UINT64_C(0xF9603FCDAFD8203E), + UINT64_C(0x95D0535096763C28), UINT64_C(0x15445C16860EC831), + UINT64_C(0x2AFB87286867A323), UINT64_C(0x4B152D6D0C4838BF) } }, + { { UINT64_C(0x45BA0E4F837CACBA), UINT64_C(0x7ADB38AEC0725275), + UINT64_C(0x19C82831942D3C28), UINT64_C(0x94F4731D6D0FE7DD), + UINT64_C(0xC3C07E134898F1E6), UINT64_C(0x76350EACED410B51) }, + { 
UINT64_C(0x0FA8BECAF99AACFC), UINT64_C(0x2834D86F65FAF9CF), + UINT64_C(0x8E62846A6F3866AF), UINT64_C(0xDAA9BD4F3DFD6A2B), + UINT64_C(0xC27115BBA6132655), UINT64_C(0x83972DF7BD5A32C2) } }, + { { UINT64_C(0xA330CB5BD513B825), UINT64_C(0xAE18B2D3EE37BEC3), + UINT64_C(0xFC3AB80AF780A902), UINT64_C(0xD7835BE2D607DDF1), + UINT64_C(0x8120F7675B6E4C2B), UINT64_C(0xAA8C385967E78CCB) }, + { UINT64_C(0xA8DA8CE2AA0ED321), UINT64_C(0xCB8846FDD766341A), + UINT64_C(0xF2A342EE33DC9D9A), UINT64_C(0xA519E0BED0A18A80), + UINT64_C(0x9CDAA39CAF48DF4C), UINT64_C(0xA4B500CA7E0C19EE) } }, + { { UINT64_C(0x83A7FD2F8217001B), UINT64_C(0x4F6FCF064296A8BA), + UINT64_C(0x7D74864391619927), UINT64_C(0x174C1075941E4D41), + UINT64_C(0x037EDEBDA64F5A6C), UINT64_C(0xCF64DB3A6E29DC56) }, + { UINT64_C(0x150B3ACE37C0B9F4), UINT64_C(0x1323234A7168178B), + UINT64_C(0x1CE47014EF4D1879), UINT64_C(0xA22E374217FB4D5C), + UINT64_C(0x69B81822D985F794), UINT64_C(0x199C21C4081D7214) } }, + { { UINT64_C(0x160BC7A18F04B4D2), UINT64_C(0x79CA81DDB10DE174), + UINT64_C(0xE2A280B02DA1E9C7), UINT64_C(0xB4F6BD991D6A0A29), + UINT64_C(0x57CF3EDD1C5B8F27), UINT64_C(0x7E34FC57158C2FD4) }, + { UINT64_C(0x828CFD89CAC93459), UINT64_C(0x9E631B6FB7AF499F), + UINT64_C(0xF4DC8BC0DA26C135), UINT64_C(0x6128ED3937186735), + UINT64_C(0xBB45538B67BF0BA5), UINT64_C(0x1ADDD4C10064A3AB) } }, + }, + { + { { UINT64_C(0xC32730E8DD14D47E), UINT64_C(0xCDC1FD42C0F01E0F), + UINT64_C(0x2BACFDBF3F5CD846), UINT64_C(0x45F364167272D4DD), + UINT64_C(0xDD813A795EB75776), UINT64_C(0xB57885E450997BE2) }, + { UINT64_C(0xDA054E2BDB8C9829), UINT64_C(0x4161D820AAB5A594), + UINT64_C(0x4C428F31026116A3), UINT64_C(0x372AF9A0DCD85E91), + UINT64_C(0xFDA6E903673ADC2D), UINT64_C(0x4526B8ACA8DB59E6) } }, + { { UINT64_C(0x68FE359DE23A8472), UINT64_C(0x43EB12BD4CE3C101), + UINT64_C(0x0EC652C3FC704935), UINT64_C(0x1EEFF1F952E4E22D), + UINT64_C(0xBA6777CB083E3ADA), UINT64_C(0xAB52D7DC8BEFC871) }, + { UINT64_C(0x4EDE689F497CBD59), UINT64_C(0xC8AE42B927577DD9), + 
UINT64_C(0xE0F080517AB83C27), UINT64_C(0x1F3D5F252C8C1F48), + UINT64_C(0x57991607AF241AAC), UINT64_C(0xC4458B0AB8A337E0) } }, + { { UINT64_C(0x3DBB3FA651DD1BA9), UINT64_C(0xE53C1C4D545E960B), + UINT64_C(0x35AC6574793CE803), UINT64_C(0xB2697DC783DBCE4F), + UINT64_C(0xE35C5BF2E13CF6B0), UINT64_C(0x35034280B0C4A164) }, + { UINT64_C(0xAA490908D9C0D3C1), UINT64_C(0x2CCE614DCB4D2E90), + UINT64_C(0xF646E96C54D504E4), UINT64_C(0xD74E7541B73310A3), + UINT64_C(0xEAD7159618BDE5DA), UINT64_C(0x96E7F4A8AA09AEF7) } }, + { { UINT64_C(0xA8393A245D6E5F48), UINT64_C(0x2C8D7EA2F9175CE8), + UINT64_C(0xD8824E0255A20268), UINT64_C(0x9DD9A272A446BCC6), + UINT64_C(0xC929CDED5351499B), UINT64_C(0xEA5AD9ECCFE76535) }, + { UINT64_C(0x26F3D7D9DC32D001), UINT64_C(0x51C3BE8343EB9689), + UINT64_C(0x91FDCC06759E6DDB), UINT64_C(0xAC2E1904E302B891), + UINT64_C(0xAD25C645C207E1F7), UINT64_C(0x28A70F0DAB3DEB4A) } }, + { { UINT64_C(0x922D7F9703BEA8F1), UINT64_C(0x3AD820D4584570BE), + UINT64_C(0x0CE0A8503CD46B43), UINT64_C(0x4C07911FAE66743D), + UINT64_C(0x66519EB9FDA60023), UINT64_C(0x7F83004BEC2ACD9C) }, + { UINT64_C(0x001E0B80C3117EAD), UINT64_C(0xBB72D5410722BA25), + UINT64_C(0x3AF7DB966E9A5078), UINT64_C(0x86C5774E701B6B4C), + UINT64_C(0xBD2C0E8E37824DB5), UINT64_C(0x3AE3028CBFAC286D) } }, + { { UINT64_C(0x83D4D4A8A33E071B), UINT64_C(0x881C0A9261444BB5), + UINT64_C(0xEEA1E292520E3BC3), UINT64_C(0x5A5F4C3C2AAAB729), + UINT64_C(0x0B766C5EE63C7C94), UINT64_C(0x62BB8A9FBB2CC79C) }, + { UINT64_C(0x97ADC7D2AA5DC49D), UINT64_C(0x30CC26B331718681), + UINT64_C(0xAC86E6FF56E86EDE), UINT64_C(0x37BCA7A2CD52F7F2), + UINT64_C(0x734D2C949CE6D87F), UINT64_C(0x06A71D71C2F7E0CA) } }, + { { UINT64_C(0x559DCF75C6357D33), UINT64_C(0x4616D940652517DE), + UINT64_C(0x3D576B981CCF207B), UINT64_C(0x51E2D1EF1979F631), + UINT64_C(0x57517DDD06AE8296), UINT64_C(0x309A3D7FD6E7151F) }, + { UINT64_C(0xBA2A23E60E3A6FE5), UINT64_C(0x76CF674AD28B22C3), + UINT64_C(0xD235AD07F8B808C3), UINT64_C(0x7BBF4C586B71213A), + 
UINT64_C(0x0676792E93271EBB), UINT64_C(0x2CFD2C7605B1FC31) } }, + { { UINT64_C(0x4258E5C037A450F5), UINT64_C(0xC3245F1B52D2B118), + UINT64_C(0x6DF7B48482BC5963), UINT64_C(0xE520DA4D9C273D1E), + UINT64_C(0xED78E0122C3010E5), UINT64_C(0x112229483C1D4C05) }, + { UINT64_C(0xE3DAE5AFC692B490), UINT64_C(0x3272BD10C197F793), + UINT64_C(0xF7EAE411E709ACAA), UINT64_C(0x00B0C95F778270A6), + UINT64_C(0x4DA76EE1220D4350), UINT64_C(0x521E1461AB71E308) } }, + { { UINT64_C(0x7B654323343196A3), UINT64_C(0x35D442ADB0C95250), + UINT64_C(0x38AF50E6E264FF17), UINT64_C(0x28397A412030D2EA), + UINT64_C(0x8F1D84E9F74EEDA1), UINT64_C(0xD521F92DE6FB3C52) }, + { UINT64_C(0xAF358D7795733811), UINT64_C(0xEBFDDD0193ABFE94), + UINT64_C(0x05D8A028D18D99DE), UINT64_C(0x5A664019B5D5BDD9), + UINT64_C(0x3DF172822AA12FE8), UINT64_C(0xB42E006FB889A28E) } }, + { { UINT64_C(0xCF10E97DBC35CB1A), UINT64_C(0xC70A7BBD994DEDC5), + UINT64_C(0x76A5327C37D04FB9), UINT64_C(0x87539F76A76E0CDA), + UINT64_C(0xE9FE493FCD60A6B1), UINT64_C(0xA4574796132F01C0) }, + { UINT64_C(0xC43B85EBDB70B167), UINT64_C(0x81D5039A98551DFA), + UINT64_C(0x6B56FBE91D979FA4), UINT64_C(0x49714FD78615098F), + UINT64_C(0xB10E1CEA94DECAB5), UINT64_C(0x8342EBA3480EF6E3) } }, + { { UINT64_C(0xE1E030B0B3677288), UINT64_C(0x2978174C8D5CE3AF), + UINT64_C(0xAFC0271CF7B2DE98), UINT64_C(0x745BC6F3B99C20B5), + UINT64_C(0x9F6EDCED1E3BB4E5), UINT64_C(0x58D3EE4E73C8C1FC) }, + { UINT64_C(0x1F3535F47FD30124), UINT64_C(0xF366AC705FA62502), + UINT64_C(0x4C4C1FDD965363FE), UINT64_C(0x8B2C77771DE2CA2B), + UINT64_C(0x0CB54743882F1173), UINT64_C(0x94B6B8C071343331) } }, + { { UINT64_C(0x75AF014165B8B35B), UINT64_C(0x6D7B84854670A1F5), + UINT64_C(0x6EAA3A47A3B6D376), UINT64_C(0xD7E673D2CB3E5B66), + UINT64_C(0xC0338E6C9589AB38), UINT64_C(0x4BE26CB309440FAA) }, + { UINT64_C(0x82CB05E7394F9AA3), UINT64_C(0xC45C8A8A7F7792EA), + UINT64_C(0x37E5E33BB687DC70), UINT64_C(0x63853219DFE48E49), + UINT64_C(0x087951C16D0E5C8C), UINT64_C(0x7696A8C72BC27310) } }, + { { 
UINT64_C(0xA05736D5B67E834A), UINT64_C(0xDD2AA0F29098D42A), + UINT64_C(0x09F0C1D849C69DDC), UINT64_C(0x81F8BC1C8FF0F0F3), + UINT64_C(0x36FD3A4F03037775), UINT64_C(0x8286717D4B06DF5C) }, + { UINT64_C(0xB878F496A9079EA2), UINT64_C(0xA5642426D7DC796D), + UINT64_C(0x29B9351A67FDAC2B), UINT64_C(0x93774C0E1D543CDE), + UINT64_C(0x4F8793BA1A8E31C4), UINT64_C(0x7C9F3F3A6C94798A) } }, + { { UINT64_C(0x23C5AD11CB8ECDB8), UINT64_C(0x1E88D25E485A6A02), + UINT64_C(0xB27CBE84F1E268AE), UINT64_C(0xDDA80238F4CD0475), + UINT64_C(0x4F88857B49F8EB1B), UINT64_C(0x91B1221F52FB07F9) }, + { UINT64_C(0x7CE974608637FA67), UINT64_C(0x528B3CF4632198D8), + UINT64_C(0x33365AB3F6623769), UINT64_C(0x6FEBCFFF3A83A30F), + UINT64_C(0x398F4C999BD341EB), UINT64_C(0x180712BBB33A333C) } }, + { { UINT64_C(0x2B8655A2D93429E7), UINT64_C(0x99D600BB75C8B9EE), + UINT64_C(0x9FC1AF8B88FCA6CD), UINT64_C(0x2FB533867C311F80), + UINT64_C(0x20743ECBE8A71EEE), UINT64_C(0xEC3713C4E848B49E) }, + { UINT64_C(0x5B2037B5BB886817), UINT64_C(0x40EF5AC2307DBAF4), + UINT64_C(0xC2888AF21B3F643D), UINT64_C(0x0D8252E19D5A4190), + UINT64_C(0x06CC0BEC2DB52A8A), UINT64_C(0xB84B98EAAB94E969) } }, + { { UINT64_C(0x2E7AC078A0321E0E), UINT64_C(0x5C5A1168EF3DAAB6), + UINT64_C(0xD2D573CBADDD454A), UINT64_C(0x27E149E236259CC7), + UINT64_C(0x1EDFD469A63F47F1), UINT64_C(0x039AD674F1BD2CFD) }, + { UINT64_C(0xBFA633FC3077D3CC), UINT64_C(0x14A7C82F2FD64E9F), + UINT64_C(0xAAA650149D824999), UINT64_C(0x41AB113B21760F2E), + UINT64_C(0x23E646C51CAE260A), UINT64_C(0x08062C8F68DC5159) } }, + }, + { + { { UINT64_C(0x2E7D0A16204BE028), UINT64_C(0x4F1D082ED0E41851), + UINT64_C(0x15F1DDC63EB317F9), UINT64_C(0xF02750715ADF71D7), + UINT64_C(0x2CE33C2EEE858BC3), UINT64_C(0xA24C76D1DA73B71A) }, + { UINT64_C(0x9EF6A70A6C70C483), UINT64_C(0xEFCF170505CF9612), + UINT64_C(0x9F5BF5A67502DE64), UINT64_C(0xD11122A1A4701973), + UINT64_C(0x82CFAAC2A2EA7B24), UINT64_C(0x6CAD67CC0A4582E1) } }, + { { UINT64_C(0x597A26FFB4DC8600), UINT64_C(0x264A09F3F9288555), + 
UINT64_C(0x0B06AFF65C27F5F6), UINT64_C(0xCE5AB665D8D544E6), + UINT64_C(0x92F031BE99275C32), UINT64_C(0xAF51C5BBF42E0E7C) }, + { UINT64_C(0x5BB28B061E37B36D), UINT64_C(0x583FBA6A8473543A), + UINT64_C(0xE73FD299F93FB7DC), UINT64_C(0xFCD999A86E2CCAD9), + UINT64_C(0xB8C8A6DF334D4F57), UINT64_C(0x5ADB28DD9A2ACC9B) } }, + { { UINT64_C(0x5ADF3D9A111792B9), UINT64_C(0x1C77A3054F1E0D09), + UINT64_C(0xF9FBCE33A82D3736), UINT64_C(0xF307823E718C8AA3), + UINT64_C(0x860578CF416CCF69), UINT64_C(0xB942ADD81EF8465B) }, + { UINT64_C(0x9EE0CF97CD9472E1), UINT64_C(0xE6792EEFB01528A8), + UINT64_C(0xF99B9A8DC09DA90B), UINT64_C(0x1F521C2DCBF3CCB8), + UINT64_C(0x6BF6694891A62632), UINT64_C(0xCC7A9CEB854FE9DA) } }, + { { UINT64_C(0x46303171491CCB92), UINT64_C(0xA80A8C0D2771235B), + UINT64_C(0xD8E497FFF172C7CF), UINT64_C(0x7F7009D735B193CF), + UINT64_C(0x6B9FD3F7F19DF4BC), UINT64_C(0xADA548C3B46F1E37) }, + { UINT64_C(0x87C6EAA9C7A20270), UINT64_C(0xEF2245D6AE78EF99), + UINT64_C(0x2A121042539EAB95), UINT64_C(0x29A6D5D779B8F5CC), + UINT64_C(0x33803A10B77840DC), UINT64_C(0xFEDD3A7011A6A30F) } }, + { { UINT64_C(0xFA070E22142403D1), UINT64_C(0x68FF316015C6F7F5), + UINT64_C(0xE09F04E6223A0CE8), UINT64_C(0x22BBD01853E14183), + UINT64_C(0x35D9FAFCCF45B75B), UINT64_C(0x3A34819D7ECEEC88) }, + { UINT64_C(0xD9CF7568D33262D2), UINT64_C(0x431036D5841D1505), + UINT64_C(0x0C8005659EB2A79A), UINT64_C(0x8E77D9F05F7EDC6A), + UINT64_C(0x19E12D0565E800AA), UINT64_C(0x335C8D36B7784E7C) } }, + { { UINT64_C(0x8B2FC4E96484FD40), UINT64_C(0xEE702764A35D24EA), + UINT64_C(0x15B28AC7B871C3F3), UINT64_C(0x805B4048E097047F), + UINT64_C(0xD6F1B8DF647CAD2F), UINT64_C(0xF1D5B458DC7DD67F) }, + { UINT64_C(0x324C529C25148803), UINT64_C(0xF6185EBE21274FAF), + UINT64_C(0xAF14751E95148B55), UINT64_C(0x283ED89D28F284F4), + UINT64_C(0x93AD20E74CBEBF1A), UINT64_C(0x5F6EC65D882935E1) } }, + { { UINT64_C(0xE222EBA4A4DCEFE9), UINT64_C(0x63AD235FEC1CEB74), + UINT64_C(0x2E0BF749E05B18E7), UINT64_C(0x547BD050B48BDD87), + 
UINT64_C(0x0490C970F5AA2FC4), UINT64_C(0xCED5E4CF2B431390) }, + { UINT64_C(0x07D8270451D2898E), UINT64_C(0x44B72442083B57D4), + UINT64_C(0xA4ADA2305037FCE8), UINT64_C(0x55F7905E50510DA6), + UINT64_C(0xD8EE724F8D890A98), UINT64_C(0x925A8E7C11B85640) } }, + { { UINT64_C(0x5BFA10CD1CA459ED), UINT64_C(0x593F085A6DCF56BF), + UINT64_C(0xE6F0AD9BC0579C3E), UINT64_C(0xC11C95A22527C1AD), + UINT64_C(0x7CFA71E1CF1CB8B3), UINT64_C(0xEDCFF8331D6DC79D) }, + { UINT64_C(0x581C4BBE432521C9), UINT64_C(0xBF620096144E11A0), + UINT64_C(0x54C38B71BE3A107B), UINT64_C(0xED555E37E2606EC0), + UINT64_C(0x3FB148B8D721D034), UINT64_C(0x79D53DAD0091BC90) } }, + { { UINT64_C(0xE32068C5B7082C80), UINT64_C(0x4140FFD27A144E22), + UINT64_C(0x5811D2F09EDD9E86), UINT64_C(0xCDD79B5FC572C465), + UINT64_C(0x3563FED1C97BF450), UINT64_C(0x985C1444F2CE5C9C) }, + { UINT64_C(0x260AE79799950F1C), UINT64_C(0x659F4F40765E9DED), + UINT64_C(0x2A412D662E3BC286), UINT64_C(0xE865E62CF87E0C82), + UINT64_C(0xD63D3A9A6C05E7D7), UINT64_C(0x96725D678686F89A) } }, + { { UINT64_C(0xC99A5E4CAB7EA0F5), UINT64_C(0xC9860A1AC5393FA9), + UINT64_C(0x9ED83CEE8FDEEFC0), UINT64_C(0xE3EA8B4C5ED6869A), + UINT64_C(0x89A85463D2EED3A9), UINT64_C(0x2CD91B6DE421A622) }, + { UINT64_C(0x6FEC1EF32C91C41D), UINT64_C(0xB1540D1F8171037D), + UINT64_C(0x4FE4991A1C010E5B), UINT64_C(0x28A3469FFC1C7368), + UINT64_C(0xE1EEECD1AF118781), UINT64_C(0x1BCCB97799EF3531) } }, + { { UINT64_C(0x63D3B638C4DAB7B8), UINT64_C(0xD92133B63F7F5BAB), + UINT64_C(0x2573EE2009FB6069), UINT64_C(0x771FABDF890A1686), + UINT64_C(0x1D0BA21FA77AFFF5), UINT64_C(0x83145FCCBA3DD2C0) }, + { UINT64_C(0xFA073A812D115C20), UINT64_C(0x6AB7A9D319176F27), + UINT64_C(0xAF62CF939AC639EE), UINT64_C(0xF73848B92CCD1319), + UINT64_C(0x3B6132343C71659D), UINT64_C(0xF8E0011C10AB3826) } }, + { { UINT64_C(0x0501F0360282FFA5), UINT64_C(0xC39A5CF4D9E0F15A), + UINT64_C(0x48D8C7299A3D1F3C), UINT64_C(0xB5FC136B64E18EDA), + UINT64_C(0xE81B53D97E58FEF0), UINT64_C(0x0D534055F7B0F28D) }, + { 
UINT64_C(0x47B8DE127A80619B), UINT64_C(0x60E2A2B381F9E55D), + UINT64_C(0x6E9624D7CF564CC5), UINT64_C(0xFDF18A216BDEDFFF), + UINT64_C(0x3787DE38C0D5FC82), UINT64_C(0xCBCAA347497A6B11) } }, + { { UINT64_C(0x6E7EF35EB226465A), UINT64_C(0x4B4699195F8A2BAF), + UINT64_C(0x44B3A3CF1120D93F), UINT64_C(0xB052C8B668F34AD1), + UINT64_C(0x27EC574BEF7632DD), UINT64_C(0xAEBEA108685DE26F) }, + { UINT64_C(0xDA33236BE39424B6), UINT64_C(0xB1BD94A9EBCC22AD), + UINT64_C(0x6DDEE6CC2CDFB5D5), UINT64_C(0xBDAED9276F14069A), + UINT64_C(0x2ADE427C2A247CB7), UINT64_C(0xCE96B436ED156A40) } }, + { { UINT64_C(0xDDDCA36081F3F819), UINT64_C(0x4AF4A49FD419B96A), + UINT64_C(0x746C65257CB966B9), UINT64_C(0x01E390886F610023), + UINT64_C(0x05ECB38D98DD33FC), UINT64_C(0x962B971B8F84EDF4) }, + { UINT64_C(0xEB32C0A56A6F2602), UINT64_C(0xF026AF71562D60F2), + UINT64_C(0xA9E246BF84615FAB), UINT64_C(0xAD96709275DBAE01), + UINT64_C(0xBF97C79B3ECE5D07), UINT64_C(0xE06266C774EAA3D3) } }, + { { UINT64_C(0x161A01572E6DBB6E), UINT64_C(0xB8AF490460FA8F47), + UINT64_C(0xE4336C4400197F22), UINT64_C(0xF811AFFA9CEDCE0E), + UINT64_C(0xB1DD7685F94C2EF1), UINT64_C(0xEEDC0F4BCA957BB0) }, + { UINT64_C(0xD319FD574AA76BB1), UINT64_C(0xB3525D7C16CD7CCB), + UINT64_C(0x7B22DA9CA97DD072), UINT64_C(0x99DB84BD38A83E71), + UINT64_C(0x4939BC8DC0EDD8BE), UINT64_C(0x06D524EA903A932C) } }, + { { UINT64_C(0x4BC950EC0E31F639), UINT64_C(0xB7ABD3DC6016BE30), + UINT64_C(0x3B0F44736703DAD0), UINT64_C(0xCC405F8B0AC1C4EA), + UINT64_C(0x9BED5E57176C3FEE), UINT64_C(0xF452481036AE36C2) }, + { UINT64_C(0xC1EDBB8315D7B503), UINT64_C(0x943B1156E30F3657), + UINT64_C(0x984E9EEF98377805), UINT64_C(0x291AE7AC36CF1DEB), + UINT64_C(0xFED8748CA9F66DF3), UINT64_C(0xECA758BBFEA8FA5D) } }, + }, + { + { { UINT64_C(0xACC787EF2DD1B249), UINT64_C(0x736E1030D82976F1), + UINT64_C(0x0A6940FAA01B3649), UINT64_C(0xE00B926BC42341E7), + UINT64_C(0x911508D0DE8FFD6C), UINT64_C(0x4DCF8D465276B0CB) }, + { UINT64_C(0x23AD0A90CC3CAD8D), UINT64_C(0x2A92E54CADED962A), + 
UINT64_C(0x93FBEC4DF231BFAF), UINT64_C(0x9544BC774798987A), + UINT64_C(0x48084E2508E29F60), UINT64_C(0x0C0D2F4332DE5869) } }, + { { UINT64_C(0x6778F9703A9ABC13), UINT64_C(0xFD014FAC3D2B166B), + UINT64_C(0x1FE4FC783C6FED60), UINT64_C(0x04295FA8AA7C69C5), + UINT64_C(0xA01DE56D7C123175), UINT64_C(0x0FA0D3A83D9A713A) }, + { UINT64_C(0xA7A6E5E3E3E08ADD), UINT64_C(0xBD77E94B1AC58F85), + UINT64_C(0x078F6FD2B7321A9C), UINT64_C(0x9564601E911EF6D9), + UINT64_C(0x31C5C1B2415C6BEF), UINT64_C(0xE6C0C91ED3212C62) } }, + { { UINT64_C(0xBA7BD23C0D16022F), UINT64_C(0xE9CF4750198BE288), + UINT64_C(0x304E316947DEEC65), UINT64_C(0xCF65B41F96EEB288), + UINT64_C(0x17E99C17927E9E3B), UINT64_C(0x82225546F6630A80) }, + { UINT64_C(0x15122B8ACA067BD9), UINT64_C(0xE2673205B77B4E98), + UINT64_C(0x130375659407CA63), UINT64_C(0x53624F548B621602), + UINT64_C(0x96AF2CB1EAE4BD06), UINT64_C(0x576ECD1C8FA20829) } }, + { { UINT64_C(0xA551CE107E02D2D0), UINT64_C(0x1584ED249D13DBC7), + UINT64_C(0x082017AD4DA7B6D8), UINT64_C(0x81918A8FE054BC48), + UINT64_C(0x677DB48E572DC384), UINT64_C(0x2EF822966155484C) }, + { UINT64_C(0xC3DB14C641B9C231), UINT64_C(0x910A87D14A766192), + UINT64_C(0x93D5CC8610AB8E0F), UINT64_C(0x4194D548AE57CA1B), + UINT64_C(0xFAF3A1D6267FC37A), UINT64_C(0x70EC236413B87C97) } }, + { { UINT64_C(0x064B565B5E12756A), UINT64_C(0x953B7BD1AE49C98E), + UINT64_C(0xE0CE8284F7001D91), UINT64_C(0x1546060BF31108D0), + UINT64_C(0xDBC2C3F46779B6E2), UINT64_C(0x157AA47DE0DD07CF) }, + { UINT64_C(0xBF4A1C6FF23B261E), UINT64_C(0x5B8EED30654F4BE5), + UINT64_C(0xDF5896D36B20CCD8), UINT64_C(0x56920E2C559ED23D), + UINT64_C(0x901F342EFA6E3E27), UINT64_C(0x745C747C896CA082) } }, + { { UINT64_C(0xDBCCD5752944EC84), UINT64_C(0x54A2A935A5FF65FE), + UINT64_C(0x88C92A5E1A1319B6), UINT64_C(0x9537C28F82DA96C1), + UINT64_C(0xB683647435F93C46), UINT64_C(0xEC526A1D65B0846C) }, + { UINT64_C(0x6F12AFBDF382C412), UINT64_C(0x5EBC81D89E99FA06), + UINT64_C(0x97B5D672869B93BD), UINT64_C(0x2983C310377E12AA), + 
UINT64_C(0x4875968124D681EA), UINT64_C(0x1E0BD106287FD767) } }, + { { UINT64_C(0x0AC75A3E7231247F), UINT64_C(0x65C20DE6EF27AD3A), + UINT64_C(0x87EB6CF1BD02EEE5), UINT64_C(0x264ACA7A00147E03), + UINT64_C(0xEBC78581AE2A9437), UINT64_C(0x9929964E6316BFA5) }, + { UINT64_C(0xDC09E0409AF207EF), UINT64_C(0x3ECFFE2D0C9D8658), + UINT64_C(0x547EA735DFB43D38), UINT64_C(0x5485247BD04B1B20), + UINT64_C(0xB18D3F02BFD8B609), UINT64_C(0xEEB3E805CCE73705) } }, + { { UINT64_C(0xDAB1A525DB93850F), UINT64_C(0x18ADAA238365B7D5), + UINT64_C(0x58485C90113FC8C7), UINT64_C(0x80C3DBB9348AD323), + UINT64_C(0xAF892FB5E16ADCA1), UINT64_C(0x2183C879979F005A) }, + { UINT64_C(0x20FA1A940643A99E), UINT64_C(0x2741221C1A1609CB), + UINT64_C(0x1C1687E53C2FBDDC), UINT64_C(0xDCCF329ED420D6CF), + UINT64_C(0x75D5577D2B7197D1), UINT64_C(0x4C3C3875C8729D9C) } }, + { { UINT64_C(0x5E79F995E5CBDCB9), UINT64_C(0x03139824A742FCC7), + UINT64_C(0x6D0C214A239EF4A1), UINT64_C(0x53A27952401A2944), + UINT64_C(0xF42A1B34C10BCDF0), UINT64_C(0x426BAA437CF38061) }, + { UINT64_C(0x16A53139A96AD0C8), UINT64_C(0x627F1D316BAD5301), + UINT64_C(0x5AF748774ACCD627), UINT64_C(0x3C58A1C5B55B0FB8), + UINT64_C(0xFAA57B91F4399A6A), UINT64_C(0xBAD283FBC28094B8) } }, + { { UINT64_C(0xBA32AC6183E10A93), UINT64_C(0x1C91F6B4EC06BDB0), + UINT64_C(0x42E6CFBC65F60C93), UINT64_C(0xEFE33BC82C0CDCBE), + UINT64_C(0xE0FE1D094D6414F2), UINT64_C(0x4C11231676FA5C5B) }, + { UINT64_C(0x812C1DC62E26200A), UINT64_C(0xD6C413C5EE879D25), + UINT64_C(0xBEADE255BCA8BAFE), UINT64_C(0x0EAF4AE2CE2BA0E7), + UINT64_C(0x66E9FFB0C4F4408A), UINT64_C(0xB36A86D79782C7AD) } }, + { { UINT64_C(0x10FCD1F4BAD8D1C7), UINT64_C(0xC903816A4502F645), + UINT64_C(0x7FAC1CC1A503B895), UINT64_C(0x8BCD60410778900C), + UINT64_C(0x5A5F22025BCF2784), UINT64_C(0x9B157E8710EDB896) }, + { UINT64_C(0x4C58DA69F602A8B1), UINT64_C(0xD55132F859EC9D7E), + UINT64_C(0x155B719AA26D4870), UINT64_C(0x25AAFCA336441746), + UINT64_C(0x01F83338DD3B6B30), UINT64_C(0xD52BB5C1551917CC) } }, + { { 
UINT64_C(0xA0B6207B6135066A), UINT64_C(0xB3409F842AEC8CBD), + UINT64_C(0x5EBFD43619D87DF0), UINT64_C(0xCB4C209BE8526DE2), + UINT64_C(0xD764085B21E1A230), UINT64_C(0x96F915540899964A) }, + { UINT64_C(0xB0BEC8EFA57D122A), UINT64_C(0xC572EC565D9D0B33), + UINT64_C(0xEBE2A780CFA7C72C), UINT64_C(0x52D40CDB9EF3295C), + UINT64_C(0x640045840DE74DFE), UINT64_C(0xA6846432C0809716) } }, + { { UINT64_C(0x0D09E8CD02C979BC), UINT64_C(0xEC4B21F6409F4F2A), + UINT64_C(0x68125C7013FB07CA), UINT64_C(0x1C4CFC176FDFA72A), + UINT64_C(0xC9E71B9E04539FCD), UINT64_C(0x94B7103D8BA70797) }, + { UINT64_C(0x6B81E82FB33FDE83), UINT64_C(0x7CA9A8CAEABAFD4B), + UINT64_C(0xADD85A67EAB819CE), UINT64_C(0xAEC2548398E99FFC), + UINT64_C(0x938D6440274A07B6), UINT64_C(0x0A5C7097564A6AA0) } }, + { { UINT64_C(0x7284FF502F4FCEB6), UINT64_C(0x0A28715A78D0D5CB), + UINT64_C(0xE70B7014BFCE187C), UINT64_C(0xA6B538F57A17148D), + UINT64_C(0x1DAB07C9DD427166), UINT64_C(0x5C5578B0149D23CA) }, + { UINT64_C(0x875E2056875B5EDE), UINT64_C(0xCBF44B6D02C893B9), + UINT64_C(0x5715A77E5C2993FB), UINT64_C(0xAF3281463410597E), + UINT64_C(0x65DF418F42DC49DF), UINT64_C(0x7AC9C720A9EE52F6) } }, + { { UINT64_C(0xB1C9AA0762955486), UINT64_C(0xCBF35BE3245061D7), + UINT64_C(0x811E1BD38CF4DDC0), UINT64_C(0xD9D4589C948F7C84), + UINT64_C(0x30D09A0FCB0F996D), UINT64_C(0x1A1B3B7A590E7704) }, + { UINT64_C(0xA848E3492082768D), UINT64_C(0x9FEBD4929A249DF4), + UINT64_C(0x503420AF5F20439A), UINT64_C(0x0CBE52B68E2BFCD4), + UINT64_C(0xB1D5E261118C91B2), UINT64_C(0x93CFF6DA71D8F2BC) } }, + { { UINT64_C(0x5F5BC06B8AB58944), UINT64_C(0xE4BED5384979882D), + UINT64_C(0x57C30362D79B0EB1), UINT64_C(0x391AE2C1EF7C56D8), + UINT64_C(0x28BC2E97ADD98625), UINT64_C(0xFA8E86B81B257107) }, + { UINT64_C(0x5E4859F86118C715), UINT64_C(0x91C83324524C71DD), + UINT64_C(0xFB2092436D2F5E6D), UINT64_C(0x6B4FE21F2A900A43), + UINT64_C(0x241F75D632A73C1F), UINT64_C(0xF5BC46295AE89613) } }, + } +}; + +/*- + * Finite field inversion. 
+ * Computed with Bernstein-Yang algorithm. + * https://tches.iacr.org/index.php/TCHES/article/view/8298 + * Based on https://github.com/mit-plv/fiat-crypto/tree/master/inversion/c + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp384r1_inv(fe_t output, const fe_t t1) +{ + int i; + fe_t v1, r1, v2; + limb_t *r2 = output; + limb_t f1[LIMB_CNT + 1], g1[LIMB_CNT + 1], f2[LIMB_CNT + 1], + g2[LIMB_CNT + 1]; + limb_t d2, d1 = 1; + + fe_copy(g1, t1); + g1[LIMB_CNT] = 0; + fe_copy(f1, const_psat); + f1[LIMB_CNT] = 0; + fe_copy(r1, const_one); + fe_set_zero(v1); + + /* 1110 divstep iterations */ + for (i = 0; i < 555; i++) { + fiat_secp384r1_divstep(&d2, f2, g2, v2, r2, d1, f1, g1, v1, r1); + fiat_secp384r1_divstep(&d1, f1, g1, v1, r1, d2, f2, g2, v2, r2); + } + + fiat_secp384r1_opp(output, v1); + fiat_secp384r1_selectznz(output, f1[LIMB_CNT] >> (LIMB_BITS - 1), v1, + output); + fiat_secp384r1_mul(output, output, const_divstep); +} + +/*- + * Q := 2P, both projective, Q and P same pointers OK + * Autogenerated: op3/dbl_proj.op3 + * https://eprint.iacr.org/2015/1060 Alg 6 + * ASSERT: a = -3 + */ +static void +point_double(pt_prj_t *Q, const pt_prj_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith */ + const limb_t *X = P->X; + const limb_t *Y = P->Y; + const limb_t *Z = P->Z; + limb_t *X3 = Q->X; + limb_t *Y3 = Q->Y; + limb_t *Z3 = Q->Z; + + /* the curve arith formula */ + fiat_secp384r1_square(t0, X); + fiat_secp384r1_square(t1, Y); + fiat_secp384r1_square(t2, Z); + fiat_secp384r1_mul(t3, X, Y); + fiat_secp384r1_add(t3, t3, t3); + fiat_secp384r1_mul(t4, Y, Z); + fiat_secp384r1_mul(Z3, X, Z); + fiat_secp384r1_add(Z3, Z3, Z3); + fiat_secp384r1_mul(Y3, b, t2); + fiat_secp384r1_sub(Y3, Y3, Z3); + fiat_secp384r1_add(X3, Y3, Y3); + fiat_secp384r1_add(Y3, X3, Y3); + fiat_secp384r1_sub(X3, t1, Y3); + 
fiat_secp384r1_add(Y3, t1, Y3); + fiat_secp384r1_mul(Y3, X3, Y3); + fiat_secp384r1_mul(X3, X3, t3); + fiat_secp384r1_add(t3, t2, t2); + fiat_secp384r1_add(t2, t2, t3); + fiat_secp384r1_mul(Z3, b, Z3); + fiat_secp384r1_sub(Z3, Z3, t2); + fiat_secp384r1_sub(Z3, Z3, t0); + fiat_secp384r1_add(t3, Z3, Z3); + fiat_secp384r1_add(Z3, Z3, t3); + fiat_secp384r1_add(t3, t0, t0); + fiat_secp384r1_add(t0, t3, t0); + fiat_secp384r1_sub(t0, t0, t2); + fiat_secp384r1_mul(t0, t0, Z3); + fiat_secp384r1_add(Y3, Y3, t0); + fiat_secp384r1_add(t0, t4, t4); + fiat_secp384r1_mul(Z3, t0, Z3); + fiat_secp384r1_sub(X3, X3, Z3); + fiat_secp384r1_mul(Z3, t0, t1); + fiat_secp384r1_add(Z3, Z3, Z3); + fiat_secp384r1_add(Z3, Z3, Z3); +} + +/*- + * R := Q + P where R and Q are projective, P affine. + * R and Q same pointers OK + * R and P same pointers not OK + * Autogenerated: op3/add_mixed.op3 + * https://eprint.iacr.org/2015/1060 Alg 5 + * ASSERT: a = -3 + */ +static void +point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith */ + const limb_t *X1 = Q->X; + const limb_t *Y1 = Q->Y; + const limb_t *Z1 = Q->Z; + const limb_t *X2 = P->X; + const limb_t *Y2 = P->Y; + fe_t X3; + fe_t Y3; + fe_t Z3; + limb_t nz; + + /* check P for affine inf */ + fiat_secp384r1_nonzero(&nz, P->Y); + + /* the curve arith formula */ + fiat_secp384r1_mul(t0, X1, X2); + fiat_secp384r1_mul(t1, Y1, Y2); + fiat_secp384r1_add(t3, X2, Y2); + fiat_secp384r1_add(t4, X1, Y1); + fiat_secp384r1_mul(t3, t3, t4); + fiat_secp384r1_add(t4, t0, t1); + fiat_secp384r1_sub(t3, t3, t4); + fiat_secp384r1_mul(t4, Y2, Z1); + fiat_secp384r1_add(t4, t4, Y1); + fiat_secp384r1_mul(Y3, X2, Z1); + fiat_secp384r1_add(Y3, Y3, X1); + fiat_secp384r1_mul(Z3, b, Z1); + fiat_secp384r1_sub(X3, Y3, Z3); + fiat_secp384r1_add(Z3, X3, X3); + fiat_secp384r1_add(X3, X3, Z3); + fiat_secp384r1_sub(Z3, t1, 
X3); + fiat_secp384r1_add(X3, t1, X3); + fiat_secp384r1_mul(Y3, b, Y3); + fiat_secp384r1_add(t1, Z1, Z1); + fiat_secp384r1_add(t2, t1, Z1); + fiat_secp384r1_sub(Y3, Y3, t2); + fiat_secp384r1_sub(Y3, Y3, t0); + fiat_secp384r1_add(t1, Y3, Y3); + fiat_secp384r1_add(Y3, t1, Y3); + fiat_secp384r1_add(t1, t0, t0); + fiat_secp384r1_add(t0, t1, t0); + fiat_secp384r1_sub(t0, t0, t2); + fiat_secp384r1_mul(t1, t4, Y3); + fiat_secp384r1_mul(t2, t0, Y3); + fiat_secp384r1_mul(Y3, X3, Z3); + fiat_secp384r1_add(Y3, Y3, t2); + fiat_secp384r1_mul(X3, t3, X3); + fiat_secp384r1_sub(X3, X3, t1); + fiat_secp384r1_mul(Z3, t4, Z3); + fiat_secp384r1_mul(t1, t3, t0); + fiat_secp384r1_add(Z3, Z3, t1); + + /* if P is inf, throw all that away and take Q */ + fiat_secp384r1_selectznz(R->X, nz, Q->X, X3); + fiat_secp384r1_selectznz(R->Y, nz, Q->Y, Y3); + fiat_secp384r1_selectznz(R->Z, nz, Q->Z, Z3); +} + +/*- + * R := Q + P all projective. + * R and Q same pointers OK + * R and P same pointers not OK + * Autogenerated: op3/add_proj.op3 + * https://eprint.iacr.org/2015/1060 Alg 4 + * ASSERT: a = -3 + */ +static void +point_add_proj(pt_prj_t *R, const pt_prj_t *Q, const pt_prj_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4, t5; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith */ + const limb_t *X1 = Q->X; + const limb_t *Y1 = Q->Y; + const limb_t *Z1 = Q->Z; + const limb_t *X2 = P->X; + const limb_t *Y2 = P->Y; + const limb_t *Z2 = P->Z; + limb_t *X3 = R->X; + limb_t *Y3 = R->Y; + limb_t *Z3 = R->Z; + + /* the curve arith formula */ + fiat_secp384r1_mul(t0, X1, X2); + fiat_secp384r1_mul(t1, Y1, Y2); + fiat_secp384r1_mul(t2, Z1, Z2); + fiat_secp384r1_add(t3, X1, Y1); + fiat_secp384r1_add(t4, X2, Y2); + fiat_secp384r1_mul(t3, t3, t4); + fiat_secp384r1_add(t4, t0, t1); + fiat_secp384r1_sub(t3, t3, t4); + fiat_secp384r1_add(t4, Y1, Z1); + fiat_secp384r1_add(t5, Y2, Z2); + fiat_secp384r1_mul(t4, t4, t5); + fiat_secp384r1_add(t5, t1, t2); + 
/*-
 * Fetch one bit of a 48-byte scalar.
 *
 * Bit idx lives in byte (idx / 8) at position (idx % 8), i.e. bit 0 is the
 * least significant bit of in[0].
 *
 * Returns 0 or 1 for 0 <= idx < 384, and 0 for any index outside that
 * range (callers rely on this to read "past the end" of the scalar).
 *
 * The range check is done before any shifting so that a negative idx is
 * never right-shifted (implementation-defined for signed types).
 */
static int
scalar_get_bit(const unsigned char in[48], int idx)
{
    if (idx < 0 || idx >= 48 * 8)
        return 0;

    return (in[idx >> 3] >> (idx & 0x7)) & 0x1;
}
+ * See "Exponent Recoding and Regular Exponentiation Algorithms", + * Tunstall et al., AfricaCrypt 2009, Alg 6. + * It forces an odd scalar and outputs digits in + * {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...} + * i.e. signed odd digits with _no zeroes_ -- that makes it "regular". + */ +static void +scalar_rwnaf(int8_t out[77], const unsigned char in[48]) +{ + int i; + int8_t window, d; + + window = (in[0] & (DRADIX_WNAF - 1)) | 1; + for (i = 0; i < 76; i++) { + d = (window & (DRADIX_WNAF - 1)) - DRADIX; + out[i] = d; + window = (window - d) >> RADIX; + window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1; + window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2; + window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3; + window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4; + window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5; + } + out[i] = window; +} + +/*- + * Compute "textbook" wnaf representation of a scalar. + * NB: not constant time + */ +static void +scalar_wnaf(int8_t out[385], const unsigned char in[48]) +{ + int i; + int8_t window, d; + + window = in[0] & (DRADIX_WNAF - 1); + for (i = 0; i < 385; i++) { + d = 0; + if ((window & 1) && ((d = window & (DRADIX_WNAF - 1)) & DRADIX)) + d -= DRADIX_WNAF; + out[i] = d; + window = (window - d) >> 1; + window += scalar_get_bit(in, i + 1 + RADIX) << RADIX; + } +} + +/*- + * Simultaneous scalar multiplication: interleaved "textbook" wnaf. 
/*-
 * Simultaneous scalar multiplication: interleaved "textbook" wnaf.
 * NB: not constant time
 *
 * Digits of b select entries of a table precomputed from P; digits of a
 * select entries of lut_cmb[0] (presumably odd multiples of the generator;
 * confirm against the table definition). The affine result is written to
 * out. out may alias P: P is only read by precomp_wnaf at the start and
 * out is only written at the very end.
 */
static void
var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48],
                  const unsigned char b[48], const pt_aff_t *P)
{
    /* is_inf: accumulator not yet initialized; flipped: Q.Y currently
     * holds the negated Y of the logical accumulator */
    int i, d, is_neg, is_inf = 1, flipped = 0;
    int8_t anaf[385] = { 0 };
    int8_t bnaf[385] = { 0 };
    pt_prj_t Q = { { 0 }, { 0 }, { 0 } };
    pt_prj_t precomp[DRADIX / 2];

    precomp_wnaf(precomp, P);
    scalar_wnaf(anaf, a);
    scalar_wnaf(bnaf, b);

    /* walk digits from most to least significant */
    for (i = 384; i >= 0; i--) {
        /* doubling is skipped until the first nonzero digit is seen */
        if (!is_inf)
            point_double(&Q, &Q);
        if ((d = bnaf[i])) {
            /* instead of negating the table entry, negate the accumulator
             * whenever the digit sign differs from the current flip state */
            if ((is_neg = d < 0) != flipped) {
                fiat_secp384r1_opp(Q.Y, Q.Y);
                flipped ^= 1;
            }
            /* map odd |d| to its table index: 1 -> 0, 3 -> 1, 5 -> 2, ... */
            d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;
            if (is_inf) {
                /* initialize accumulator */
                fe_copy(Q.X, &precomp[d].X);
                fe_copy(Q.Y, &precomp[d].Y);
                fe_copy(Q.Z, &precomp[d].Z);
                is_inf = 0;
            } else
                point_add_proj(&Q, &Q, &precomp[d]);
        }
        if ((d = anaf[i])) {
            /* same sign handling as above, for the digit of a */
            if ((is_neg = d < 0) != flipped) {
                fiat_secp384r1_opp(Q.Y, Q.Y);
                flipped ^= 1;
            }
            d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;
            if (is_inf) {
                /* initialize accumulator */
                /* table entries are affine, so Z is set to one */
                fe_copy(Q.X, &lut_cmb[0][d].X);
                fe_copy(Q.Y, &lut_cmb[0][d].Y);
                fe_copy(Q.Z, const_one);
                is_inf = 0;
            } else
                point_add_mixed(&Q, &Q, &lut_cmb[0][d]);
        }
    }

    if (is_inf) {
        /* initialize accumulator to inf: all-zero scalars */
        fe_set_zero(Q.X);
        fe_copy(Q.Y, const_one);
        fe_set_zero(Q.Z);
    }

    if (flipped) {
        /* correct sign */
        fiat_secp384r1_opp(Q.Y, Q.Y);
    }

    /* convert to affine -- NB depends on coordinate system */
    /* x = X/Z, y = Y/Z */
    fiat_secp384r1_inv(Q.Z, Q.Z);
    fiat_secp384r1_mul(out->X, Q.X, Q.Z);
    fiat_secp384r1_mul(out->Y, Q.Y, Q.Z);
}
/*-
 * Variable point scalar multiplication with "regular" wnaf.
 * Here "regular" means _no zeroes_, so the sequence of
 * EC arithmetic ops is fixed.
 *
 * Writes the affine result to out. out may alias P: P is consumed by
 * precomp_wnaf before out is first written. Note that out->Y is used as
 * scratch space inside the main loop.
 *
 * Table lookups scan every entry and use masked selects rather than
 * indexing by the (secret-derived) digit.
 */
static void
var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48],
               const pt_aff_t *P)
{
    int i, j, d, diff, is_neg;
    int8_t rnaf[77] = { 0 };
    pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } };
    pt_prj_t precomp[DRADIX / 2];

    precomp_wnaf(precomp, P);
    scalar_rwnaf(rnaf, scalar);

#if defined(_MSC_VER)
    /* result still unsigned: yes we know */
#pragma warning(push)
#pragma warning(disable : 4146)
#endif

    /* initialize accumulator to high digit */
    /* digit -> table index; the top digit is used without a sign step */
    d = (rnaf[76] - 1) >> 1;
    for (j = 0; j < DRADIX / 2; j++) {
        /* diff is 1 when d == j and 0 otherwise, computed without a
         * branch (relies on arithmetic right shift of a negative int) */
        diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1;
        fiat_secp384r1_selectznz(Q.X, diff, Q.X, precomp[j].X);
        fiat_secp384r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y);
        fiat_secp384r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z);
    }

    for (i = 75; i >= 0; i--) {
        /* shift the accumulator up by one window (RADIX doublings) */
        for (j = 0; j < RADIX; j++)
            point_double(&Q, &Q);
        d = rnaf[i];
        /* is_neg = (d < 0) ? 1 : 0 */
        is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
        /* d = abs(d) */
        d = (d ^ -is_neg) + is_neg;
        /* odd magnitude -> table index */
        d = (d - 1) >> 1;
        for (j = 0; j < DRADIX / 2; j++) {
            /* same branch-free equality test and full-table scan as above */
            diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1;
            fiat_secp384r1_selectznz(lut.X, diff, lut.X, precomp[j].X);
            fiat_secp384r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y);
            fiat_secp384r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z);
        }
        /* negate lut point if digit is negative */
        /* out->Y is borrowed as a temporary for the negated Y */
        fiat_secp384r1_opp(out->Y, lut.Y);
        fiat_secp384r1_selectznz(lut.Y, is_neg, lut.Y, out->Y);
        point_add_proj(&Q, &Q, &lut);
    }

#if defined(_MSC_VER)
#pragma warning(pop)
#endif

    /* conditionally subtract P if the scalar was even */
    /* lut := Q + (-P) is always computed; the selects below keep Q when
     * the low scalar bit is set and take lut otherwise */
    fe_copy(lut.X, precomp[0].X);
    fiat_secp384r1_opp(lut.Y, precomp[0].Y);
    fe_copy(lut.Z, precomp[0].Z);
    point_add_proj(&lut, &lut, &Q);
    fiat_secp384r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X);
    fiat_secp384r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y);
    fiat_secp384r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z);

    /* convert to affine -- NB depends on coordinate system */
    /* x = X/Z, y = Y/Z */
    fiat_secp384r1_inv(Q.Z, Q.Z);
    fiat_secp384r1_mul(out->X, Q.X, Q.Z);
    fiat_secp384r1_mul(out->Y, Q.Y, Q.Z);
}
fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Wrapper: simultaneous scalar mutiplication. + * outx, outy := a * G + b * P + * where P = (inx, iny). + * Everything is LE byte ordering. + */ +static void +point_mul_two_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char a[48], + const unsigned char b[48], + const unsigned char inx[48], + const unsigned char iny[48]) +{ + pt_aff_t P; + + fiat_secp384r1_from_bytes(P.X, inx); + fiat_secp384r1_from_bytes(P.Y, iny); + fiat_secp384r1_to_montgomery(P.X, P.X); + fiat_secp384r1_to_montgomery(P.Y, P.Y); + /* simultaneous scalar multiplication */ + var_smul_wnaf_two(&P, a, b, &P); + + fiat_secp384r1_from_montgomery(P.X, P.X); + fiat_secp384r1_from_montgomery(P.Y, P.Y); + fiat_secp384r1_to_bytes(outx, P.X); + fiat_secp384r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: fixed scalar mutiplication. + * outx, outy := scalar * G + * Everything is LE byte ordering. + */ +static void +point_mul_g_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48]) +{ + pt_aff_t P; + + /* fixed scmul function */ + fixed_smul_cmb(&P, scalar); + fiat_secp384r1_from_montgomery(P.X, P.X); + fiat_secp384r1_from_montgomery(P.Y, P.Y); + fiat_secp384r1_to_bytes(outx, P.X); + fiat_secp384r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: variable point scalar mutiplication. + * outx, outy := scalar * P + * where P = (inx, iny). + * Everything is LE byte ordering. 
+ */ +static void +point_mul_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48], + const unsigned char inx[48], + const unsigned char iny[48]) +{ + pt_aff_t P; + + fiat_secp384r1_from_bytes(P.X, inx); + fiat_secp384r1_from_bytes(P.Y, iny); + fiat_secp384r1_to_montgomery(P.X, P.X); + fiat_secp384r1_to_montgomery(P.Y, P.Y); + /* var scmul function */ + var_smul_rwnaf(&P, scalar, &P); + fiat_secp384r1_from_montgomery(P.X, P.X); + fiat_secp384r1_from_montgomery(P.Y, P.Y); + fiat_secp384r1_to_bytes(outx, P.X); + fiat_secp384r1_to_bytes(outy, P.Y); +} + +#undef RADIX +#include "ecp.h" +#include "mpi-priv.h" +#include "mplogic.h" + +/*- + * reverse bytes -- total hack + */ +#define MP_BE2LE(a) \ + do { \ + unsigned char z_bswap; \ + z_bswap = a[0]; \ + a[0] = a[47]; \ + a[47] = z_bswap; \ + z_bswap = a[1]; \ + a[1] = a[46]; \ + a[46] = z_bswap; \ + z_bswap = a[2]; \ + a[2] = a[45]; \ + a[45] = z_bswap; \ + z_bswap = a[3]; \ + a[3] = a[44]; \ + a[44] = z_bswap; \ + z_bswap = a[4]; \ + a[4] = a[43]; \ + a[43] = z_bswap; \ + z_bswap = a[5]; \ + a[5] = a[42]; \ + a[42] = z_bswap; \ + z_bswap = a[6]; \ + a[6] = a[41]; \ + a[41] = z_bswap; \ + z_bswap = a[7]; \ + a[7] = a[40]; \ + a[40] = z_bswap; \ + z_bswap = a[8]; \ + a[8] = a[39]; \ + a[39] = z_bswap; \ + z_bswap = a[9]; \ + a[9] = a[38]; \ + a[38] = z_bswap; \ + z_bswap = a[10]; \ + a[10] = a[37]; \ + a[37] = z_bswap; \ + z_bswap = a[11]; \ + a[11] = a[36]; \ + a[36] = z_bswap; \ + z_bswap = a[12]; \ + a[12] = a[35]; \ + a[35] = z_bswap; \ + z_bswap = a[13]; \ + a[13] = a[34]; \ + a[34] = z_bswap; \ + z_bswap = a[14]; \ + a[14] = a[33]; \ + a[33] = z_bswap; \ + z_bswap = a[15]; \ + a[15] = a[32]; \ + a[32] = z_bswap; \ + z_bswap = a[16]; \ + a[16] = a[31]; \ + a[31] = z_bswap; \ + z_bswap = a[17]; \ + a[17] = a[30]; \ + a[30] = z_bswap; \ + z_bswap = a[18]; \ + a[18] = a[29]; \ + a[29] = z_bswap; \ + z_bswap = a[19]; \ + a[19] = a[28]; \ + a[28] = z_bswap; \ + z_bswap = a[20]; \ + 
a[20] = a[27]; \ + a[27] = z_bswap; \ + z_bswap = a[21]; \ + a[21] = a[26]; \ + a[26] = z_bswap; \ + z_bswap = a[22]; \ + a[22] = a[25]; \ + a[25] = z_bswap; \ + z_bswap = a[23]; \ + a[23] = a[24]; \ + a[24] = z_bswap; \ + } while (0) + +static mp_err +point_mul_g_secp384r1_wrap(const mp_int *n, mp_int *out_x, + mp_int *out_y, const ECGroup *group) +{ + unsigned char b_x[48]; + unsigned char b_y[48]; + unsigned char b_n[48]; + mp_err res; + + ARGCHK(n != NULL && out_x != NULL && out_y != NULL, MP_BADARG); + + /* fail on out of range scalars */ + if (mpl_significant_bits(n) > 384 || mp_cmp_z(n) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); + MP_BE2LE(b_n); + point_mul_g_secp384r1(b_x, b_y, b_n); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48)); + +CLEANUP: + return res; +} + +static mp_err +point_mul_secp384r1_wrap(const mp_int *n, const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, const ECGroup *group) +{ + unsigned char b_x[48]; + unsigned char b_y[48]; + unsigned char b_n[48]; + mp_err res; + + ARGCHK(n != NULL && in_x != NULL && in_y != NULL && out_x != NULL && + out_y != NULL, + MP_BADARG); + + /* fail on out of range scalars */ + if (mpl_significant_bits(n) > 384 || mp_cmp_z(n) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 48)); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_BE2LE(b_n); + point_mul_secp384r1(b_x, b_y, b_n, b_x, b_y); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48)); + +CLEANUP: + return res; +} + +static mp_err +point_mul_two_secp384r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) +{ + unsigned 
char b_x[48]; + unsigned char b_y[48]; + unsigned char b_n1[48]; + unsigned char b_n2[48]; + mp_err res; + + /* If n2 == NULL or 0, this is just a base-point multiplication. */ + if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) + return point_mul_g_secp384r1_wrap(n1, out_x, out_y, group); + + /* If n1 == NULL or 0, this is just an arbitary-point multiplication. */ + if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) + return point_mul_secp384r1_wrap(n2, in_x, in_y, out_x, out_y, group); + + ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, + MP_BADARG); + + /* fail on out of range scalars */ + if (mpl_significant_bits(n1) > 384 || mp_cmp_z(n1) != MP_GT || + mpl_significant_bits(n2) > 384 || mp_cmp_z(n2) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n1, b_n1, 48)); + MP_CHECKOK(mp_to_fixlen_octets(n2, b_n2, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 48)); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_BE2LE(b_n1); + MP_BE2LE(b_n2); + point_mul_two_secp384r1(b_x, b_y, b_n1, b_n2, b_x, b_y); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48)); + +CLEANUP: + return res; +} + +mp_err +ec_group_set_secp384r1(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P384) { + group->base_point_mul = &point_mul_g_secp384r1_wrap; + group->point_mul = &point_mul_secp384r1_wrap; + group->points_mul = &point_mul_two_secp384r1_wrap; + } + return MP_OKAY; +} + +#else /* __SIZEOF_INT128__ */ + +#include +#include +#define LIMB_BITS 32 +#define LIMB_CNT 12 +/* Field elements */ +typedef uint32_t fe_t[LIMB_CNT]; +typedef uint32_t limb_t; + +#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) +#define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) + +/* Projective points */ +typedef struct { + fe_t X; + fe_t Y; + fe_t Z; +} pt_prj_t; + +/* Affine points */ +typedef struct { + fe_t X; + fe_t Y; +} pt_aff_t; + +/* BEGIN verbatim 
fiat code https://github.com/mit-plv/fiat-crypto */ +/*- + * MIT License + * + * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file). + * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Autogenerated: word_by_word_montgomery --static --use-value-barrier secp384r1 32 '2^384 - 2^128 - 2^96 + 2^32 - 1' */ +/* curve description: secp384r1 */ +/* machine_wordsize = 32 (from "32") */ +/* requested operations: (all) */ +/* m = 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff (from "2^384 - 2^128 - 2^96 + 2^32 - 1") */ +/* */ +/* NOTE: In addition to the bounds specified above each function, all */ +/* functions synthesized for this Montgomery arithmetic require the */ +/* input to be strictly less than the prime modulus (m), and also */ +/* require the input to be in the unique saturated representation. 
*/ +/* All functions also ensure that these two properties are true of */ +/* return values. */ +/* */ +/* Computed values: */ +/* eval z = z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) + (z[8] << 256) + (z[9] << 0x120) + (z[10] << 0x140) + (z[11] << 0x160) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */ +/* twos_complement_eval z = let x1 := z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) + (z[8] << 256) + (z[9] << 0x120) + (z[10] << 0x140) + (z[11] << 0x160) in */ +/* if x1 & (2^384-1) < 2^383 then x1 & (2^384-1) else (x1 & (2^384-1)) - 2^384 */ + +#include +typedef unsigned char fiat_secp384r1_uint1; +typedef signed char fiat_secp384r1_int1; +#ifdef __GNUC__ +#define FIAT_SECP384R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP384R1_FIAT_INLINE +#endif + +/* The type fiat_secp384r1_montgomery_domain_field_element is a field element in the Montgomery domain. 
*/ +/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ +typedef uint32_t fiat_secp384r1_montgomery_domain_field_element[12]; + +/* The type fiat_secp384r1_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ +typedef uint32_t fiat_secp384r1_non_montgomery_domain_field_element[12]; + +#if (-1 & 3) != 3 +#error "This code only works on a two's complement system" +#endif + +#if !defined(FIAT_SECP384R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +static __inline__ uint32_t +fiat_secp384r1_value_barrier_u32(uint32_t a) +{ + __asm__("" + : "+r"(a) + : /* no inputs */); + return a; +} +#else +#define fiat_secp384r1_value_barrier_u32(x) (x) +#endif + +/* + * The function fiat_secp384r1_addcarryx_u32 is an addition with carry. + * + * Postconditions: + * out1 = (arg1 + arg2 + arg3) mod 2^32 + * out2 = ⌊(arg1 + arg2 + arg3) / 2^32⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xffffffff] + * arg3: [0x0 ~> 0xffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_secp384r1_addcarryx_u32(uint32_t *out1, + fiat_secp384r1_uint1 *out2, + fiat_secp384r1_uint1 arg1, + uint32_t arg2, uint32_t arg3) +{ + uint64_t x1; + uint32_t x2; + fiat_secp384r1_uint1 x3; + x1 = ((arg1 + (uint64_t)arg2) + arg3); + x2 = (uint32_t)(x1 & UINT32_C(0xffffffff)); + x3 = (fiat_secp384r1_uint1)(x1 >> 32); + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_secp384r1_subborrowx_u32 is a subtraction with borrow. 
/*
 * The function fiat_secp384r1_mulx_u32 is a multiplication, returning the
 * full double-width result.
 *
 * Postconditions:
 *   out1 = (arg1 * arg2) mod 2^32
 *   out2 = floor(arg1 * arg2 / 2^32)
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0xffffffff]
 *   arg2: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 *   out2: [0x0 ~> 0xffffffff]
 */
static void
fiat_secp384r1_mulx_u32(uint32_t *out1, uint32_t *out2,
                        uint32_t arg1, uint32_t arg2)
{
    /* widen one operand so the product is computed in 64 bits */
    const uint64_t product = (uint64_t)arg1 * arg2;

    *out1 = (uint32_t)(product & UINT32_C(0xffffffff)); /* low word */
    *out2 = (uint32_t)(product >> 32);                  /* high word */
}
/*
 * The function fiat_secp384r1_cmovznz_u32 is a single-word conditional move.
 *
 * Postconditions:
 *   out1 = (if arg1 = 0 then arg2 else arg3)
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xffffffff]
 *   arg3: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 *
 * The selection is done with a bit mask instead of a branch on arg1.
 */
static void
fiat_secp384r1_cmovznz_u32(uint32_t *out1,
                           fiat_secp384r1_uint1 arg1, uint32_t arg2,
                           uint32_t arg3)
{
    fiat_secp384r1_uint1 x1;
    uint32_t x2;
    uint32_t x3;
    /* normalize the condition to exactly 0 or 1 */
    x1 = (!(!arg1));
    /* mask: all ones when x1 == 1, all zeros when x1 == 0 */
    x2 = ((fiat_secp384r1_int1)(0x0 - x1) & UINT32_C(0xffffffff));
    /* keep arg3 under the mask and arg2 under its complement; both mask
     * values are passed through the value barrier before use */
    x3 = ((fiat_secp384r1_value_barrier_u32(x2) & arg3) |
          (fiat_secp384r1_value_barrier_u32((~x2)) & arg2));
    *out1 = x3;
}
+ fiat_secp384r1_uint1 x50; + uint32_t x51; + fiat_secp384r1_uint1 x52; + uint32_t x53; + fiat_secp384r1_uint1 x54; + uint32_t x55; + fiat_secp384r1_uint1 x56; + uint32_t x57; + fiat_secp384r1_uint1 x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + uint32_t x63; + uint32_t x64; + uint32_t x65; + uint32_t x66; + uint32_t x67; + uint32_t x68; + uint32_t x69; + uint32_t x70; + uint32_t x71; + uint32_t x72; + uint32_t x73; + uint32_t x74; + uint32_t x75; + uint32_t x76; + uint32_t x77; + uint32_t x78; + uint32_t x79; + uint32_t x80; + fiat_secp384r1_uint1 x81; + uint32_t x82; + fiat_secp384r1_uint1 x83; + uint32_t x84; + fiat_secp384r1_uint1 x85; + uint32_t x86; + fiat_secp384r1_uint1 x87; + uint32_t x88; + fiat_secp384r1_uint1 x89; + uint32_t x90; + fiat_secp384r1_uint1 x91; + uint32_t x92; + fiat_secp384r1_uint1 x93; + uint32_t x94; + fiat_secp384r1_uint1 x95; + uint32_t x96; + uint32_t x97; + fiat_secp384r1_uint1 x98; + uint32_t x99; + fiat_secp384r1_uint1 x100; + uint32_t x101; + fiat_secp384r1_uint1 x102; + uint32_t x103; + fiat_secp384r1_uint1 x104; + uint32_t x105; + fiat_secp384r1_uint1 x106; + uint32_t x107; + fiat_secp384r1_uint1 x108; + uint32_t x109; + fiat_secp384r1_uint1 x110; + uint32_t x111; + fiat_secp384r1_uint1 x112; + uint32_t x113; + fiat_secp384r1_uint1 x114; + uint32_t x115; + fiat_secp384r1_uint1 x116; + uint32_t x117; + fiat_secp384r1_uint1 x118; + uint32_t x119; + fiat_secp384r1_uint1 x120; + uint32_t x121; + fiat_secp384r1_uint1 x122; + uint32_t x123; + uint32_t x124; + uint32_t x125; + uint32_t x126; + uint32_t x127; + uint32_t x128; + uint32_t x129; + uint32_t x130; + uint32_t x131; + uint32_t x132; + uint32_t x133; + uint32_t x134; + uint32_t x135; + uint32_t x136; + uint32_t x137; + uint32_t x138; + uint32_t x139; + uint32_t x140; + uint32_t x141; + uint32_t x142; + uint32_t x143; + uint32_t x144; + uint32_t x145; + uint32_t x146; + uint32_t x147; + fiat_secp384r1_uint1 x148; + uint32_t x149; + fiat_secp384r1_uint1 
x150; + uint32_t x151; + fiat_secp384r1_uint1 x152; + uint32_t x153; + fiat_secp384r1_uint1 x154; + uint32_t x155; + fiat_secp384r1_uint1 x156; + uint32_t x157; + fiat_secp384r1_uint1 x158; + uint32_t x159; + fiat_secp384r1_uint1 x160; + uint32_t x161; + fiat_secp384r1_uint1 x162; + uint32_t x163; + fiat_secp384r1_uint1 x164; + uint32_t x165; + fiat_secp384r1_uint1 x166; + uint32_t x167; + fiat_secp384r1_uint1 x168; + uint32_t x169; + uint32_t x170; + fiat_secp384r1_uint1 x171; + uint32_t x172; + fiat_secp384r1_uint1 x173; + uint32_t x174; + fiat_secp384r1_uint1 x175; + uint32_t x176; + fiat_secp384r1_uint1 x177; + uint32_t x178; + fiat_secp384r1_uint1 x179; + uint32_t x180; + fiat_secp384r1_uint1 x181; + uint32_t x182; + fiat_secp384r1_uint1 x183; + uint32_t x184; + fiat_secp384r1_uint1 x185; + uint32_t x186; + fiat_secp384r1_uint1 x187; + uint32_t x188; + fiat_secp384r1_uint1 x189; + uint32_t x190; + fiat_secp384r1_uint1 x191; + uint32_t x192; + fiat_secp384r1_uint1 x193; + uint32_t x194; + fiat_secp384r1_uint1 x195; + uint32_t x196; + uint32_t x197; + uint32_t x198; + uint32_t x199; + uint32_t x200; + uint32_t x201; + uint32_t x202; + uint32_t x203; + uint32_t x204; + uint32_t x205; + uint32_t x206; + uint32_t x207; + uint32_t x208; + uint32_t x209; + uint32_t x210; + uint32_t x211; + uint32_t x212; + uint32_t x213; + uint32_t x214; + uint32_t x215; + uint32_t x216; + fiat_secp384r1_uint1 x217; + uint32_t x218; + fiat_secp384r1_uint1 x219; + uint32_t x220; + fiat_secp384r1_uint1 x221; + uint32_t x222; + fiat_secp384r1_uint1 x223; + uint32_t x224; + fiat_secp384r1_uint1 x225; + uint32_t x226; + fiat_secp384r1_uint1 x227; + uint32_t x228; + fiat_secp384r1_uint1 x229; + uint32_t x230; + fiat_secp384r1_uint1 x231; + uint32_t x232; + uint32_t x233; + fiat_secp384r1_uint1 x234; + uint32_t x235; + fiat_secp384r1_uint1 x236; + uint32_t x237; + fiat_secp384r1_uint1 x238; + uint32_t x239; + fiat_secp384r1_uint1 x240; + uint32_t x241; + fiat_secp384r1_uint1 x242; + 
uint32_t x243; + fiat_secp384r1_uint1 x244; + uint32_t x245; + fiat_secp384r1_uint1 x246; + uint32_t x247; + fiat_secp384r1_uint1 x248; + uint32_t x249; + fiat_secp384r1_uint1 x250; + uint32_t x251; + fiat_secp384r1_uint1 x252; + uint32_t x253; + fiat_secp384r1_uint1 x254; + uint32_t x255; + fiat_secp384r1_uint1 x256; + uint32_t x257; + fiat_secp384r1_uint1 x258; + uint32_t x259; + uint32_t x260; + uint32_t x261; + uint32_t x262; + uint32_t x263; + uint32_t x264; + uint32_t x265; + uint32_t x266; + uint32_t x267; + uint32_t x268; + uint32_t x269; + uint32_t x270; + uint32_t x271; + uint32_t x272; + uint32_t x273; + uint32_t x274; + uint32_t x275; + uint32_t x276; + uint32_t x277; + uint32_t x278; + uint32_t x279; + uint32_t x280; + uint32_t x281; + uint32_t x282; + uint32_t x283; + uint32_t x284; + fiat_secp384r1_uint1 x285; + uint32_t x286; + fiat_secp384r1_uint1 x287; + uint32_t x288; + fiat_secp384r1_uint1 x289; + uint32_t x290; + fiat_secp384r1_uint1 x291; + uint32_t x292; + fiat_secp384r1_uint1 x293; + uint32_t x294; + fiat_secp384r1_uint1 x295; + uint32_t x296; + fiat_secp384r1_uint1 x297; + uint32_t x298; + fiat_secp384r1_uint1 x299; + uint32_t x300; + fiat_secp384r1_uint1 x301; + uint32_t x302; + fiat_secp384r1_uint1 x303; + uint32_t x304; + fiat_secp384r1_uint1 x305; + uint32_t x306; + uint32_t x307; + fiat_secp384r1_uint1 x308; + uint32_t x309; + fiat_secp384r1_uint1 x310; + uint32_t x311; + fiat_secp384r1_uint1 x312; + uint32_t x313; + fiat_secp384r1_uint1 x314; + uint32_t x315; + fiat_secp384r1_uint1 x316; + uint32_t x317; + fiat_secp384r1_uint1 x318; + uint32_t x319; + fiat_secp384r1_uint1 x320; + uint32_t x321; + fiat_secp384r1_uint1 x322; + uint32_t x323; + fiat_secp384r1_uint1 x324; + uint32_t x325; + fiat_secp384r1_uint1 x326; + uint32_t x327; + fiat_secp384r1_uint1 x328; + uint32_t x329; + fiat_secp384r1_uint1 x330; + uint32_t x331; + fiat_secp384r1_uint1 x332; + uint32_t x333; + uint32_t x334; + uint32_t x335; + uint32_t x336; + uint32_t x337; + 
uint32_t x338; + uint32_t x339; + uint32_t x340; + uint32_t x341; + uint32_t x342; + uint32_t x343; + uint32_t x344; + uint32_t x345; + uint32_t x346; + uint32_t x347; + uint32_t x348; + uint32_t x349; + uint32_t x350; + uint32_t x351; + uint32_t x352; + uint32_t x353; + fiat_secp384r1_uint1 x354; + uint32_t x355; + fiat_secp384r1_uint1 x356; + uint32_t x357; + fiat_secp384r1_uint1 x358; + uint32_t x359; + fiat_secp384r1_uint1 x360; + uint32_t x361; + fiat_secp384r1_uint1 x362; + uint32_t x363; + fiat_secp384r1_uint1 x364; + uint32_t x365; + fiat_secp384r1_uint1 x366; + uint32_t x367; + fiat_secp384r1_uint1 x368; + uint32_t x369; + uint32_t x370; + fiat_secp384r1_uint1 x371; + uint32_t x372; + fiat_secp384r1_uint1 x373; + uint32_t x374; + fiat_secp384r1_uint1 x375; + uint32_t x376; + fiat_secp384r1_uint1 x377; + uint32_t x378; + fiat_secp384r1_uint1 x379; + uint32_t x380; + fiat_secp384r1_uint1 x381; + uint32_t x382; + fiat_secp384r1_uint1 x383; + uint32_t x384; + fiat_secp384r1_uint1 x385; + uint32_t x386; + fiat_secp384r1_uint1 x387; + uint32_t x388; + fiat_secp384r1_uint1 x389; + uint32_t x390; + fiat_secp384r1_uint1 x391; + uint32_t x392; + fiat_secp384r1_uint1 x393; + uint32_t x394; + fiat_secp384r1_uint1 x395; + uint32_t x396; + uint32_t x397; + uint32_t x398; + uint32_t x399; + uint32_t x400; + uint32_t x401; + uint32_t x402; + uint32_t x403; + uint32_t x404; + uint32_t x405; + uint32_t x406; + uint32_t x407; + uint32_t x408; + uint32_t x409; + uint32_t x410; + uint32_t x411; + uint32_t x412; + uint32_t x413; + uint32_t x414; + uint32_t x415; + uint32_t x416; + uint32_t x417; + uint32_t x418; + uint32_t x419; + uint32_t x420; + uint32_t x421; + fiat_secp384r1_uint1 x422; + uint32_t x423; + fiat_secp384r1_uint1 x424; + uint32_t x425; + fiat_secp384r1_uint1 x426; + uint32_t x427; + fiat_secp384r1_uint1 x428; + uint32_t x429; + fiat_secp384r1_uint1 x430; + uint32_t x431; + fiat_secp384r1_uint1 x432; + uint32_t x433; + fiat_secp384r1_uint1 x434; + uint32_t x435; 
+ fiat_secp384r1_uint1 x436; + uint32_t x437; + fiat_secp384r1_uint1 x438; + uint32_t x439; + fiat_secp384r1_uint1 x440; + uint32_t x441; + fiat_secp384r1_uint1 x442; + uint32_t x443; + uint32_t x444; + fiat_secp384r1_uint1 x445; + uint32_t x446; + fiat_secp384r1_uint1 x447; + uint32_t x448; + fiat_secp384r1_uint1 x449; + uint32_t x450; + fiat_secp384r1_uint1 x451; + uint32_t x452; + fiat_secp384r1_uint1 x453; + uint32_t x454; + fiat_secp384r1_uint1 x455; + uint32_t x456; + fiat_secp384r1_uint1 x457; + uint32_t x458; + fiat_secp384r1_uint1 x459; + uint32_t x460; + fiat_secp384r1_uint1 x461; + uint32_t x462; + fiat_secp384r1_uint1 x463; + uint32_t x464; + fiat_secp384r1_uint1 x465; + uint32_t x466; + fiat_secp384r1_uint1 x467; + uint32_t x468; + fiat_secp384r1_uint1 x469; + uint32_t x470; + uint32_t x471; + uint32_t x472; + uint32_t x473; + uint32_t x474; + uint32_t x475; + uint32_t x476; + uint32_t x477; + uint32_t x478; + uint32_t x479; + uint32_t x480; + uint32_t x481; + uint32_t x482; + uint32_t x483; + uint32_t x484; + uint32_t x485; + uint32_t x486; + uint32_t x487; + uint32_t x488; + uint32_t x489; + uint32_t x490; + fiat_secp384r1_uint1 x491; + uint32_t x492; + fiat_secp384r1_uint1 x493; + uint32_t x494; + fiat_secp384r1_uint1 x495; + uint32_t x496; + fiat_secp384r1_uint1 x497; + uint32_t x498; + fiat_secp384r1_uint1 x499; + uint32_t x500; + fiat_secp384r1_uint1 x501; + uint32_t x502; + fiat_secp384r1_uint1 x503; + uint32_t x504; + fiat_secp384r1_uint1 x505; + uint32_t x506; + uint32_t x507; + fiat_secp384r1_uint1 x508; + uint32_t x509; + fiat_secp384r1_uint1 x510; + uint32_t x511; + fiat_secp384r1_uint1 x512; + uint32_t x513; + fiat_secp384r1_uint1 x514; + uint32_t x515; + fiat_secp384r1_uint1 x516; + uint32_t x517; + fiat_secp384r1_uint1 x518; + uint32_t x519; + fiat_secp384r1_uint1 x520; + uint32_t x521; + fiat_secp384r1_uint1 x522; + uint32_t x523; + fiat_secp384r1_uint1 x524; + uint32_t x525; + fiat_secp384r1_uint1 x526; + uint32_t x527; + 
fiat_secp384r1_uint1 x528; + uint32_t x529; + fiat_secp384r1_uint1 x530; + uint32_t x531; + fiat_secp384r1_uint1 x532; + uint32_t x533; + uint32_t x534; + uint32_t x535; + uint32_t x536; + uint32_t x537; + uint32_t x538; + uint32_t x539; + uint32_t x540; + uint32_t x541; + uint32_t x542; + uint32_t x543; + uint32_t x544; + uint32_t x545; + uint32_t x546; + uint32_t x547; + uint32_t x548; + uint32_t x549; + uint32_t x550; + uint32_t x551; + uint32_t x552; + uint32_t x553; + uint32_t x554; + uint32_t x555; + uint32_t x556; + uint32_t x557; + uint32_t x558; + fiat_secp384r1_uint1 x559; + uint32_t x560; + fiat_secp384r1_uint1 x561; + uint32_t x562; + fiat_secp384r1_uint1 x563; + uint32_t x564; + fiat_secp384r1_uint1 x565; + uint32_t x566; + fiat_secp384r1_uint1 x567; + uint32_t x568; + fiat_secp384r1_uint1 x569; + uint32_t x570; + fiat_secp384r1_uint1 x571; + uint32_t x572; + fiat_secp384r1_uint1 x573; + uint32_t x574; + fiat_secp384r1_uint1 x575; + uint32_t x576; + fiat_secp384r1_uint1 x577; + uint32_t x578; + fiat_secp384r1_uint1 x579; + uint32_t x580; + uint32_t x581; + fiat_secp384r1_uint1 x582; + uint32_t x583; + fiat_secp384r1_uint1 x584; + uint32_t x585; + fiat_secp384r1_uint1 x586; + uint32_t x587; + fiat_secp384r1_uint1 x588; + uint32_t x589; + fiat_secp384r1_uint1 x590; + uint32_t x591; + fiat_secp384r1_uint1 x592; + uint32_t x593; + fiat_secp384r1_uint1 x594; + uint32_t x595; + fiat_secp384r1_uint1 x596; + uint32_t x597; + fiat_secp384r1_uint1 x598; + uint32_t x599; + fiat_secp384r1_uint1 x600; + uint32_t x601; + fiat_secp384r1_uint1 x602; + uint32_t x603; + fiat_secp384r1_uint1 x604; + uint32_t x605; + fiat_secp384r1_uint1 x606; + uint32_t x607; + uint32_t x608; + uint32_t x609; + uint32_t x610; + uint32_t x611; + uint32_t x612; + uint32_t x613; + uint32_t x614; + uint32_t x615; + uint32_t x616; + uint32_t x617; + uint32_t x618; + uint32_t x619; + uint32_t x620; + uint32_t x621; + uint32_t x622; + uint32_t x623; + uint32_t x624; + uint32_t x625; + uint32_t 
x626; + uint32_t x627; + fiat_secp384r1_uint1 x628; + uint32_t x629; + fiat_secp384r1_uint1 x630; + uint32_t x631; + fiat_secp384r1_uint1 x632; + uint32_t x633; + fiat_secp384r1_uint1 x634; + uint32_t x635; + fiat_secp384r1_uint1 x636; + uint32_t x637; + fiat_secp384r1_uint1 x638; + uint32_t x639; + fiat_secp384r1_uint1 x640; + uint32_t x641; + fiat_secp384r1_uint1 x642; + uint32_t x643; + uint32_t x644; + fiat_secp384r1_uint1 x645; + uint32_t x646; + fiat_secp384r1_uint1 x647; + uint32_t x648; + fiat_secp384r1_uint1 x649; + uint32_t x650; + fiat_secp384r1_uint1 x651; + uint32_t x652; + fiat_secp384r1_uint1 x653; + uint32_t x654; + fiat_secp384r1_uint1 x655; + uint32_t x656; + fiat_secp384r1_uint1 x657; + uint32_t x658; + fiat_secp384r1_uint1 x659; + uint32_t x660; + fiat_secp384r1_uint1 x661; + uint32_t x662; + fiat_secp384r1_uint1 x663; + uint32_t x664; + fiat_secp384r1_uint1 x665; + uint32_t x666; + fiat_secp384r1_uint1 x667; + uint32_t x668; + fiat_secp384r1_uint1 x669; + uint32_t x670; + uint32_t x671; + uint32_t x672; + uint32_t x673; + uint32_t x674; + uint32_t x675; + uint32_t x676; + uint32_t x677; + uint32_t x678; + uint32_t x679; + uint32_t x680; + uint32_t x681; + uint32_t x682; + uint32_t x683; + uint32_t x684; + uint32_t x685; + uint32_t x686; + uint32_t x687; + uint32_t x688; + uint32_t x689; + uint32_t x690; + uint32_t x691; + uint32_t x692; + uint32_t x693; + uint32_t x694; + uint32_t x695; + fiat_secp384r1_uint1 x696; + uint32_t x697; + fiat_secp384r1_uint1 x698; + uint32_t x699; + fiat_secp384r1_uint1 x700; + uint32_t x701; + fiat_secp384r1_uint1 x702; + uint32_t x703; + fiat_secp384r1_uint1 x704; + uint32_t x705; + fiat_secp384r1_uint1 x706; + uint32_t x707; + fiat_secp384r1_uint1 x708; + uint32_t x709; + fiat_secp384r1_uint1 x710; + uint32_t x711; + fiat_secp384r1_uint1 x712; + uint32_t x713; + fiat_secp384r1_uint1 x714; + uint32_t x715; + fiat_secp384r1_uint1 x716; + uint32_t x717; + uint32_t x718; + fiat_secp384r1_uint1 x719; + uint32_t x720; 
+ fiat_secp384r1_uint1 x721; + uint32_t x722; + fiat_secp384r1_uint1 x723; + uint32_t x724; + fiat_secp384r1_uint1 x725; + uint32_t x726; + fiat_secp384r1_uint1 x727; + uint32_t x728; + fiat_secp384r1_uint1 x729; + uint32_t x730; + fiat_secp384r1_uint1 x731; + uint32_t x732; + fiat_secp384r1_uint1 x733; + uint32_t x734; + fiat_secp384r1_uint1 x735; + uint32_t x736; + fiat_secp384r1_uint1 x737; + uint32_t x738; + fiat_secp384r1_uint1 x739; + uint32_t x740; + fiat_secp384r1_uint1 x741; + uint32_t x742; + fiat_secp384r1_uint1 x743; + uint32_t x744; + uint32_t x745; + uint32_t x746; + uint32_t x747; + uint32_t x748; + uint32_t x749; + uint32_t x750; + uint32_t x751; + uint32_t x752; + uint32_t x753; + uint32_t x754; + uint32_t x755; + uint32_t x756; + uint32_t x757; + uint32_t x758; + uint32_t x759; + uint32_t x760; + uint32_t x761; + uint32_t x762; + uint32_t x763; + uint32_t x764; + fiat_secp384r1_uint1 x765; + uint32_t x766; + fiat_secp384r1_uint1 x767; + uint32_t x768; + fiat_secp384r1_uint1 x769; + uint32_t x770; + fiat_secp384r1_uint1 x771; + uint32_t x772; + fiat_secp384r1_uint1 x773; + uint32_t x774; + fiat_secp384r1_uint1 x775; + uint32_t x776; + fiat_secp384r1_uint1 x777; + uint32_t x778; + fiat_secp384r1_uint1 x779; + uint32_t x780; + uint32_t x781; + fiat_secp384r1_uint1 x782; + uint32_t x783; + fiat_secp384r1_uint1 x784; + uint32_t x785; + fiat_secp384r1_uint1 x786; + uint32_t x787; + fiat_secp384r1_uint1 x788; + uint32_t x789; + fiat_secp384r1_uint1 x790; + uint32_t x791; + fiat_secp384r1_uint1 x792; + uint32_t x793; + fiat_secp384r1_uint1 x794; + uint32_t x795; + fiat_secp384r1_uint1 x796; + uint32_t x797; + fiat_secp384r1_uint1 x798; + uint32_t x799; + fiat_secp384r1_uint1 x800; + uint32_t x801; + fiat_secp384r1_uint1 x802; + uint32_t x803; + fiat_secp384r1_uint1 x804; + uint32_t x805; + fiat_secp384r1_uint1 x806; + uint32_t x807; + uint32_t x808; + uint32_t x809; + uint32_t x810; + uint32_t x811; + uint32_t x812; + uint32_t x813; + uint32_t x814; + 
uint32_t x815; + uint32_t x816; + uint32_t x817; + uint32_t x818; + uint32_t x819; + uint32_t x820; + uint32_t x821; + uint32_t x822; + uint32_t x823; + uint32_t x824; + uint32_t x825; + uint32_t x826; + uint32_t x827; + uint32_t x828; + uint32_t x829; + uint32_t x830; + uint32_t x831; + uint32_t x832; + fiat_secp384r1_uint1 x833; + uint32_t x834; + fiat_secp384r1_uint1 x835; + uint32_t x836; + fiat_secp384r1_uint1 x837; + uint32_t x838; + fiat_secp384r1_uint1 x839; + uint32_t x840; + fiat_secp384r1_uint1 x841; + uint32_t x842; + fiat_secp384r1_uint1 x843; + uint32_t x844; + fiat_secp384r1_uint1 x845; + uint32_t x846; + fiat_secp384r1_uint1 x847; + uint32_t x848; + fiat_secp384r1_uint1 x849; + uint32_t x850; + fiat_secp384r1_uint1 x851; + uint32_t x852; + fiat_secp384r1_uint1 x853; + uint32_t x854; + uint32_t x855; + fiat_secp384r1_uint1 x856; + uint32_t x857; + fiat_secp384r1_uint1 x858; + uint32_t x859; + fiat_secp384r1_uint1 x860; + uint32_t x861; + fiat_secp384r1_uint1 x862; + uint32_t x863; + fiat_secp384r1_uint1 x864; + uint32_t x865; + fiat_secp384r1_uint1 x866; + uint32_t x867; + fiat_secp384r1_uint1 x868; + uint32_t x869; + fiat_secp384r1_uint1 x870; + uint32_t x871; + fiat_secp384r1_uint1 x872; + uint32_t x873; + fiat_secp384r1_uint1 x874; + uint32_t x875; + fiat_secp384r1_uint1 x876; + uint32_t x877; + fiat_secp384r1_uint1 x878; + uint32_t x879; + fiat_secp384r1_uint1 x880; + uint32_t x881; + uint32_t x882; + uint32_t x883; + uint32_t x884; + uint32_t x885; + uint32_t x886; + uint32_t x887; + uint32_t x888; + uint32_t x889; + uint32_t x890; + uint32_t x891; + uint32_t x892; + uint32_t x893; + uint32_t x894; + uint32_t x895; + uint32_t x896; + uint32_t x897; + uint32_t x898; + uint32_t x899; + uint32_t x900; + uint32_t x901; + fiat_secp384r1_uint1 x902; + uint32_t x903; + fiat_secp384r1_uint1 x904; + uint32_t x905; + fiat_secp384r1_uint1 x906; + uint32_t x907; + fiat_secp384r1_uint1 x908; + uint32_t x909; + fiat_secp384r1_uint1 x910; + uint32_t x911; + 
fiat_secp384r1_uint1 x912; + uint32_t x913; + fiat_secp384r1_uint1 x914; + uint32_t x915; + fiat_secp384r1_uint1 x916; + uint32_t x917; + uint32_t x918; + fiat_secp384r1_uint1 x919; + uint32_t x920; + fiat_secp384r1_uint1 x921; + uint32_t x922; + fiat_secp384r1_uint1 x923; + uint32_t x924; + fiat_secp384r1_uint1 x925; + uint32_t x926; + fiat_secp384r1_uint1 x927; + uint32_t x928; + fiat_secp384r1_uint1 x929; + uint32_t x930; + fiat_secp384r1_uint1 x931; + uint32_t x932; + fiat_secp384r1_uint1 x933; + uint32_t x934; + fiat_secp384r1_uint1 x935; + uint32_t x936; + fiat_secp384r1_uint1 x937; + uint32_t x938; + fiat_secp384r1_uint1 x939; + uint32_t x940; + fiat_secp384r1_uint1 x941; + uint32_t x942; + fiat_secp384r1_uint1 x943; + uint32_t x944; + uint32_t x945; + uint32_t x946; + uint32_t x947; + uint32_t x948; + uint32_t x949; + uint32_t x950; + uint32_t x951; + uint32_t x952; + uint32_t x953; + uint32_t x954; + uint32_t x955; + uint32_t x956; + uint32_t x957; + uint32_t x958; + uint32_t x959; + uint32_t x960; + uint32_t x961; + uint32_t x962; + uint32_t x963; + uint32_t x964; + uint32_t x965; + uint32_t x966; + uint32_t x967; + uint32_t x968; + uint32_t x969; + fiat_secp384r1_uint1 x970; + uint32_t x971; + fiat_secp384r1_uint1 x972; + uint32_t x973; + fiat_secp384r1_uint1 x974; + uint32_t x975; + fiat_secp384r1_uint1 x976; + uint32_t x977; + fiat_secp384r1_uint1 x978; + uint32_t x979; + fiat_secp384r1_uint1 x980; + uint32_t x981; + fiat_secp384r1_uint1 x982; + uint32_t x983; + fiat_secp384r1_uint1 x984; + uint32_t x985; + fiat_secp384r1_uint1 x986; + uint32_t x987; + fiat_secp384r1_uint1 x988; + uint32_t x989; + fiat_secp384r1_uint1 x990; + uint32_t x991; + uint32_t x992; + fiat_secp384r1_uint1 x993; + uint32_t x994; + fiat_secp384r1_uint1 x995; + uint32_t x996; + fiat_secp384r1_uint1 x997; + uint32_t x998; + fiat_secp384r1_uint1 x999; + uint32_t x1000; + fiat_secp384r1_uint1 x1001; + uint32_t x1002; + fiat_secp384r1_uint1 x1003; + uint32_t x1004; + 
fiat_secp384r1_uint1 x1005; + uint32_t x1006; + fiat_secp384r1_uint1 x1007; + uint32_t x1008; + fiat_secp384r1_uint1 x1009; + uint32_t x1010; + fiat_secp384r1_uint1 x1011; + uint32_t x1012; + fiat_secp384r1_uint1 x1013; + uint32_t x1014; + fiat_secp384r1_uint1 x1015; + uint32_t x1016; + fiat_secp384r1_uint1 x1017; + uint32_t x1018; + uint32_t x1019; + uint32_t x1020; + uint32_t x1021; + uint32_t x1022; + uint32_t x1023; + uint32_t x1024; + uint32_t x1025; + uint32_t x1026; + uint32_t x1027; + uint32_t x1028; + uint32_t x1029; + uint32_t x1030; + uint32_t x1031; + uint32_t x1032; + uint32_t x1033; + uint32_t x1034; + uint32_t x1035; + uint32_t x1036; + uint32_t x1037; + uint32_t x1038; + fiat_secp384r1_uint1 x1039; + uint32_t x1040; + fiat_secp384r1_uint1 x1041; + uint32_t x1042; + fiat_secp384r1_uint1 x1043; + uint32_t x1044; + fiat_secp384r1_uint1 x1045; + uint32_t x1046; + fiat_secp384r1_uint1 x1047; + uint32_t x1048; + fiat_secp384r1_uint1 x1049; + uint32_t x1050; + fiat_secp384r1_uint1 x1051; + uint32_t x1052; + fiat_secp384r1_uint1 x1053; + uint32_t x1054; + uint32_t x1055; + fiat_secp384r1_uint1 x1056; + uint32_t x1057; + fiat_secp384r1_uint1 x1058; + uint32_t x1059; + fiat_secp384r1_uint1 x1060; + uint32_t x1061; + fiat_secp384r1_uint1 x1062; + uint32_t x1063; + fiat_secp384r1_uint1 x1064; + uint32_t x1065; + fiat_secp384r1_uint1 x1066; + uint32_t x1067; + fiat_secp384r1_uint1 x1068; + uint32_t x1069; + fiat_secp384r1_uint1 x1070; + uint32_t x1071; + fiat_secp384r1_uint1 x1072; + uint32_t x1073; + fiat_secp384r1_uint1 x1074; + uint32_t x1075; + fiat_secp384r1_uint1 x1076; + uint32_t x1077; + fiat_secp384r1_uint1 x1078; + uint32_t x1079; + fiat_secp384r1_uint1 x1080; + uint32_t x1081; + uint32_t x1082; + uint32_t x1083; + uint32_t x1084; + uint32_t x1085; + uint32_t x1086; + uint32_t x1087; + uint32_t x1088; + uint32_t x1089; + uint32_t x1090; + uint32_t x1091; + uint32_t x1092; + uint32_t x1093; + uint32_t x1094; + uint32_t x1095; + uint32_t x1096; + 
uint32_t x1097; + uint32_t x1098; + uint32_t x1099; + uint32_t x1100; + uint32_t x1101; + uint32_t x1102; + uint32_t x1103; + uint32_t x1104; + uint32_t x1105; + uint32_t x1106; + fiat_secp384r1_uint1 x1107; + uint32_t x1108; + fiat_secp384r1_uint1 x1109; + uint32_t x1110; + fiat_secp384r1_uint1 x1111; + uint32_t x1112; + fiat_secp384r1_uint1 x1113; + uint32_t x1114; + fiat_secp384r1_uint1 x1115; + uint32_t x1116; + fiat_secp384r1_uint1 x1117; + uint32_t x1118; + fiat_secp384r1_uint1 x1119; + uint32_t x1120; + fiat_secp384r1_uint1 x1121; + uint32_t x1122; + fiat_secp384r1_uint1 x1123; + uint32_t x1124; + fiat_secp384r1_uint1 x1125; + uint32_t x1126; + fiat_secp384r1_uint1 x1127; + uint32_t x1128; + uint32_t x1129; + fiat_secp384r1_uint1 x1130; + uint32_t x1131; + fiat_secp384r1_uint1 x1132; + uint32_t x1133; + fiat_secp384r1_uint1 x1134; + uint32_t x1135; + fiat_secp384r1_uint1 x1136; + uint32_t x1137; + fiat_secp384r1_uint1 x1138; + uint32_t x1139; + fiat_secp384r1_uint1 x1140; + uint32_t x1141; + fiat_secp384r1_uint1 x1142; + uint32_t x1143; + fiat_secp384r1_uint1 x1144; + uint32_t x1145; + fiat_secp384r1_uint1 x1146; + uint32_t x1147; + fiat_secp384r1_uint1 x1148; + uint32_t x1149; + fiat_secp384r1_uint1 x1150; + uint32_t x1151; + fiat_secp384r1_uint1 x1152; + uint32_t x1153; + fiat_secp384r1_uint1 x1154; + uint32_t x1155; + uint32_t x1156; + uint32_t x1157; + uint32_t x1158; + uint32_t x1159; + uint32_t x1160; + uint32_t x1161; + uint32_t x1162; + uint32_t x1163; + uint32_t x1164; + uint32_t x1165; + uint32_t x1166; + uint32_t x1167; + uint32_t x1168; + uint32_t x1169; + uint32_t x1170; + uint32_t x1171; + uint32_t x1172; + uint32_t x1173; + uint32_t x1174; + uint32_t x1175; + fiat_secp384r1_uint1 x1176; + uint32_t x1177; + fiat_secp384r1_uint1 x1178; + uint32_t x1179; + fiat_secp384r1_uint1 x1180; + uint32_t x1181; + fiat_secp384r1_uint1 x1182; + uint32_t x1183; + fiat_secp384r1_uint1 x1184; + uint32_t x1185; + fiat_secp384r1_uint1 x1186; + uint32_t x1187; + 
fiat_secp384r1_uint1 x1188; + uint32_t x1189; + fiat_secp384r1_uint1 x1190; + uint32_t x1191; + uint32_t x1192; + fiat_secp384r1_uint1 x1193; + uint32_t x1194; + fiat_secp384r1_uint1 x1195; + uint32_t x1196; + fiat_secp384r1_uint1 x1197; + uint32_t x1198; + fiat_secp384r1_uint1 x1199; + uint32_t x1200; + fiat_secp384r1_uint1 x1201; + uint32_t x1202; + fiat_secp384r1_uint1 x1203; + uint32_t x1204; + fiat_secp384r1_uint1 x1205; + uint32_t x1206; + fiat_secp384r1_uint1 x1207; + uint32_t x1208; + fiat_secp384r1_uint1 x1209; + uint32_t x1210; + fiat_secp384r1_uint1 x1211; + uint32_t x1212; + fiat_secp384r1_uint1 x1213; + uint32_t x1214; + fiat_secp384r1_uint1 x1215; + uint32_t x1216; + fiat_secp384r1_uint1 x1217; + uint32_t x1218; + uint32_t x1219; + uint32_t x1220; + uint32_t x1221; + uint32_t x1222; + uint32_t x1223; + uint32_t x1224; + uint32_t x1225; + uint32_t x1226; + uint32_t x1227; + uint32_t x1228; + uint32_t x1229; + uint32_t x1230; + uint32_t x1231; + uint32_t x1232; + uint32_t x1233; + uint32_t x1234; + uint32_t x1235; + uint32_t x1236; + uint32_t x1237; + uint32_t x1238; + uint32_t x1239; + uint32_t x1240; + uint32_t x1241; + uint32_t x1242; + uint32_t x1243; + fiat_secp384r1_uint1 x1244; + uint32_t x1245; + fiat_secp384r1_uint1 x1246; + uint32_t x1247; + fiat_secp384r1_uint1 x1248; + uint32_t x1249; + fiat_secp384r1_uint1 x1250; + uint32_t x1251; + fiat_secp384r1_uint1 x1252; + uint32_t x1253; + fiat_secp384r1_uint1 x1254; + uint32_t x1255; + fiat_secp384r1_uint1 x1256; + uint32_t x1257; + fiat_secp384r1_uint1 x1258; + uint32_t x1259; + fiat_secp384r1_uint1 x1260; + uint32_t x1261; + fiat_secp384r1_uint1 x1262; + uint32_t x1263; + fiat_secp384r1_uint1 x1264; + uint32_t x1265; + uint32_t x1266; + fiat_secp384r1_uint1 x1267; + uint32_t x1268; + fiat_secp384r1_uint1 x1269; + uint32_t x1270; + fiat_secp384r1_uint1 x1271; + uint32_t x1272; + fiat_secp384r1_uint1 x1273; + uint32_t x1274; + fiat_secp384r1_uint1 x1275; + uint32_t x1276; + fiat_secp384r1_uint1 
x1277; + uint32_t x1278; + fiat_secp384r1_uint1 x1279; + uint32_t x1280; + fiat_secp384r1_uint1 x1281; + uint32_t x1282; + fiat_secp384r1_uint1 x1283; + uint32_t x1284; + fiat_secp384r1_uint1 x1285; + uint32_t x1286; + fiat_secp384r1_uint1 x1287; + uint32_t x1288; + fiat_secp384r1_uint1 x1289; + uint32_t x1290; + fiat_secp384r1_uint1 x1291; + uint32_t x1292; + uint32_t x1293; + uint32_t x1294; + uint32_t x1295; + uint32_t x1296; + uint32_t x1297; + uint32_t x1298; + uint32_t x1299; + uint32_t x1300; + uint32_t x1301; + uint32_t x1302; + uint32_t x1303; + uint32_t x1304; + uint32_t x1305; + uint32_t x1306; + uint32_t x1307; + uint32_t x1308; + uint32_t x1309; + uint32_t x1310; + uint32_t x1311; + uint32_t x1312; + fiat_secp384r1_uint1 x1313; + uint32_t x1314; + fiat_secp384r1_uint1 x1315; + uint32_t x1316; + fiat_secp384r1_uint1 x1317; + uint32_t x1318; + fiat_secp384r1_uint1 x1319; + uint32_t x1320; + fiat_secp384r1_uint1 x1321; + uint32_t x1322; + fiat_secp384r1_uint1 x1323; + uint32_t x1324; + fiat_secp384r1_uint1 x1325; + uint32_t x1326; + fiat_secp384r1_uint1 x1327; + uint32_t x1328; + uint32_t x1329; + fiat_secp384r1_uint1 x1330; + uint32_t x1331; + fiat_secp384r1_uint1 x1332; + uint32_t x1333; + fiat_secp384r1_uint1 x1334; + uint32_t x1335; + fiat_secp384r1_uint1 x1336; + uint32_t x1337; + fiat_secp384r1_uint1 x1338; + uint32_t x1339; + fiat_secp384r1_uint1 x1340; + uint32_t x1341; + fiat_secp384r1_uint1 x1342; + uint32_t x1343; + fiat_secp384r1_uint1 x1344; + uint32_t x1345; + fiat_secp384r1_uint1 x1346; + uint32_t x1347; + fiat_secp384r1_uint1 x1348; + uint32_t x1349; + fiat_secp384r1_uint1 x1350; + uint32_t x1351; + fiat_secp384r1_uint1 x1352; + uint32_t x1353; + fiat_secp384r1_uint1 x1354; + uint32_t x1355; + uint32_t x1356; + uint32_t x1357; + uint32_t x1358; + uint32_t x1359; + uint32_t x1360; + uint32_t x1361; + uint32_t x1362; + uint32_t x1363; + uint32_t x1364; + uint32_t x1365; + uint32_t x1366; + uint32_t x1367; + uint32_t x1368; + uint32_t x1369; 
+ uint32_t x1370; + uint32_t x1371; + uint32_t x1372; + uint32_t x1373; + uint32_t x1374; + uint32_t x1375; + uint32_t x1376; + uint32_t x1377; + uint32_t x1378; + uint32_t x1379; + uint32_t x1380; + fiat_secp384r1_uint1 x1381; + uint32_t x1382; + fiat_secp384r1_uint1 x1383; + uint32_t x1384; + fiat_secp384r1_uint1 x1385; + uint32_t x1386; + fiat_secp384r1_uint1 x1387; + uint32_t x1388; + fiat_secp384r1_uint1 x1389; + uint32_t x1390; + fiat_secp384r1_uint1 x1391; + uint32_t x1392; + fiat_secp384r1_uint1 x1393; + uint32_t x1394; + fiat_secp384r1_uint1 x1395; + uint32_t x1396; + fiat_secp384r1_uint1 x1397; + uint32_t x1398; + fiat_secp384r1_uint1 x1399; + uint32_t x1400; + fiat_secp384r1_uint1 x1401; + uint32_t x1402; + uint32_t x1403; + fiat_secp384r1_uint1 x1404; + uint32_t x1405; + fiat_secp384r1_uint1 x1406; + uint32_t x1407; + fiat_secp384r1_uint1 x1408; + uint32_t x1409; + fiat_secp384r1_uint1 x1410; + uint32_t x1411; + fiat_secp384r1_uint1 x1412; + uint32_t x1413; + fiat_secp384r1_uint1 x1414; + uint32_t x1415; + fiat_secp384r1_uint1 x1416; + uint32_t x1417; + fiat_secp384r1_uint1 x1418; + uint32_t x1419; + fiat_secp384r1_uint1 x1420; + uint32_t x1421; + fiat_secp384r1_uint1 x1422; + uint32_t x1423; + fiat_secp384r1_uint1 x1424; + uint32_t x1425; + fiat_secp384r1_uint1 x1426; + uint32_t x1427; + fiat_secp384r1_uint1 x1428; + uint32_t x1429; + uint32_t x1430; + uint32_t x1431; + uint32_t x1432; + uint32_t x1433; + uint32_t x1434; + uint32_t x1435; + uint32_t x1436; + uint32_t x1437; + uint32_t x1438; + uint32_t x1439; + uint32_t x1440; + uint32_t x1441; + uint32_t x1442; + uint32_t x1443; + uint32_t x1444; + uint32_t x1445; + uint32_t x1446; + uint32_t x1447; + uint32_t x1448; + uint32_t x1449; + fiat_secp384r1_uint1 x1450; + uint32_t x1451; + fiat_secp384r1_uint1 x1452; + uint32_t x1453; + fiat_secp384r1_uint1 x1454; + uint32_t x1455; + fiat_secp384r1_uint1 x1456; + uint32_t x1457; + fiat_secp384r1_uint1 x1458; + uint32_t x1459; + fiat_secp384r1_uint1 x1460; + 
uint32_t x1461; + fiat_secp384r1_uint1 x1462; + uint32_t x1463; + fiat_secp384r1_uint1 x1464; + uint32_t x1465; + uint32_t x1466; + fiat_secp384r1_uint1 x1467; + uint32_t x1468; + fiat_secp384r1_uint1 x1469; + uint32_t x1470; + fiat_secp384r1_uint1 x1471; + uint32_t x1472; + fiat_secp384r1_uint1 x1473; + uint32_t x1474; + fiat_secp384r1_uint1 x1475; + uint32_t x1476; + fiat_secp384r1_uint1 x1477; + uint32_t x1478; + fiat_secp384r1_uint1 x1479; + uint32_t x1480; + fiat_secp384r1_uint1 x1481; + uint32_t x1482; + fiat_secp384r1_uint1 x1483; + uint32_t x1484; + fiat_secp384r1_uint1 x1485; + uint32_t x1486; + fiat_secp384r1_uint1 x1487; + uint32_t x1488; + fiat_secp384r1_uint1 x1489; + uint32_t x1490; + fiat_secp384r1_uint1 x1491; + uint32_t x1492; + uint32_t x1493; + uint32_t x1494; + uint32_t x1495; + uint32_t x1496; + uint32_t x1497; + uint32_t x1498; + uint32_t x1499; + uint32_t x1500; + uint32_t x1501; + uint32_t x1502; + uint32_t x1503; + uint32_t x1504; + uint32_t x1505; + uint32_t x1506; + uint32_t x1507; + uint32_t x1508; + uint32_t x1509; + uint32_t x1510; + uint32_t x1511; + uint32_t x1512; + uint32_t x1513; + uint32_t x1514; + uint32_t x1515; + uint32_t x1516; + uint32_t x1517; + fiat_secp384r1_uint1 x1518; + uint32_t x1519; + fiat_secp384r1_uint1 x1520; + uint32_t x1521; + fiat_secp384r1_uint1 x1522; + uint32_t x1523; + fiat_secp384r1_uint1 x1524; + uint32_t x1525; + fiat_secp384r1_uint1 x1526; + uint32_t x1527; + fiat_secp384r1_uint1 x1528; + uint32_t x1529; + fiat_secp384r1_uint1 x1530; + uint32_t x1531; + fiat_secp384r1_uint1 x1532; + uint32_t x1533; + fiat_secp384r1_uint1 x1534; + uint32_t x1535; + fiat_secp384r1_uint1 x1536; + uint32_t x1537; + fiat_secp384r1_uint1 x1538; + uint32_t x1539; + uint32_t x1540; + fiat_secp384r1_uint1 x1541; + uint32_t x1542; + fiat_secp384r1_uint1 x1543; + uint32_t x1544; + fiat_secp384r1_uint1 x1545; + uint32_t x1546; + fiat_secp384r1_uint1 x1547; + uint32_t x1548; + fiat_secp384r1_uint1 x1549; + uint32_t x1550; + 
fiat_secp384r1_uint1 x1551; + uint32_t x1552; + fiat_secp384r1_uint1 x1553; + uint32_t x1554; + fiat_secp384r1_uint1 x1555; + uint32_t x1556; + fiat_secp384r1_uint1 x1557; + uint32_t x1558; + fiat_secp384r1_uint1 x1559; + uint32_t x1560; + fiat_secp384r1_uint1 x1561; + uint32_t x1562; + fiat_secp384r1_uint1 x1563; + uint32_t x1564; + fiat_secp384r1_uint1 x1565; + uint32_t x1566; + uint32_t x1567; + uint32_t x1568; + uint32_t x1569; + uint32_t x1570; + uint32_t x1571; + uint32_t x1572; + uint32_t x1573; + uint32_t x1574; + uint32_t x1575; + uint32_t x1576; + uint32_t x1577; + uint32_t x1578; + uint32_t x1579; + uint32_t x1580; + uint32_t x1581; + uint32_t x1582; + uint32_t x1583; + uint32_t x1584; + uint32_t x1585; + uint32_t x1586; + fiat_secp384r1_uint1 x1587; + uint32_t x1588; + fiat_secp384r1_uint1 x1589; + uint32_t x1590; + fiat_secp384r1_uint1 x1591; + uint32_t x1592; + fiat_secp384r1_uint1 x1593; + uint32_t x1594; + fiat_secp384r1_uint1 x1595; + uint32_t x1596; + fiat_secp384r1_uint1 x1597; + uint32_t x1598; + fiat_secp384r1_uint1 x1599; + uint32_t x1600; + fiat_secp384r1_uint1 x1601; + uint32_t x1602; + uint32_t x1603; + fiat_secp384r1_uint1 x1604; + uint32_t x1605; + fiat_secp384r1_uint1 x1606; + uint32_t x1607; + fiat_secp384r1_uint1 x1608; + uint32_t x1609; + fiat_secp384r1_uint1 x1610; + uint32_t x1611; + fiat_secp384r1_uint1 x1612; + uint32_t x1613; + fiat_secp384r1_uint1 x1614; + uint32_t x1615; + fiat_secp384r1_uint1 x1616; + uint32_t x1617; + fiat_secp384r1_uint1 x1618; + uint32_t x1619; + fiat_secp384r1_uint1 x1620; + uint32_t x1621; + fiat_secp384r1_uint1 x1622; + uint32_t x1623; + fiat_secp384r1_uint1 x1624; + uint32_t x1625; + fiat_secp384r1_uint1 x1626; + uint32_t x1627; + fiat_secp384r1_uint1 x1628; + uint32_t x1629; + uint32_t x1630; + fiat_secp384r1_uint1 x1631; + uint32_t x1632; + fiat_secp384r1_uint1 x1633; + uint32_t x1634; + fiat_secp384r1_uint1 x1635; + uint32_t x1636; + fiat_secp384r1_uint1 x1637; + uint32_t x1638; + 
fiat_secp384r1_uint1 x1639; + uint32_t x1640; + fiat_secp384r1_uint1 x1641; + uint32_t x1642; + fiat_secp384r1_uint1 x1643; + uint32_t x1644; + fiat_secp384r1_uint1 x1645; + uint32_t x1646; + fiat_secp384r1_uint1 x1647; + uint32_t x1648; + fiat_secp384r1_uint1 x1649; + uint32_t x1650; + fiat_secp384r1_uint1 x1651; + uint32_t x1652; + fiat_secp384r1_uint1 x1653; + uint32_t x1654; + fiat_secp384r1_uint1 x1655; + uint32_t x1656; + uint32_t x1657; + uint32_t x1658; + uint32_t x1659; + uint32_t x1660; + uint32_t x1661; + uint32_t x1662; + uint32_t x1663; + uint32_t x1664; + uint32_t x1665; + uint32_t x1666; + uint32_t x1667; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[4]); + x5 = (arg1[5]); + x6 = (arg1[6]); + x7 = (arg1[7]); + x8 = (arg1[8]); + x9 = (arg1[9]); + x10 = (arg1[10]); + x11 = (arg1[11]); + x12 = (arg1[0]); + fiat_secp384r1_mulx_u32(&x13, &x14, x12, (arg2[11])); + fiat_secp384r1_mulx_u32(&x15, &x16, x12, (arg2[10])); + fiat_secp384r1_mulx_u32(&x17, &x18, x12, (arg2[9])); + fiat_secp384r1_mulx_u32(&x19, &x20, x12, (arg2[8])); + fiat_secp384r1_mulx_u32(&x21, &x22, x12, (arg2[7])); + fiat_secp384r1_mulx_u32(&x23, &x24, x12, (arg2[6])); + fiat_secp384r1_mulx_u32(&x25, &x26, x12, (arg2[5])); + fiat_secp384r1_mulx_u32(&x27, &x28, x12, (arg2[4])); + fiat_secp384r1_mulx_u32(&x29, &x30, x12, (arg2[3])); + fiat_secp384r1_mulx_u32(&x31, &x32, x12, (arg2[2])); + fiat_secp384r1_mulx_u32(&x33, &x34, x12, (arg2[1])); + fiat_secp384r1_mulx_u32(&x35, &x36, x12, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x37, &x38, 0x0, x36, x33); + fiat_secp384r1_addcarryx_u32(&x39, &x40, x38, x34, x31); + fiat_secp384r1_addcarryx_u32(&x41, &x42, x40, x32, x29); + fiat_secp384r1_addcarryx_u32(&x43, &x44, x42, x30, x27); + fiat_secp384r1_addcarryx_u32(&x45, &x46, x44, x28, x25); + fiat_secp384r1_addcarryx_u32(&x47, &x48, x46, x26, x23); + fiat_secp384r1_addcarryx_u32(&x49, &x50, x48, x24, x21); + fiat_secp384r1_addcarryx_u32(&x51, &x52, x50, x22, x19); + 
fiat_secp384r1_addcarryx_u32(&x53, &x54, x52, x20, x17); + fiat_secp384r1_addcarryx_u32(&x55, &x56, x54, x18, x15); + fiat_secp384r1_addcarryx_u32(&x57, &x58, x56, x16, x13); + x59 = (x58 + x14); + fiat_secp384r1_mulx_u32(&x60, &x61, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x62, &x63, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x64, &x65, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x66, &x67, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x68, &x69, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x70, &x71, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x72, &x73, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x74, &x75, x35, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x76, &x77, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x78, &x79, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x80, &x81, 0x0, x77, x74); + fiat_secp384r1_addcarryx_u32(&x82, &x83, x81, x75, x72); + fiat_secp384r1_addcarryx_u32(&x84, &x85, x83, x73, x70); + fiat_secp384r1_addcarryx_u32(&x86, &x87, x85, x71, x68); + fiat_secp384r1_addcarryx_u32(&x88, &x89, x87, x69, x66); + fiat_secp384r1_addcarryx_u32(&x90, &x91, x89, x67, x64); + fiat_secp384r1_addcarryx_u32(&x92, &x93, x91, x65, x62); + fiat_secp384r1_addcarryx_u32(&x94, &x95, x93, x63, x60); + x96 = (x95 + x61); + fiat_secp384r1_addcarryx_u32(&x97, &x98, 0x0, x35, x78); + fiat_secp384r1_addcarryx_u32(&x99, &x100, x98, x37, x79); + fiat_secp384r1_addcarryx_u32(&x101, &x102, x100, x39, 0x0); + fiat_secp384r1_addcarryx_u32(&x103, &x104, x102, x41, x76); + fiat_secp384r1_addcarryx_u32(&x105, &x106, x104, x43, x80); + fiat_secp384r1_addcarryx_u32(&x107, &x108, x106, x45, x82); + fiat_secp384r1_addcarryx_u32(&x109, &x110, x108, x47, x84); + fiat_secp384r1_addcarryx_u32(&x111, &x112, x110, x49, x86); + fiat_secp384r1_addcarryx_u32(&x113, &x114, x112, x51, x88); + fiat_secp384r1_addcarryx_u32(&x115, &x116, x114, x53, x90); + 
fiat_secp384r1_addcarryx_u32(&x117, &x118, x116, x55, x92); + fiat_secp384r1_addcarryx_u32(&x119, &x120, x118, x57, x94); + fiat_secp384r1_addcarryx_u32(&x121, &x122, x120, x59, x96); + fiat_secp384r1_mulx_u32(&x123, &x124, x1, (arg2[11])); + fiat_secp384r1_mulx_u32(&x125, &x126, x1, (arg2[10])); + fiat_secp384r1_mulx_u32(&x127, &x128, x1, (arg2[9])); + fiat_secp384r1_mulx_u32(&x129, &x130, x1, (arg2[8])); + fiat_secp384r1_mulx_u32(&x131, &x132, x1, (arg2[7])); + fiat_secp384r1_mulx_u32(&x133, &x134, x1, (arg2[6])); + fiat_secp384r1_mulx_u32(&x135, &x136, x1, (arg2[5])); + fiat_secp384r1_mulx_u32(&x137, &x138, x1, (arg2[4])); + fiat_secp384r1_mulx_u32(&x139, &x140, x1, (arg2[3])); + fiat_secp384r1_mulx_u32(&x141, &x142, x1, (arg2[2])); + fiat_secp384r1_mulx_u32(&x143, &x144, x1, (arg2[1])); + fiat_secp384r1_mulx_u32(&x145, &x146, x1, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x147, &x148, 0x0, x146, x143); + fiat_secp384r1_addcarryx_u32(&x149, &x150, x148, x144, x141); + fiat_secp384r1_addcarryx_u32(&x151, &x152, x150, x142, x139); + fiat_secp384r1_addcarryx_u32(&x153, &x154, x152, x140, x137); + fiat_secp384r1_addcarryx_u32(&x155, &x156, x154, x138, x135); + fiat_secp384r1_addcarryx_u32(&x157, &x158, x156, x136, x133); + fiat_secp384r1_addcarryx_u32(&x159, &x160, x158, x134, x131); + fiat_secp384r1_addcarryx_u32(&x161, &x162, x160, x132, x129); + fiat_secp384r1_addcarryx_u32(&x163, &x164, x162, x130, x127); + fiat_secp384r1_addcarryx_u32(&x165, &x166, x164, x128, x125); + fiat_secp384r1_addcarryx_u32(&x167, &x168, x166, x126, x123); + x169 = (x168 + x124); + fiat_secp384r1_addcarryx_u32(&x170, &x171, 0x0, x99, x145); + fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x101, x147); + fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x103, x149); + fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x105, x151); + fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x107, x153); + fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x109, x155); + 
fiat_secp384r1_addcarryx_u32(&x182, &x183, x181, x111, x157); + fiat_secp384r1_addcarryx_u32(&x184, &x185, x183, x113, x159); + fiat_secp384r1_addcarryx_u32(&x186, &x187, x185, x115, x161); + fiat_secp384r1_addcarryx_u32(&x188, &x189, x187, x117, x163); + fiat_secp384r1_addcarryx_u32(&x190, &x191, x189, x119, x165); + fiat_secp384r1_addcarryx_u32(&x192, &x193, x191, x121, x167); + fiat_secp384r1_addcarryx_u32(&x194, &x195, x193, x122, x169); + fiat_secp384r1_mulx_u32(&x196, &x197, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x198, &x199, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x200, &x201, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x202, &x203, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x204, &x205, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x206, &x207, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x208, &x209, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x210, &x211, x170, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x212, &x213, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x214, &x215, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x216, &x217, 0x0, x213, x210); + fiat_secp384r1_addcarryx_u32(&x218, &x219, x217, x211, x208); + fiat_secp384r1_addcarryx_u32(&x220, &x221, x219, x209, x206); + fiat_secp384r1_addcarryx_u32(&x222, &x223, x221, x207, x204); + fiat_secp384r1_addcarryx_u32(&x224, &x225, x223, x205, x202); + fiat_secp384r1_addcarryx_u32(&x226, &x227, x225, x203, x200); + fiat_secp384r1_addcarryx_u32(&x228, &x229, x227, x201, x198); + fiat_secp384r1_addcarryx_u32(&x230, &x231, x229, x199, x196); + x232 = (x231 + x197); + fiat_secp384r1_addcarryx_u32(&x233, &x234, 0x0, x170, x214); + fiat_secp384r1_addcarryx_u32(&x235, &x236, x234, x172, x215); + fiat_secp384r1_addcarryx_u32(&x237, &x238, x236, x174, 0x0); + fiat_secp384r1_addcarryx_u32(&x239, &x240, x238, x176, x212); + fiat_secp384r1_addcarryx_u32(&x241, &x242, x240, x178, x216); + 
fiat_secp384r1_addcarryx_u32(&x243, &x244, x242, x180, x218); + fiat_secp384r1_addcarryx_u32(&x245, &x246, x244, x182, x220); + fiat_secp384r1_addcarryx_u32(&x247, &x248, x246, x184, x222); + fiat_secp384r1_addcarryx_u32(&x249, &x250, x248, x186, x224); + fiat_secp384r1_addcarryx_u32(&x251, &x252, x250, x188, x226); + fiat_secp384r1_addcarryx_u32(&x253, &x254, x252, x190, x228); + fiat_secp384r1_addcarryx_u32(&x255, &x256, x254, x192, x230); + fiat_secp384r1_addcarryx_u32(&x257, &x258, x256, x194, x232); + x259 = ((uint32_t)x258 + x195); + fiat_secp384r1_mulx_u32(&x260, &x261, x2, (arg2[11])); + fiat_secp384r1_mulx_u32(&x262, &x263, x2, (arg2[10])); + fiat_secp384r1_mulx_u32(&x264, &x265, x2, (arg2[9])); + fiat_secp384r1_mulx_u32(&x266, &x267, x2, (arg2[8])); + fiat_secp384r1_mulx_u32(&x268, &x269, x2, (arg2[7])); + fiat_secp384r1_mulx_u32(&x270, &x271, x2, (arg2[6])); + fiat_secp384r1_mulx_u32(&x272, &x273, x2, (arg2[5])); + fiat_secp384r1_mulx_u32(&x274, &x275, x2, (arg2[4])); + fiat_secp384r1_mulx_u32(&x276, &x277, x2, (arg2[3])); + fiat_secp384r1_mulx_u32(&x278, &x279, x2, (arg2[2])); + fiat_secp384r1_mulx_u32(&x280, &x281, x2, (arg2[1])); + fiat_secp384r1_mulx_u32(&x282, &x283, x2, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x284, &x285, 0x0, x283, x280); + fiat_secp384r1_addcarryx_u32(&x286, &x287, x285, x281, x278); + fiat_secp384r1_addcarryx_u32(&x288, &x289, x287, x279, x276); + fiat_secp384r1_addcarryx_u32(&x290, &x291, x289, x277, x274); + fiat_secp384r1_addcarryx_u32(&x292, &x293, x291, x275, x272); + fiat_secp384r1_addcarryx_u32(&x294, &x295, x293, x273, x270); + fiat_secp384r1_addcarryx_u32(&x296, &x297, x295, x271, x268); + fiat_secp384r1_addcarryx_u32(&x298, &x299, x297, x269, x266); + fiat_secp384r1_addcarryx_u32(&x300, &x301, x299, x267, x264); + fiat_secp384r1_addcarryx_u32(&x302, &x303, x301, x265, x262); + fiat_secp384r1_addcarryx_u32(&x304, &x305, x303, x263, x260); + x306 = (x305 + x261); + fiat_secp384r1_addcarryx_u32(&x307, &x308, 0x0, 
x235, x282); + fiat_secp384r1_addcarryx_u32(&x309, &x310, x308, x237, x284); + fiat_secp384r1_addcarryx_u32(&x311, &x312, x310, x239, x286); + fiat_secp384r1_addcarryx_u32(&x313, &x314, x312, x241, x288); + fiat_secp384r1_addcarryx_u32(&x315, &x316, x314, x243, x290); + fiat_secp384r1_addcarryx_u32(&x317, &x318, x316, x245, x292); + fiat_secp384r1_addcarryx_u32(&x319, &x320, x318, x247, x294); + fiat_secp384r1_addcarryx_u32(&x321, &x322, x320, x249, x296); + fiat_secp384r1_addcarryx_u32(&x323, &x324, x322, x251, x298); + fiat_secp384r1_addcarryx_u32(&x325, &x326, x324, x253, x300); + fiat_secp384r1_addcarryx_u32(&x327, &x328, x326, x255, x302); + fiat_secp384r1_addcarryx_u32(&x329, &x330, x328, x257, x304); + fiat_secp384r1_addcarryx_u32(&x331, &x332, x330, x259, x306); + fiat_secp384r1_mulx_u32(&x333, &x334, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x335, &x336, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x337, &x338, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x339, &x340, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x341, &x342, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x343, &x344, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x345, &x346, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x347, &x348, x307, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x349, &x350, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x351, &x352, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x353, &x354, 0x0, x350, x347); + fiat_secp384r1_addcarryx_u32(&x355, &x356, x354, x348, x345); + fiat_secp384r1_addcarryx_u32(&x357, &x358, x356, x346, x343); + fiat_secp384r1_addcarryx_u32(&x359, &x360, x358, x344, x341); + fiat_secp384r1_addcarryx_u32(&x361, &x362, x360, x342, x339); + fiat_secp384r1_addcarryx_u32(&x363, &x364, x362, x340, x337); + fiat_secp384r1_addcarryx_u32(&x365, &x366, x364, x338, x335); + fiat_secp384r1_addcarryx_u32(&x367, &x368, x366, x336, x333); + x369 = (x368 + 
x334); + fiat_secp384r1_addcarryx_u32(&x370, &x371, 0x0, x307, x351); + fiat_secp384r1_addcarryx_u32(&x372, &x373, x371, x309, x352); + fiat_secp384r1_addcarryx_u32(&x374, &x375, x373, x311, 0x0); + fiat_secp384r1_addcarryx_u32(&x376, &x377, x375, x313, x349); + fiat_secp384r1_addcarryx_u32(&x378, &x379, x377, x315, x353); + fiat_secp384r1_addcarryx_u32(&x380, &x381, x379, x317, x355); + fiat_secp384r1_addcarryx_u32(&x382, &x383, x381, x319, x357); + fiat_secp384r1_addcarryx_u32(&x384, &x385, x383, x321, x359); + fiat_secp384r1_addcarryx_u32(&x386, &x387, x385, x323, x361); + fiat_secp384r1_addcarryx_u32(&x388, &x389, x387, x325, x363); + fiat_secp384r1_addcarryx_u32(&x390, &x391, x389, x327, x365); + fiat_secp384r1_addcarryx_u32(&x392, &x393, x391, x329, x367); + fiat_secp384r1_addcarryx_u32(&x394, &x395, x393, x331, x369); + x396 = ((uint32_t)x395 + x332); + fiat_secp384r1_mulx_u32(&x397, &x398, x3, (arg2[11])); + fiat_secp384r1_mulx_u32(&x399, &x400, x3, (arg2[10])); + fiat_secp384r1_mulx_u32(&x401, &x402, x3, (arg2[9])); + fiat_secp384r1_mulx_u32(&x403, &x404, x3, (arg2[8])); + fiat_secp384r1_mulx_u32(&x405, &x406, x3, (arg2[7])); + fiat_secp384r1_mulx_u32(&x407, &x408, x3, (arg2[6])); + fiat_secp384r1_mulx_u32(&x409, &x410, x3, (arg2[5])); + fiat_secp384r1_mulx_u32(&x411, &x412, x3, (arg2[4])); + fiat_secp384r1_mulx_u32(&x413, &x414, x3, (arg2[3])); + fiat_secp384r1_mulx_u32(&x415, &x416, x3, (arg2[2])); + fiat_secp384r1_mulx_u32(&x417, &x418, x3, (arg2[1])); + fiat_secp384r1_mulx_u32(&x419, &x420, x3, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x421, &x422, 0x0, x420, x417); + fiat_secp384r1_addcarryx_u32(&x423, &x424, x422, x418, x415); + fiat_secp384r1_addcarryx_u32(&x425, &x426, x424, x416, x413); + fiat_secp384r1_addcarryx_u32(&x427, &x428, x426, x414, x411); + fiat_secp384r1_addcarryx_u32(&x429, &x430, x428, x412, x409); + fiat_secp384r1_addcarryx_u32(&x431, &x432, x430, x410, x407); + fiat_secp384r1_addcarryx_u32(&x433, &x434, x432, x408, x405); + 
fiat_secp384r1_addcarryx_u32(&x435, &x436, x434, x406, x403); + fiat_secp384r1_addcarryx_u32(&x437, &x438, x436, x404, x401); + fiat_secp384r1_addcarryx_u32(&x439, &x440, x438, x402, x399); + fiat_secp384r1_addcarryx_u32(&x441, &x442, x440, x400, x397); + x443 = (x442 + x398); + fiat_secp384r1_addcarryx_u32(&x444, &x445, 0x0, x372, x419); + fiat_secp384r1_addcarryx_u32(&x446, &x447, x445, x374, x421); + fiat_secp384r1_addcarryx_u32(&x448, &x449, x447, x376, x423); + fiat_secp384r1_addcarryx_u32(&x450, &x451, x449, x378, x425); + fiat_secp384r1_addcarryx_u32(&x452, &x453, x451, x380, x427); + fiat_secp384r1_addcarryx_u32(&x454, &x455, x453, x382, x429); + fiat_secp384r1_addcarryx_u32(&x456, &x457, x455, x384, x431); + fiat_secp384r1_addcarryx_u32(&x458, &x459, x457, x386, x433); + fiat_secp384r1_addcarryx_u32(&x460, &x461, x459, x388, x435); + fiat_secp384r1_addcarryx_u32(&x462, &x463, x461, x390, x437); + fiat_secp384r1_addcarryx_u32(&x464, &x465, x463, x392, x439); + fiat_secp384r1_addcarryx_u32(&x466, &x467, x465, x394, x441); + fiat_secp384r1_addcarryx_u32(&x468, &x469, x467, x396, x443); + fiat_secp384r1_mulx_u32(&x470, &x471, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x472, &x473, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x474, &x475, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x476, &x477, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x478, &x479, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x480, &x481, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x482, &x483, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x484, &x485, x444, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x486, &x487, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x488, &x489, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x490, &x491, 0x0, x487, x484); + fiat_secp384r1_addcarryx_u32(&x492, &x493, x491, x485, x482); + fiat_secp384r1_addcarryx_u32(&x494, &x495, x493, x483, x480); + 
fiat_secp384r1_addcarryx_u32(&x496, &x497, x495, x481, x478); + fiat_secp384r1_addcarryx_u32(&x498, &x499, x497, x479, x476); + fiat_secp384r1_addcarryx_u32(&x500, &x501, x499, x477, x474); + fiat_secp384r1_addcarryx_u32(&x502, &x503, x501, x475, x472); + fiat_secp384r1_addcarryx_u32(&x504, &x505, x503, x473, x470); + x506 = (x505 + x471); + fiat_secp384r1_addcarryx_u32(&x507, &x508, 0x0, x444, x488); + fiat_secp384r1_addcarryx_u32(&x509, &x510, x508, x446, x489); + fiat_secp384r1_addcarryx_u32(&x511, &x512, x510, x448, 0x0); + fiat_secp384r1_addcarryx_u32(&x513, &x514, x512, x450, x486); + fiat_secp384r1_addcarryx_u32(&x515, &x516, x514, x452, x490); + fiat_secp384r1_addcarryx_u32(&x517, &x518, x516, x454, x492); + fiat_secp384r1_addcarryx_u32(&x519, &x520, x518, x456, x494); + fiat_secp384r1_addcarryx_u32(&x521, &x522, x520, x458, x496); + fiat_secp384r1_addcarryx_u32(&x523, &x524, x522, x460, x498); + fiat_secp384r1_addcarryx_u32(&x525, &x526, x524, x462, x500); + fiat_secp384r1_addcarryx_u32(&x527, &x528, x526, x464, x502); + fiat_secp384r1_addcarryx_u32(&x529, &x530, x528, x466, x504); + fiat_secp384r1_addcarryx_u32(&x531, &x532, x530, x468, x506); + x533 = ((uint32_t)x532 + x469); + fiat_secp384r1_mulx_u32(&x534, &x535, x4, (arg2[11])); + fiat_secp384r1_mulx_u32(&x536, &x537, x4, (arg2[10])); + fiat_secp384r1_mulx_u32(&x538, &x539, x4, (arg2[9])); + fiat_secp384r1_mulx_u32(&x540, &x541, x4, (arg2[8])); + fiat_secp384r1_mulx_u32(&x542, &x543, x4, (arg2[7])); + fiat_secp384r1_mulx_u32(&x544, &x545, x4, (arg2[6])); + fiat_secp384r1_mulx_u32(&x546, &x547, x4, (arg2[5])); + fiat_secp384r1_mulx_u32(&x548, &x549, x4, (arg2[4])); + fiat_secp384r1_mulx_u32(&x550, &x551, x4, (arg2[3])); + fiat_secp384r1_mulx_u32(&x552, &x553, x4, (arg2[2])); + fiat_secp384r1_mulx_u32(&x554, &x555, x4, (arg2[1])); + fiat_secp384r1_mulx_u32(&x556, &x557, x4, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x558, &x559, 0x0, x557, x554); + fiat_secp384r1_addcarryx_u32(&x560, &x561, x559, x555, 
x552); + fiat_secp384r1_addcarryx_u32(&x562, &x563, x561, x553, x550); + fiat_secp384r1_addcarryx_u32(&x564, &x565, x563, x551, x548); + fiat_secp384r1_addcarryx_u32(&x566, &x567, x565, x549, x546); + fiat_secp384r1_addcarryx_u32(&x568, &x569, x567, x547, x544); + fiat_secp384r1_addcarryx_u32(&x570, &x571, x569, x545, x542); + fiat_secp384r1_addcarryx_u32(&x572, &x573, x571, x543, x540); + fiat_secp384r1_addcarryx_u32(&x574, &x575, x573, x541, x538); + fiat_secp384r1_addcarryx_u32(&x576, &x577, x575, x539, x536); + fiat_secp384r1_addcarryx_u32(&x578, &x579, x577, x537, x534); + x580 = (x579 + x535); + fiat_secp384r1_addcarryx_u32(&x581, &x582, 0x0, x509, x556); + fiat_secp384r1_addcarryx_u32(&x583, &x584, x582, x511, x558); + fiat_secp384r1_addcarryx_u32(&x585, &x586, x584, x513, x560); + fiat_secp384r1_addcarryx_u32(&x587, &x588, x586, x515, x562); + fiat_secp384r1_addcarryx_u32(&x589, &x590, x588, x517, x564); + fiat_secp384r1_addcarryx_u32(&x591, &x592, x590, x519, x566); + fiat_secp384r1_addcarryx_u32(&x593, &x594, x592, x521, x568); + fiat_secp384r1_addcarryx_u32(&x595, &x596, x594, x523, x570); + fiat_secp384r1_addcarryx_u32(&x597, &x598, x596, x525, x572); + fiat_secp384r1_addcarryx_u32(&x599, &x600, x598, x527, x574); + fiat_secp384r1_addcarryx_u32(&x601, &x602, x600, x529, x576); + fiat_secp384r1_addcarryx_u32(&x603, &x604, x602, x531, x578); + fiat_secp384r1_addcarryx_u32(&x605, &x606, x604, x533, x580); + fiat_secp384r1_mulx_u32(&x607, &x608, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x609, &x610, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x611, &x612, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x613, &x614, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x615, &x616, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x617, &x618, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x619, &x620, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x621, &x622, x581, UINT32_C(0xfffffffe)); + 
fiat_secp384r1_mulx_u32(&x623, &x624, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x625, &x626, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x627, &x628, 0x0, x624, x621); + fiat_secp384r1_addcarryx_u32(&x629, &x630, x628, x622, x619); + fiat_secp384r1_addcarryx_u32(&x631, &x632, x630, x620, x617); + fiat_secp384r1_addcarryx_u32(&x633, &x634, x632, x618, x615); + fiat_secp384r1_addcarryx_u32(&x635, &x636, x634, x616, x613); + fiat_secp384r1_addcarryx_u32(&x637, &x638, x636, x614, x611); + fiat_secp384r1_addcarryx_u32(&x639, &x640, x638, x612, x609); + fiat_secp384r1_addcarryx_u32(&x641, &x642, x640, x610, x607); + x643 = (x642 + x608); + fiat_secp384r1_addcarryx_u32(&x644, &x645, 0x0, x581, x625); + fiat_secp384r1_addcarryx_u32(&x646, &x647, x645, x583, x626); + fiat_secp384r1_addcarryx_u32(&x648, &x649, x647, x585, 0x0); + fiat_secp384r1_addcarryx_u32(&x650, &x651, x649, x587, x623); + fiat_secp384r1_addcarryx_u32(&x652, &x653, x651, x589, x627); + fiat_secp384r1_addcarryx_u32(&x654, &x655, x653, x591, x629); + fiat_secp384r1_addcarryx_u32(&x656, &x657, x655, x593, x631); + fiat_secp384r1_addcarryx_u32(&x658, &x659, x657, x595, x633); + fiat_secp384r1_addcarryx_u32(&x660, &x661, x659, x597, x635); + fiat_secp384r1_addcarryx_u32(&x662, &x663, x661, x599, x637); + fiat_secp384r1_addcarryx_u32(&x664, &x665, x663, x601, x639); + fiat_secp384r1_addcarryx_u32(&x666, &x667, x665, x603, x641); + fiat_secp384r1_addcarryx_u32(&x668, &x669, x667, x605, x643); + x670 = ((uint32_t)x669 + x606); + fiat_secp384r1_mulx_u32(&x671, &x672, x5, (arg2[11])); + fiat_secp384r1_mulx_u32(&x673, &x674, x5, (arg2[10])); + fiat_secp384r1_mulx_u32(&x675, &x676, x5, (arg2[9])); + fiat_secp384r1_mulx_u32(&x677, &x678, x5, (arg2[8])); + fiat_secp384r1_mulx_u32(&x679, &x680, x5, (arg2[7])); + fiat_secp384r1_mulx_u32(&x681, &x682, x5, (arg2[6])); + fiat_secp384r1_mulx_u32(&x683, &x684, x5, (arg2[5])); + fiat_secp384r1_mulx_u32(&x685, &x686, x5, (arg2[4])); + 
fiat_secp384r1_mulx_u32(&x687, &x688, x5, (arg2[3])); + fiat_secp384r1_mulx_u32(&x689, &x690, x5, (arg2[2])); + fiat_secp384r1_mulx_u32(&x691, &x692, x5, (arg2[1])); + fiat_secp384r1_mulx_u32(&x693, &x694, x5, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x695, &x696, 0x0, x694, x691); + fiat_secp384r1_addcarryx_u32(&x697, &x698, x696, x692, x689); + fiat_secp384r1_addcarryx_u32(&x699, &x700, x698, x690, x687); + fiat_secp384r1_addcarryx_u32(&x701, &x702, x700, x688, x685); + fiat_secp384r1_addcarryx_u32(&x703, &x704, x702, x686, x683); + fiat_secp384r1_addcarryx_u32(&x705, &x706, x704, x684, x681); + fiat_secp384r1_addcarryx_u32(&x707, &x708, x706, x682, x679); + fiat_secp384r1_addcarryx_u32(&x709, &x710, x708, x680, x677); + fiat_secp384r1_addcarryx_u32(&x711, &x712, x710, x678, x675); + fiat_secp384r1_addcarryx_u32(&x713, &x714, x712, x676, x673); + fiat_secp384r1_addcarryx_u32(&x715, &x716, x714, x674, x671); + x717 = (x716 + x672); + fiat_secp384r1_addcarryx_u32(&x718, &x719, 0x0, x646, x693); + fiat_secp384r1_addcarryx_u32(&x720, &x721, x719, x648, x695); + fiat_secp384r1_addcarryx_u32(&x722, &x723, x721, x650, x697); + fiat_secp384r1_addcarryx_u32(&x724, &x725, x723, x652, x699); + fiat_secp384r1_addcarryx_u32(&x726, &x727, x725, x654, x701); + fiat_secp384r1_addcarryx_u32(&x728, &x729, x727, x656, x703); + fiat_secp384r1_addcarryx_u32(&x730, &x731, x729, x658, x705); + fiat_secp384r1_addcarryx_u32(&x732, &x733, x731, x660, x707); + fiat_secp384r1_addcarryx_u32(&x734, &x735, x733, x662, x709); + fiat_secp384r1_addcarryx_u32(&x736, &x737, x735, x664, x711); + fiat_secp384r1_addcarryx_u32(&x738, &x739, x737, x666, x713); + fiat_secp384r1_addcarryx_u32(&x740, &x741, x739, x668, x715); + fiat_secp384r1_addcarryx_u32(&x742, &x743, x741, x670, x717); + fiat_secp384r1_mulx_u32(&x744, &x745, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x746, &x747, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x748, &x749, x718, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x750, &x751, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x752, &x753, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x754, &x755, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x756, &x757, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x758, &x759, x718, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x760, &x761, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x762, &x763, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x764, &x765, 0x0, x761, x758); + fiat_secp384r1_addcarryx_u32(&x766, &x767, x765, x759, x756); + fiat_secp384r1_addcarryx_u32(&x768, &x769, x767, x757, x754); + fiat_secp384r1_addcarryx_u32(&x770, &x771, x769, x755, x752); + fiat_secp384r1_addcarryx_u32(&x772, &x773, x771, x753, x750); + fiat_secp384r1_addcarryx_u32(&x774, &x775, x773, x751, x748); + fiat_secp384r1_addcarryx_u32(&x776, &x777, x775, x749, x746); + fiat_secp384r1_addcarryx_u32(&x778, &x779, x777, x747, x744); + x780 = (x779 + x745); + fiat_secp384r1_addcarryx_u32(&x781, &x782, 0x0, x718, x762); + fiat_secp384r1_addcarryx_u32(&x783, &x784, x782, x720, x763); + fiat_secp384r1_addcarryx_u32(&x785, &x786, x784, x722, 0x0); + fiat_secp384r1_addcarryx_u32(&x787, &x788, x786, x724, x760); + fiat_secp384r1_addcarryx_u32(&x789, &x790, x788, x726, x764); + fiat_secp384r1_addcarryx_u32(&x791, &x792, x790, x728, x766); + fiat_secp384r1_addcarryx_u32(&x793, &x794, x792, x730, x768); + fiat_secp384r1_addcarryx_u32(&x795, &x796, x794, x732, x770); + fiat_secp384r1_addcarryx_u32(&x797, &x798, x796, x734, x772); + fiat_secp384r1_addcarryx_u32(&x799, &x800, x798, x736, x774); + fiat_secp384r1_addcarryx_u32(&x801, &x802, x800, x738, x776); + fiat_secp384r1_addcarryx_u32(&x803, &x804, x802, x740, x778); + fiat_secp384r1_addcarryx_u32(&x805, &x806, x804, x742, x780); + x807 = ((uint32_t)x806 + x743); + fiat_secp384r1_mulx_u32(&x808, &x809, x6, (arg2[11])); + fiat_secp384r1_mulx_u32(&x810, &x811, x6, (arg2[10])); + 
fiat_secp384r1_mulx_u32(&x812, &x813, x6, (arg2[9])); + fiat_secp384r1_mulx_u32(&x814, &x815, x6, (arg2[8])); + fiat_secp384r1_mulx_u32(&x816, &x817, x6, (arg2[7])); + fiat_secp384r1_mulx_u32(&x818, &x819, x6, (arg2[6])); + fiat_secp384r1_mulx_u32(&x820, &x821, x6, (arg2[5])); + fiat_secp384r1_mulx_u32(&x822, &x823, x6, (arg2[4])); + fiat_secp384r1_mulx_u32(&x824, &x825, x6, (arg2[3])); + fiat_secp384r1_mulx_u32(&x826, &x827, x6, (arg2[2])); + fiat_secp384r1_mulx_u32(&x828, &x829, x6, (arg2[1])); + fiat_secp384r1_mulx_u32(&x830, &x831, x6, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x832, &x833, 0x0, x831, x828); + fiat_secp384r1_addcarryx_u32(&x834, &x835, x833, x829, x826); + fiat_secp384r1_addcarryx_u32(&x836, &x837, x835, x827, x824); + fiat_secp384r1_addcarryx_u32(&x838, &x839, x837, x825, x822); + fiat_secp384r1_addcarryx_u32(&x840, &x841, x839, x823, x820); + fiat_secp384r1_addcarryx_u32(&x842, &x843, x841, x821, x818); + fiat_secp384r1_addcarryx_u32(&x844, &x845, x843, x819, x816); + fiat_secp384r1_addcarryx_u32(&x846, &x847, x845, x817, x814); + fiat_secp384r1_addcarryx_u32(&x848, &x849, x847, x815, x812); + fiat_secp384r1_addcarryx_u32(&x850, &x851, x849, x813, x810); + fiat_secp384r1_addcarryx_u32(&x852, &x853, x851, x811, x808); + x854 = (x853 + x809); + fiat_secp384r1_addcarryx_u32(&x855, &x856, 0x0, x783, x830); + fiat_secp384r1_addcarryx_u32(&x857, &x858, x856, x785, x832); + fiat_secp384r1_addcarryx_u32(&x859, &x860, x858, x787, x834); + fiat_secp384r1_addcarryx_u32(&x861, &x862, x860, x789, x836); + fiat_secp384r1_addcarryx_u32(&x863, &x864, x862, x791, x838); + fiat_secp384r1_addcarryx_u32(&x865, &x866, x864, x793, x840); + fiat_secp384r1_addcarryx_u32(&x867, &x868, x866, x795, x842); + fiat_secp384r1_addcarryx_u32(&x869, &x870, x868, x797, x844); + fiat_secp384r1_addcarryx_u32(&x871, &x872, x870, x799, x846); + fiat_secp384r1_addcarryx_u32(&x873, &x874, x872, x801, x848); + fiat_secp384r1_addcarryx_u32(&x875, &x876, x874, x803, x850); + 
fiat_secp384r1_addcarryx_u32(&x877, &x878, x876, x805, x852); + fiat_secp384r1_addcarryx_u32(&x879, &x880, x878, x807, x854); + fiat_secp384r1_mulx_u32(&x881, &x882, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x883, &x884, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x885, &x886, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x887, &x888, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x889, &x890, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x891, &x892, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x893, &x894, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x895, &x896, x855, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x897, &x898, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x899, &x900, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x901, &x902, 0x0, x898, x895); + fiat_secp384r1_addcarryx_u32(&x903, &x904, x902, x896, x893); + fiat_secp384r1_addcarryx_u32(&x905, &x906, x904, x894, x891); + fiat_secp384r1_addcarryx_u32(&x907, &x908, x906, x892, x889); + fiat_secp384r1_addcarryx_u32(&x909, &x910, x908, x890, x887); + fiat_secp384r1_addcarryx_u32(&x911, &x912, x910, x888, x885); + fiat_secp384r1_addcarryx_u32(&x913, &x914, x912, x886, x883); + fiat_secp384r1_addcarryx_u32(&x915, &x916, x914, x884, x881); + x917 = (x916 + x882); + fiat_secp384r1_addcarryx_u32(&x918, &x919, 0x0, x855, x899); + fiat_secp384r1_addcarryx_u32(&x920, &x921, x919, x857, x900); + fiat_secp384r1_addcarryx_u32(&x922, &x923, x921, x859, 0x0); + fiat_secp384r1_addcarryx_u32(&x924, &x925, x923, x861, x897); + fiat_secp384r1_addcarryx_u32(&x926, &x927, x925, x863, x901); + fiat_secp384r1_addcarryx_u32(&x928, &x929, x927, x865, x903); + fiat_secp384r1_addcarryx_u32(&x930, &x931, x929, x867, x905); + fiat_secp384r1_addcarryx_u32(&x932, &x933, x931, x869, x907); + fiat_secp384r1_addcarryx_u32(&x934, &x935, x933, x871, x909); + fiat_secp384r1_addcarryx_u32(&x936, &x937, x935, x873, x911); + 
fiat_secp384r1_addcarryx_u32(&x938, &x939, x937, x875, x913); + fiat_secp384r1_addcarryx_u32(&x940, &x941, x939, x877, x915); + fiat_secp384r1_addcarryx_u32(&x942, &x943, x941, x879, x917); + x944 = ((uint32_t)x943 + x880); + fiat_secp384r1_mulx_u32(&x945, &x946, x7, (arg2[11])); + fiat_secp384r1_mulx_u32(&x947, &x948, x7, (arg2[10])); + fiat_secp384r1_mulx_u32(&x949, &x950, x7, (arg2[9])); + fiat_secp384r1_mulx_u32(&x951, &x952, x7, (arg2[8])); + fiat_secp384r1_mulx_u32(&x953, &x954, x7, (arg2[7])); + fiat_secp384r1_mulx_u32(&x955, &x956, x7, (arg2[6])); + fiat_secp384r1_mulx_u32(&x957, &x958, x7, (arg2[5])); + fiat_secp384r1_mulx_u32(&x959, &x960, x7, (arg2[4])); + fiat_secp384r1_mulx_u32(&x961, &x962, x7, (arg2[3])); + fiat_secp384r1_mulx_u32(&x963, &x964, x7, (arg2[2])); + fiat_secp384r1_mulx_u32(&x965, &x966, x7, (arg2[1])); + fiat_secp384r1_mulx_u32(&x967, &x968, x7, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x969, &x970, 0x0, x968, x965); + fiat_secp384r1_addcarryx_u32(&x971, &x972, x970, x966, x963); + fiat_secp384r1_addcarryx_u32(&x973, &x974, x972, x964, x961); + fiat_secp384r1_addcarryx_u32(&x975, &x976, x974, x962, x959); + fiat_secp384r1_addcarryx_u32(&x977, &x978, x976, x960, x957); + fiat_secp384r1_addcarryx_u32(&x979, &x980, x978, x958, x955); + fiat_secp384r1_addcarryx_u32(&x981, &x982, x980, x956, x953); + fiat_secp384r1_addcarryx_u32(&x983, &x984, x982, x954, x951); + fiat_secp384r1_addcarryx_u32(&x985, &x986, x984, x952, x949); + fiat_secp384r1_addcarryx_u32(&x987, &x988, x986, x950, x947); + fiat_secp384r1_addcarryx_u32(&x989, &x990, x988, x948, x945); + x991 = (x990 + x946); + fiat_secp384r1_addcarryx_u32(&x992, &x993, 0x0, x920, x967); + fiat_secp384r1_addcarryx_u32(&x994, &x995, x993, x922, x969); + fiat_secp384r1_addcarryx_u32(&x996, &x997, x995, x924, x971); + fiat_secp384r1_addcarryx_u32(&x998, &x999, x997, x926, x973); + fiat_secp384r1_addcarryx_u32(&x1000, &x1001, x999, x928, x975); + fiat_secp384r1_addcarryx_u32(&x1002, &x1003, x1001, 
x930, x977); + fiat_secp384r1_addcarryx_u32(&x1004, &x1005, x1003, x932, x979); + fiat_secp384r1_addcarryx_u32(&x1006, &x1007, x1005, x934, x981); + fiat_secp384r1_addcarryx_u32(&x1008, &x1009, x1007, x936, x983); + fiat_secp384r1_addcarryx_u32(&x1010, &x1011, x1009, x938, x985); + fiat_secp384r1_addcarryx_u32(&x1012, &x1013, x1011, x940, x987); + fiat_secp384r1_addcarryx_u32(&x1014, &x1015, x1013, x942, x989); + fiat_secp384r1_addcarryx_u32(&x1016, &x1017, x1015, x944, x991); + fiat_secp384r1_mulx_u32(&x1018, &x1019, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1020, &x1021, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1022, &x1023, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1024, &x1025, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1026, &x1027, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1028, &x1029, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1030, &x1031, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1032, &x1033, x992, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1034, &x1035, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1036, &x1037, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1038, &x1039, 0x0, x1035, x1032); + fiat_secp384r1_addcarryx_u32(&x1040, &x1041, x1039, x1033, x1030); + fiat_secp384r1_addcarryx_u32(&x1042, &x1043, x1041, x1031, x1028); + fiat_secp384r1_addcarryx_u32(&x1044, &x1045, x1043, x1029, x1026); + fiat_secp384r1_addcarryx_u32(&x1046, &x1047, x1045, x1027, x1024); + fiat_secp384r1_addcarryx_u32(&x1048, &x1049, x1047, x1025, x1022); + fiat_secp384r1_addcarryx_u32(&x1050, &x1051, x1049, x1023, x1020); + fiat_secp384r1_addcarryx_u32(&x1052, &x1053, x1051, x1021, x1018); + x1054 = (x1053 + x1019); + fiat_secp384r1_addcarryx_u32(&x1055, &x1056, 0x0, x992, x1036); + fiat_secp384r1_addcarryx_u32(&x1057, &x1058, x1056, x994, x1037); + fiat_secp384r1_addcarryx_u32(&x1059, &x1060, x1058, x996, 0x0); + 
fiat_secp384r1_addcarryx_u32(&x1061, &x1062, x1060, x998, x1034); + fiat_secp384r1_addcarryx_u32(&x1063, &x1064, x1062, x1000, x1038); + fiat_secp384r1_addcarryx_u32(&x1065, &x1066, x1064, x1002, x1040); + fiat_secp384r1_addcarryx_u32(&x1067, &x1068, x1066, x1004, x1042); + fiat_secp384r1_addcarryx_u32(&x1069, &x1070, x1068, x1006, x1044); + fiat_secp384r1_addcarryx_u32(&x1071, &x1072, x1070, x1008, x1046); + fiat_secp384r1_addcarryx_u32(&x1073, &x1074, x1072, x1010, x1048); + fiat_secp384r1_addcarryx_u32(&x1075, &x1076, x1074, x1012, x1050); + fiat_secp384r1_addcarryx_u32(&x1077, &x1078, x1076, x1014, x1052); + fiat_secp384r1_addcarryx_u32(&x1079, &x1080, x1078, x1016, x1054); + x1081 = ((uint32_t)x1080 + x1017); + fiat_secp384r1_mulx_u32(&x1082, &x1083, x8, (arg2[11])); + fiat_secp384r1_mulx_u32(&x1084, &x1085, x8, (arg2[10])); + fiat_secp384r1_mulx_u32(&x1086, &x1087, x8, (arg2[9])); + fiat_secp384r1_mulx_u32(&x1088, &x1089, x8, (arg2[8])); + fiat_secp384r1_mulx_u32(&x1090, &x1091, x8, (arg2[7])); + fiat_secp384r1_mulx_u32(&x1092, &x1093, x8, (arg2[6])); + fiat_secp384r1_mulx_u32(&x1094, &x1095, x8, (arg2[5])); + fiat_secp384r1_mulx_u32(&x1096, &x1097, x8, (arg2[4])); + fiat_secp384r1_mulx_u32(&x1098, &x1099, x8, (arg2[3])); + fiat_secp384r1_mulx_u32(&x1100, &x1101, x8, (arg2[2])); + fiat_secp384r1_mulx_u32(&x1102, &x1103, x8, (arg2[1])); + fiat_secp384r1_mulx_u32(&x1104, &x1105, x8, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x1106, &x1107, 0x0, x1105, x1102); + fiat_secp384r1_addcarryx_u32(&x1108, &x1109, x1107, x1103, x1100); + fiat_secp384r1_addcarryx_u32(&x1110, &x1111, x1109, x1101, x1098); + fiat_secp384r1_addcarryx_u32(&x1112, &x1113, x1111, x1099, x1096); + fiat_secp384r1_addcarryx_u32(&x1114, &x1115, x1113, x1097, x1094); + fiat_secp384r1_addcarryx_u32(&x1116, &x1117, x1115, x1095, x1092); + fiat_secp384r1_addcarryx_u32(&x1118, &x1119, x1117, x1093, x1090); + fiat_secp384r1_addcarryx_u32(&x1120, &x1121, x1119, x1091, x1088); + 
fiat_secp384r1_addcarryx_u32(&x1122, &x1123, x1121, x1089, x1086); + fiat_secp384r1_addcarryx_u32(&x1124, &x1125, x1123, x1087, x1084); + fiat_secp384r1_addcarryx_u32(&x1126, &x1127, x1125, x1085, x1082); + x1128 = (x1127 + x1083); + fiat_secp384r1_addcarryx_u32(&x1129, &x1130, 0x0, x1057, x1104); + fiat_secp384r1_addcarryx_u32(&x1131, &x1132, x1130, x1059, x1106); + fiat_secp384r1_addcarryx_u32(&x1133, &x1134, x1132, x1061, x1108); + fiat_secp384r1_addcarryx_u32(&x1135, &x1136, x1134, x1063, x1110); + fiat_secp384r1_addcarryx_u32(&x1137, &x1138, x1136, x1065, x1112); + fiat_secp384r1_addcarryx_u32(&x1139, &x1140, x1138, x1067, x1114); + fiat_secp384r1_addcarryx_u32(&x1141, &x1142, x1140, x1069, x1116); + fiat_secp384r1_addcarryx_u32(&x1143, &x1144, x1142, x1071, x1118); + fiat_secp384r1_addcarryx_u32(&x1145, &x1146, x1144, x1073, x1120); + fiat_secp384r1_addcarryx_u32(&x1147, &x1148, x1146, x1075, x1122); + fiat_secp384r1_addcarryx_u32(&x1149, &x1150, x1148, x1077, x1124); + fiat_secp384r1_addcarryx_u32(&x1151, &x1152, x1150, x1079, x1126); + fiat_secp384r1_addcarryx_u32(&x1153, &x1154, x1152, x1081, x1128); + fiat_secp384r1_mulx_u32(&x1155, &x1156, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1157, &x1158, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1159, &x1160, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1161, &x1162, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1163, &x1164, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1165, &x1166, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1167, &x1168, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1169, &x1170, x1129, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1171, &x1172, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1173, &x1174, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1175, &x1176, 0x0, x1172, x1169); + fiat_secp384r1_addcarryx_u32(&x1177, &x1178, x1176, x1170, x1167); + 
fiat_secp384r1_addcarryx_u32(&x1179, &x1180, x1178, x1168, x1165); + fiat_secp384r1_addcarryx_u32(&x1181, &x1182, x1180, x1166, x1163); + fiat_secp384r1_addcarryx_u32(&x1183, &x1184, x1182, x1164, x1161); + fiat_secp384r1_addcarryx_u32(&x1185, &x1186, x1184, x1162, x1159); + fiat_secp384r1_addcarryx_u32(&x1187, &x1188, x1186, x1160, x1157); + fiat_secp384r1_addcarryx_u32(&x1189, &x1190, x1188, x1158, x1155); + x1191 = (x1190 + x1156); + fiat_secp384r1_addcarryx_u32(&x1192, &x1193, 0x0, x1129, x1173); + fiat_secp384r1_addcarryx_u32(&x1194, &x1195, x1193, x1131, x1174); + fiat_secp384r1_addcarryx_u32(&x1196, &x1197, x1195, x1133, 0x0); + fiat_secp384r1_addcarryx_u32(&x1198, &x1199, x1197, x1135, x1171); + fiat_secp384r1_addcarryx_u32(&x1200, &x1201, x1199, x1137, x1175); + fiat_secp384r1_addcarryx_u32(&x1202, &x1203, x1201, x1139, x1177); + fiat_secp384r1_addcarryx_u32(&x1204, &x1205, x1203, x1141, x1179); + fiat_secp384r1_addcarryx_u32(&x1206, &x1207, x1205, x1143, x1181); + fiat_secp384r1_addcarryx_u32(&x1208, &x1209, x1207, x1145, x1183); + fiat_secp384r1_addcarryx_u32(&x1210, &x1211, x1209, x1147, x1185); + fiat_secp384r1_addcarryx_u32(&x1212, &x1213, x1211, x1149, x1187); + fiat_secp384r1_addcarryx_u32(&x1214, &x1215, x1213, x1151, x1189); + fiat_secp384r1_addcarryx_u32(&x1216, &x1217, x1215, x1153, x1191); + x1218 = ((uint32_t)x1217 + x1154); + fiat_secp384r1_mulx_u32(&x1219, &x1220, x9, (arg2[11])); + fiat_secp384r1_mulx_u32(&x1221, &x1222, x9, (arg2[10])); + fiat_secp384r1_mulx_u32(&x1223, &x1224, x9, (arg2[9])); + fiat_secp384r1_mulx_u32(&x1225, &x1226, x9, (arg2[8])); + fiat_secp384r1_mulx_u32(&x1227, &x1228, x9, (arg2[7])); + fiat_secp384r1_mulx_u32(&x1229, &x1230, x9, (arg2[6])); + fiat_secp384r1_mulx_u32(&x1231, &x1232, x9, (arg2[5])); + fiat_secp384r1_mulx_u32(&x1233, &x1234, x9, (arg2[4])); + fiat_secp384r1_mulx_u32(&x1235, &x1236, x9, (arg2[3])); + fiat_secp384r1_mulx_u32(&x1237, &x1238, x9, (arg2[2])); + fiat_secp384r1_mulx_u32(&x1239, &x1240, x9, 
(arg2[1])); + fiat_secp384r1_mulx_u32(&x1241, &x1242, x9, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x1243, &x1244, 0x0, x1242, x1239); + fiat_secp384r1_addcarryx_u32(&x1245, &x1246, x1244, x1240, x1237); + fiat_secp384r1_addcarryx_u32(&x1247, &x1248, x1246, x1238, x1235); + fiat_secp384r1_addcarryx_u32(&x1249, &x1250, x1248, x1236, x1233); + fiat_secp384r1_addcarryx_u32(&x1251, &x1252, x1250, x1234, x1231); + fiat_secp384r1_addcarryx_u32(&x1253, &x1254, x1252, x1232, x1229); + fiat_secp384r1_addcarryx_u32(&x1255, &x1256, x1254, x1230, x1227); + fiat_secp384r1_addcarryx_u32(&x1257, &x1258, x1256, x1228, x1225); + fiat_secp384r1_addcarryx_u32(&x1259, &x1260, x1258, x1226, x1223); + fiat_secp384r1_addcarryx_u32(&x1261, &x1262, x1260, x1224, x1221); + fiat_secp384r1_addcarryx_u32(&x1263, &x1264, x1262, x1222, x1219); + x1265 = (x1264 + x1220); + fiat_secp384r1_addcarryx_u32(&x1266, &x1267, 0x0, x1194, x1241); + fiat_secp384r1_addcarryx_u32(&x1268, &x1269, x1267, x1196, x1243); + fiat_secp384r1_addcarryx_u32(&x1270, &x1271, x1269, x1198, x1245); + fiat_secp384r1_addcarryx_u32(&x1272, &x1273, x1271, x1200, x1247); + fiat_secp384r1_addcarryx_u32(&x1274, &x1275, x1273, x1202, x1249); + fiat_secp384r1_addcarryx_u32(&x1276, &x1277, x1275, x1204, x1251); + fiat_secp384r1_addcarryx_u32(&x1278, &x1279, x1277, x1206, x1253); + fiat_secp384r1_addcarryx_u32(&x1280, &x1281, x1279, x1208, x1255); + fiat_secp384r1_addcarryx_u32(&x1282, &x1283, x1281, x1210, x1257); + fiat_secp384r1_addcarryx_u32(&x1284, &x1285, x1283, x1212, x1259); + fiat_secp384r1_addcarryx_u32(&x1286, &x1287, x1285, x1214, x1261); + fiat_secp384r1_addcarryx_u32(&x1288, &x1289, x1287, x1216, x1263); + fiat_secp384r1_addcarryx_u32(&x1290, &x1291, x1289, x1218, x1265); + fiat_secp384r1_mulx_u32(&x1292, &x1293, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1294, &x1295, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1296, &x1297, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1298, 
&x1299, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1300, &x1301, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1302, &x1303, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1304, &x1305, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1306, &x1307, x1266, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1308, &x1309, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1310, &x1311, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1312, &x1313, 0x0, x1309, x1306); + fiat_secp384r1_addcarryx_u32(&x1314, &x1315, x1313, x1307, x1304); + fiat_secp384r1_addcarryx_u32(&x1316, &x1317, x1315, x1305, x1302); + fiat_secp384r1_addcarryx_u32(&x1318, &x1319, x1317, x1303, x1300); + fiat_secp384r1_addcarryx_u32(&x1320, &x1321, x1319, x1301, x1298); + fiat_secp384r1_addcarryx_u32(&x1322, &x1323, x1321, x1299, x1296); + fiat_secp384r1_addcarryx_u32(&x1324, &x1325, x1323, x1297, x1294); + fiat_secp384r1_addcarryx_u32(&x1326, &x1327, x1325, x1295, x1292); + x1328 = (x1327 + x1293); + fiat_secp384r1_addcarryx_u32(&x1329, &x1330, 0x0, x1266, x1310); + fiat_secp384r1_addcarryx_u32(&x1331, &x1332, x1330, x1268, x1311); + fiat_secp384r1_addcarryx_u32(&x1333, &x1334, x1332, x1270, 0x0); + fiat_secp384r1_addcarryx_u32(&x1335, &x1336, x1334, x1272, x1308); + fiat_secp384r1_addcarryx_u32(&x1337, &x1338, x1336, x1274, x1312); + fiat_secp384r1_addcarryx_u32(&x1339, &x1340, x1338, x1276, x1314); + fiat_secp384r1_addcarryx_u32(&x1341, &x1342, x1340, x1278, x1316); + fiat_secp384r1_addcarryx_u32(&x1343, &x1344, x1342, x1280, x1318); + fiat_secp384r1_addcarryx_u32(&x1345, &x1346, x1344, x1282, x1320); + fiat_secp384r1_addcarryx_u32(&x1347, &x1348, x1346, x1284, x1322); + fiat_secp384r1_addcarryx_u32(&x1349, &x1350, x1348, x1286, x1324); + fiat_secp384r1_addcarryx_u32(&x1351, &x1352, x1350, x1288, x1326); + fiat_secp384r1_addcarryx_u32(&x1353, &x1354, x1352, x1290, x1328); + x1355 = ((uint32_t)x1354 + x1291); + 
fiat_secp384r1_mulx_u32(&x1356, &x1357, x10, (arg2[11])); + fiat_secp384r1_mulx_u32(&x1358, &x1359, x10, (arg2[10])); + fiat_secp384r1_mulx_u32(&x1360, &x1361, x10, (arg2[9])); + fiat_secp384r1_mulx_u32(&x1362, &x1363, x10, (arg2[8])); + fiat_secp384r1_mulx_u32(&x1364, &x1365, x10, (arg2[7])); + fiat_secp384r1_mulx_u32(&x1366, &x1367, x10, (arg2[6])); + fiat_secp384r1_mulx_u32(&x1368, &x1369, x10, (arg2[5])); + fiat_secp384r1_mulx_u32(&x1370, &x1371, x10, (arg2[4])); + fiat_secp384r1_mulx_u32(&x1372, &x1373, x10, (arg2[3])); + fiat_secp384r1_mulx_u32(&x1374, &x1375, x10, (arg2[2])); + fiat_secp384r1_mulx_u32(&x1376, &x1377, x10, (arg2[1])); + fiat_secp384r1_mulx_u32(&x1378, &x1379, x10, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x1380, &x1381, 0x0, x1379, x1376); + fiat_secp384r1_addcarryx_u32(&x1382, &x1383, x1381, x1377, x1374); + fiat_secp384r1_addcarryx_u32(&x1384, &x1385, x1383, x1375, x1372); + fiat_secp384r1_addcarryx_u32(&x1386, &x1387, x1385, x1373, x1370); + fiat_secp384r1_addcarryx_u32(&x1388, &x1389, x1387, x1371, x1368); + fiat_secp384r1_addcarryx_u32(&x1390, &x1391, x1389, x1369, x1366); + fiat_secp384r1_addcarryx_u32(&x1392, &x1393, x1391, x1367, x1364); + fiat_secp384r1_addcarryx_u32(&x1394, &x1395, x1393, x1365, x1362); + fiat_secp384r1_addcarryx_u32(&x1396, &x1397, x1395, x1363, x1360); + fiat_secp384r1_addcarryx_u32(&x1398, &x1399, x1397, x1361, x1358); + fiat_secp384r1_addcarryx_u32(&x1400, &x1401, x1399, x1359, x1356); + x1402 = (x1401 + x1357); + fiat_secp384r1_addcarryx_u32(&x1403, &x1404, 0x0, x1331, x1378); + fiat_secp384r1_addcarryx_u32(&x1405, &x1406, x1404, x1333, x1380); + fiat_secp384r1_addcarryx_u32(&x1407, &x1408, x1406, x1335, x1382); + fiat_secp384r1_addcarryx_u32(&x1409, &x1410, x1408, x1337, x1384); + fiat_secp384r1_addcarryx_u32(&x1411, &x1412, x1410, x1339, x1386); + fiat_secp384r1_addcarryx_u32(&x1413, &x1414, x1412, x1341, x1388); + fiat_secp384r1_addcarryx_u32(&x1415, &x1416, x1414, x1343, x1390); + 
fiat_secp384r1_addcarryx_u32(&x1417, &x1418, x1416, x1345, x1392); + fiat_secp384r1_addcarryx_u32(&x1419, &x1420, x1418, x1347, x1394); + fiat_secp384r1_addcarryx_u32(&x1421, &x1422, x1420, x1349, x1396); + fiat_secp384r1_addcarryx_u32(&x1423, &x1424, x1422, x1351, x1398); + fiat_secp384r1_addcarryx_u32(&x1425, &x1426, x1424, x1353, x1400); + fiat_secp384r1_addcarryx_u32(&x1427, &x1428, x1426, x1355, x1402); + fiat_secp384r1_mulx_u32(&x1429, &x1430, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1431, &x1432, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1433, &x1434, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1435, &x1436, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1437, &x1438, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1439, &x1440, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1441, &x1442, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1443, &x1444, x1403, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1445, &x1446, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1447, &x1448, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1449, &x1450, 0x0, x1446, x1443); + fiat_secp384r1_addcarryx_u32(&x1451, &x1452, x1450, x1444, x1441); + fiat_secp384r1_addcarryx_u32(&x1453, &x1454, x1452, x1442, x1439); + fiat_secp384r1_addcarryx_u32(&x1455, &x1456, x1454, x1440, x1437); + fiat_secp384r1_addcarryx_u32(&x1457, &x1458, x1456, x1438, x1435); + fiat_secp384r1_addcarryx_u32(&x1459, &x1460, x1458, x1436, x1433); + fiat_secp384r1_addcarryx_u32(&x1461, &x1462, x1460, x1434, x1431); + fiat_secp384r1_addcarryx_u32(&x1463, &x1464, x1462, x1432, x1429); + x1465 = (x1464 + x1430); + fiat_secp384r1_addcarryx_u32(&x1466, &x1467, 0x0, x1403, x1447); + fiat_secp384r1_addcarryx_u32(&x1468, &x1469, x1467, x1405, x1448); + fiat_secp384r1_addcarryx_u32(&x1470, &x1471, x1469, x1407, 0x0); + fiat_secp384r1_addcarryx_u32(&x1472, &x1473, x1471, x1409, x1445); + 
fiat_secp384r1_addcarryx_u32(&x1474, &x1475, x1473, x1411, x1449); + fiat_secp384r1_addcarryx_u32(&x1476, &x1477, x1475, x1413, x1451); + fiat_secp384r1_addcarryx_u32(&x1478, &x1479, x1477, x1415, x1453); + fiat_secp384r1_addcarryx_u32(&x1480, &x1481, x1479, x1417, x1455); + fiat_secp384r1_addcarryx_u32(&x1482, &x1483, x1481, x1419, x1457); + fiat_secp384r1_addcarryx_u32(&x1484, &x1485, x1483, x1421, x1459); + fiat_secp384r1_addcarryx_u32(&x1486, &x1487, x1485, x1423, x1461); + fiat_secp384r1_addcarryx_u32(&x1488, &x1489, x1487, x1425, x1463); + fiat_secp384r1_addcarryx_u32(&x1490, &x1491, x1489, x1427, x1465); + x1492 = ((uint32_t)x1491 + x1428); + fiat_secp384r1_mulx_u32(&x1493, &x1494, x11, (arg2[11])); + fiat_secp384r1_mulx_u32(&x1495, &x1496, x11, (arg2[10])); + fiat_secp384r1_mulx_u32(&x1497, &x1498, x11, (arg2[9])); + fiat_secp384r1_mulx_u32(&x1499, &x1500, x11, (arg2[8])); + fiat_secp384r1_mulx_u32(&x1501, &x1502, x11, (arg2[7])); + fiat_secp384r1_mulx_u32(&x1503, &x1504, x11, (arg2[6])); + fiat_secp384r1_mulx_u32(&x1505, &x1506, x11, (arg2[5])); + fiat_secp384r1_mulx_u32(&x1507, &x1508, x11, (arg2[4])); + fiat_secp384r1_mulx_u32(&x1509, &x1510, x11, (arg2[3])); + fiat_secp384r1_mulx_u32(&x1511, &x1512, x11, (arg2[2])); + fiat_secp384r1_mulx_u32(&x1513, &x1514, x11, (arg2[1])); + fiat_secp384r1_mulx_u32(&x1515, &x1516, x11, (arg2[0])); + fiat_secp384r1_addcarryx_u32(&x1517, &x1518, 0x0, x1516, x1513); + fiat_secp384r1_addcarryx_u32(&x1519, &x1520, x1518, x1514, x1511); + fiat_secp384r1_addcarryx_u32(&x1521, &x1522, x1520, x1512, x1509); + fiat_secp384r1_addcarryx_u32(&x1523, &x1524, x1522, x1510, x1507); + fiat_secp384r1_addcarryx_u32(&x1525, &x1526, x1524, x1508, x1505); + fiat_secp384r1_addcarryx_u32(&x1527, &x1528, x1526, x1506, x1503); + fiat_secp384r1_addcarryx_u32(&x1529, &x1530, x1528, x1504, x1501); + fiat_secp384r1_addcarryx_u32(&x1531, &x1532, x1530, x1502, x1499); + fiat_secp384r1_addcarryx_u32(&x1533, &x1534, x1532, x1500, x1497); + 
fiat_secp384r1_addcarryx_u32(&x1535, &x1536, x1534, x1498, x1495); + fiat_secp384r1_addcarryx_u32(&x1537, &x1538, x1536, x1496, x1493); + x1539 = (x1538 + x1494); + fiat_secp384r1_addcarryx_u32(&x1540, &x1541, 0x0, x1468, x1515); + fiat_secp384r1_addcarryx_u32(&x1542, &x1543, x1541, x1470, x1517); + fiat_secp384r1_addcarryx_u32(&x1544, &x1545, x1543, x1472, x1519); + fiat_secp384r1_addcarryx_u32(&x1546, &x1547, x1545, x1474, x1521); + fiat_secp384r1_addcarryx_u32(&x1548, &x1549, x1547, x1476, x1523); + fiat_secp384r1_addcarryx_u32(&x1550, &x1551, x1549, x1478, x1525); + fiat_secp384r1_addcarryx_u32(&x1552, &x1553, x1551, x1480, x1527); + fiat_secp384r1_addcarryx_u32(&x1554, &x1555, x1553, x1482, x1529); + fiat_secp384r1_addcarryx_u32(&x1556, &x1557, x1555, x1484, x1531); + fiat_secp384r1_addcarryx_u32(&x1558, &x1559, x1557, x1486, x1533); + fiat_secp384r1_addcarryx_u32(&x1560, &x1561, x1559, x1488, x1535); + fiat_secp384r1_addcarryx_u32(&x1562, &x1563, x1561, x1490, x1537); + fiat_secp384r1_addcarryx_u32(&x1564, &x1565, x1563, x1492, x1539); + fiat_secp384r1_mulx_u32(&x1566, &x1567, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1568, &x1569, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1570, &x1571, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1572, &x1573, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1574, &x1575, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1576, &x1577, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1578, &x1579, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1580, &x1581, x1540, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1582, &x1583, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1584, &x1585, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1586, &x1587, 0x0, x1583, x1580); + fiat_secp384r1_addcarryx_u32(&x1588, &x1589, x1587, x1581, x1578); + fiat_secp384r1_addcarryx_u32(&x1590, &x1591, x1589, x1579, x1576); + 
fiat_secp384r1_addcarryx_u32(&x1592, &x1593, x1591, x1577, x1574); + fiat_secp384r1_addcarryx_u32(&x1594, &x1595, x1593, x1575, x1572); + fiat_secp384r1_addcarryx_u32(&x1596, &x1597, x1595, x1573, x1570); + fiat_secp384r1_addcarryx_u32(&x1598, &x1599, x1597, x1571, x1568); + fiat_secp384r1_addcarryx_u32(&x1600, &x1601, x1599, x1569, x1566); + x1602 = (x1601 + x1567); + fiat_secp384r1_addcarryx_u32(&x1603, &x1604, 0x0, x1540, x1584); + fiat_secp384r1_addcarryx_u32(&x1605, &x1606, x1604, x1542, x1585); + fiat_secp384r1_addcarryx_u32(&x1607, &x1608, x1606, x1544, 0x0); + fiat_secp384r1_addcarryx_u32(&x1609, &x1610, x1608, x1546, x1582); + fiat_secp384r1_addcarryx_u32(&x1611, &x1612, x1610, x1548, x1586); + fiat_secp384r1_addcarryx_u32(&x1613, &x1614, x1612, x1550, x1588); + fiat_secp384r1_addcarryx_u32(&x1615, &x1616, x1614, x1552, x1590); + fiat_secp384r1_addcarryx_u32(&x1617, &x1618, x1616, x1554, x1592); + fiat_secp384r1_addcarryx_u32(&x1619, &x1620, x1618, x1556, x1594); + fiat_secp384r1_addcarryx_u32(&x1621, &x1622, x1620, x1558, x1596); + fiat_secp384r1_addcarryx_u32(&x1623, &x1624, x1622, x1560, x1598); + fiat_secp384r1_addcarryx_u32(&x1625, &x1626, x1624, x1562, x1600); + fiat_secp384r1_addcarryx_u32(&x1627, &x1628, x1626, x1564, x1602); + x1629 = ((uint32_t)x1628 + x1565); + fiat_secp384r1_subborrowx_u32(&x1630, &x1631, 0x0, x1605, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1632, &x1633, x1631, x1607, 0x0); + fiat_secp384r1_subborrowx_u32(&x1634, &x1635, x1633, x1609, 0x0); + fiat_secp384r1_subborrowx_u32(&x1636, &x1637, x1635, x1611, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1638, &x1639, x1637, x1613, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x1640, &x1641, x1639, x1615, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1642, &x1643, x1641, x1617, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1644, &x1645, x1643, x1619, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1646, 
&x1647, x1645, x1621, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1648, &x1649, x1647, x1623, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1650, &x1651, x1649, x1625, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1652, &x1653, x1651, x1627, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1654, &x1655, x1653, x1629, 0x0); + fiat_secp384r1_cmovznz_u32(&x1656, x1655, x1630, x1605); + fiat_secp384r1_cmovznz_u32(&x1657, x1655, x1632, x1607); + fiat_secp384r1_cmovznz_u32(&x1658, x1655, x1634, x1609); + fiat_secp384r1_cmovznz_u32(&x1659, x1655, x1636, x1611); + fiat_secp384r1_cmovznz_u32(&x1660, x1655, x1638, x1613); + fiat_secp384r1_cmovznz_u32(&x1661, x1655, x1640, x1615); + fiat_secp384r1_cmovznz_u32(&x1662, x1655, x1642, x1617); + fiat_secp384r1_cmovznz_u32(&x1663, x1655, x1644, x1619); + fiat_secp384r1_cmovznz_u32(&x1664, x1655, x1646, x1621); + fiat_secp384r1_cmovznz_u32(&x1665, x1655, x1648, x1623); + fiat_secp384r1_cmovznz_u32(&x1666, x1655, x1650, x1625); + fiat_secp384r1_cmovznz_u32(&x1667, x1655, x1652, x1627); + out1[0] = x1656; + out1[1] = x1657; + out1[2] = x1658; + out1[3] = x1659; + out1[4] = x1660; + out1[5] = x1661; + out1[6] = x1662; + out1[7] = x1663; + out1[8] = x1664; + out1[9] = x1665; + out1[10] = x1666; + out1[11] = x1667; +} + +/* + * The function fiat_secp384r1_square squares a field element in the Montgomery domain. 
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m + * 0 ≤ eval out1 < m + * + */ +static void +fiat_secp384r1_square( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) +{ + uint32_t x1; + uint32_t x2; + uint32_t x3; + uint32_t x4; + uint32_t x5; + uint32_t x6; + uint32_t x7; + uint32_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint32_t x16; + uint32_t x17; + uint32_t x18; + uint32_t x19; + uint32_t x20; + uint32_t x21; + uint32_t x22; + uint32_t x23; + uint32_t x24; + uint32_t x25; + uint32_t x26; + uint32_t x27; + uint32_t x28; + uint32_t x29; + uint32_t x30; + uint32_t x31; + uint32_t x32; + uint32_t x33; + uint32_t x34; + uint32_t x35; + uint32_t x36; + uint32_t x37; + fiat_secp384r1_uint1 x38; + uint32_t x39; + fiat_secp384r1_uint1 x40; + uint32_t x41; + fiat_secp384r1_uint1 x42; + uint32_t x43; + fiat_secp384r1_uint1 x44; + uint32_t x45; + fiat_secp384r1_uint1 x46; + uint32_t x47; + fiat_secp384r1_uint1 x48; + uint32_t x49; + fiat_secp384r1_uint1 x50; + uint32_t x51; + fiat_secp384r1_uint1 x52; + uint32_t x53; + fiat_secp384r1_uint1 x54; + uint32_t x55; + fiat_secp384r1_uint1 x56; + uint32_t x57; + fiat_secp384r1_uint1 x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + uint32_t x63; + uint32_t x64; + uint32_t x65; + uint32_t x66; + uint32_t x67; + uint32_t x68; + uint32_t x69; + uint32_t x70; + uint32_t x71; + uint32_t x72; + uint32_t x73; + uint32_t x74; + uint32_t x75; + uint32_t x76; + uint32_t x77; + uint32_t x78; + uint32_t x79; + uint32_t x80; + fiat_secp384r1_uint1 x81; + uint32_t x82; + fiat_secp384r1_uint1 x83; + uint32_t x84; + fiat_secp384r1_uint1 x85; + uint32_t x86; + fiat_secp384r1_uint1 x87; + uint32_t x88; + fiat_secp384r1_uint1 x89; + uint32_t x90; + 
fiat_secp384r1_uint1 x91; + uint32_t x92; + fiat_secp384r1_uint1 x93; + uint32_t x94; + fiat_secp384r1_uint1 x95; + uint32_t x96; + uint32_t x97; + fiat_secp384r1_uint1 x98; + uint32_t x99; + fiat_secp384r1_uint1 x100; + uint32_t x101; + fiat_secp384r1_uint1 x102; + uint32_t x103; + fiat_secp384r1_uint1 x104; + uint32_t x105; + fiat_secp384r1_uint1 x106; + uint32_t x107; + fiat_secp384r1_uint1 x108; + uint32_t x109; + fiat_secp384r1_uint1 x110; + uint32_t x111; + fiat_secp384r1_uint1 x112; + uint32_t x113; + fiat_secp384r1_uint1 x114; + uint32_t x115; + fiat_secp384r1_uint1 x116; + uint32_t x117; + fiat_secp384r1_uint1 x118; + uint32_t x119; + fiat_secp384r1_uint1 x120; + uint32_t x121; + fiat_secp384r1_uint1 x122; + uint32_t x123; + uint32_t x124; + uint32_t x125; + uint32_t x126; + uint32_t x127; + uint32_t x128; + uint32_t x129; + uint32_t x130; + uint32_t x131; + uint32_t x132; + uint32_t x133; + uint32_t x134; + uint32_t x135; + uint32_t x136; + uint32_t x137; + uint32_t x138; + uint32_t x139; + uint32_t x140; + uint32_t x141; + uint32_t x142; + uint32_t x143; + uint32_t x144; + uint32_t x145; + uint32_t x146; + uint32_t x147; + fiat_secp384r1_uint1 x148; + uint32_t x149; + fiat_secp384r1_uint1 x150; + uint32_t x151; + fiat_secp384r1_uint1 x152; + uint32_t x153; + fiat_secp384r1_uint1 x154; + uint32_t x155; + fiat_secp384r1_uint1 x156; + uint32_t x157; + fiat_secp384r1_uint1 x158; + uint32_t x159; + fiat_secp384r1_uint1 x160; + uint32_t x161; + fiat_secp384r1_uint1 x162; + uint32_t x163; + fiat_secp384r1_uint1 x164; + uint32_t x165; + fiat_secp384r1_uint1 x166; + uint32_t x167; + fiat_secp384r1_uint1 x168; + uint32_t x169; + uint32_t x170; + fiat_secp384r1_uint1 x171; + uint32_t x172; + fiat_secp384r1_uint1 x173; + uint32_t x174; + fiat_secp384r1_uint1 x175; + uint32_t x176; + fiat_secp384r1_uint1 x177; + uint32_t x178; + fiat_secp384r1_uint1 x179; + uint32_t x180; + fiat_secp384r1_uint1 x181; + uint32_t x182; + fiat_secp384r1_uint1 x183; + uint32_t x184; + 
fiat_secp384r1_uint1 x185; + uint32_t x186; + fiat_secp384r1_uint1 x187; + uint32_t x188; + fiat_secp384r1_uint1 x189; + uint32_t x190; + fiat_secp384r1_uint1 x191; + uint32_t x192; + fiat_secp384r1_uint1 x193; + uint32_t x194; + fiat_secp384r1_uint1 x195; + uint32_t x196; + uint32_t x197; + uint32_t x198; + uint32_t x199; + uint32_t x200; + uint32_t x201; + uint32_t x202; + uint32_t x203; + uint32_t x204; + uint32_t x205; + uint32_t x206; + uint32_t x207; + uint32_t x208; + uint32_t x209; + uint32_t x210; + uint32_t x211; + uint32_t x212; + uint32_t x213; + uint32_t x214; + uint32_t x215; + uint32_t x216; + fiat_secp384r1_uint1 x217; + uint32_t x218; + fiat_secp384r1_uint1 x219; + uint32_t x220; + fiat_secp384r1_uint1 x221; + uint32_t x222; + fiat_secp384r1_uint1 x223; + uint32_t x224; + fiat_secp384r1_uint1 x225; + uint32_t x226; + fiat_secp384r1_uint1 x227; + uint32_t x228; + fiat_secp384r1_uint1 x229; + uint32_t x230; + fiat_secp384r1_uint1 x231; + uint32_t x232; + uint32_t x233; + fiat_secp384r1_uint1 x234; + uint32_t x235; + fiat_secp384r1_uint1 x236; + uint32_t x237; + fiat_secp384r1_uint1 x238; + uint32_t x239; + fiat_secp384r1_uint1 x240; + uint32_t x241; + fiat_secp384r1_uint1 x242; + uint32_t x243; + fiat_secp384r1_uint1 x244; + uint32_t x245; + fiat_secp384r1_uint1 x246; + uint32_t x247; + fiat_secp384r1_uint1 x248; + uint32_t x249; + fiat_secp384r1_uint1 x250; + uint32_t x251; + fiat_secp384r1_uint1 x252; + uint32_t x253; + fiat_secp384r1_uint1 x254; + uint32_t x255; + fiat_secp384r1_uint1 x256; + uint32_t x257; + fiat_secp384r1_uint1 x258; + uint32_t x259; + uint32_t x260; + uint32_t x261; + uint32_t x262; + uint32_t x263; + uint32_t x264; + uint32_t x265; + uint32_t x266; + uint32_t x267; + uint32_t x268; + uint32_t x269; + uint32_t x270; + uint32_t x271; + uint32_t x272; + uint32_t x273; + uint32_t x274; + uint32_t x275; + uint32_t x276; + uint32_t x277; + uint32_t x278; + uint32_t x279; + uint32_t x280; + uint32_t x281; + uint32_t x282; + uint32_t 
x283; + uint32_t x284; + fiat_secp384r1_uint1 x285; + uint32_t x286; + fiat_secp384r1_uint1 x287; + uint32_t x288; + fiat_secp384r1_uint1 x289; + uint32_t x290; + fiat_secp384r1_uint1 x291; + uint32_t x292; + fiat_secp384r1_uint1 x293; + uint32_t x294; + fiat_secp384r1_uint1 x295; + uint32_t x296; + fiat_secp384r1_uint1 x297; + uint32_t x298; + fiat_secp384r1_uint1 x299; + uint32_t x300; + fiat_secp384r1_uint1 x301; + uint32_t x302; + fiat_secp384r1_uint1 x303; + uint32_t x304; + fiat_secp384r1_uint1 x305; + uint32_t x306; + uint32_t x307; + fiat_secp384r1_uint1 x308; + uint32_t x309; + fiat_secp384r1_uint1 x310; + uint32_t x311; + fiat_secp384r1_uint1 x312; + uint32_t x313; + fiat_secp384r1_uint1 x314; + uint32_t x315; + fiat_secp384r1_uint1 x316; + uint32_t x317; + fiat_secp384r1_uint1 x318; + uint32_t x319; + fiat_secp384r1_uint1 x320; + uint32_t x321; + fiat_secp384r1_uint1 x322; + uint32_t x323; + fiat_secp384r1_uint1 x324; + uint32_t x325; + fiat_secp384r1_uint1 x326; + uint32_t x327; + fiat_secp384r1_uint1 x328; + uint32_t x329; + fiat_secp384r1_uint1 x330; + uint32_t x331; + fiat_secp384r1_uint1 x332; + uint32_t x333; + uint32_t x334; + uint32_t x335; + uint32_t x336; + uint32_t x337; + uint32_t x338; + uint32_t x339; + uint32_t x340; + uint32_t x341; + uint32_t x342; + uint32_t x343; + uint32_t x344; + uint32_t x345; + uint32_t x346; + uint32_t x347; + uint32_t x348; + uint32_t x349; + uint32_t x350; + uint32_t x351; + uint32_t x352; + uint32_t x353; + fiat_secp384r1_uint1 x354; + uint32_t x355; + fiat_secp384r1_uint1 x356; + uint32_t x357; + fiat_secp384r1_uint1 x358; + uint32_t x359; + fiat_secp384r1_uint1 x360; + uint32_t x361; + fiat_secp384r1_uint1 x362; + uint32_t x363; + fiat_secp384r1_uint1 x364; + uint32_t x365; + fiat_secp384r1_uint1 x366; + uint32_t x367; + fiat_secp384r1_uint1 x368; + uint32_t x369; + uint32_t x370; + fiat_secp384r1_uint1 x371; + uint32_t x372; + fiat_secp384r1_uint1 x373; + uint32_t x374; + fiat_secp384r1_uint1 x375; + 
uint32_t x376; + fiat_secp384r1_uint1 x377; + uint32_t x378; + fiat_secp384r1_uint1 x379; + uint32_t x380; + fiat_secp384r1_uint1 x381; + uint32_t x382; + fiat_secp384r1_uint1 x383; + uint32_t x384; + fiat_secp384r1_uint1 x385; + uint32_t x386; + fiat_secp384r1_uint1 x387; + uint32_t x388; + fiat_secp384r1_uint1 x389; + uint32_t x390; + fiat_secp384r1_uint1 x391; + uint32_t x392; + fiat_secp384r1_uint1 x393; + uint32_t x394; + fiat_secp384r1_uint1 x395; + uint32_t x396; + uint32_t x397; + uint32_t x398; + uint32_t x399; + uint32_t x400; + uint32_t x401; + uint32_t x402; + uint32_t x403; + uint32_t x404; + uint32_t x405; + uint32_t x406; + uint32_t x407; + uint32_t x408; + uint32_t x409; + uint32_t x410; + uint32_t x411; + uint32_t x412; + uint32_t x413; + uint32_t x414; + uint32_t x415; + uint32_t x416; + uint32_t x417; + uint32_t x418; + uint32_t x419; + uint32_t x420; + uint32_t x421; + fiat_secp384r1_uint1 x422; + uint32_t x423; + fiat_secp384r1_uint1 x424; + uint32_t x425; + fiat_secp384r1_uint1 x426; + uint32_t x427; + fiat_secp384r1_uint1 x428; + uint32_t x429; + fiat_secp384r1_uint1 x430; + uint32_t x431; + fiat_secp384r1_uint1 x432; + uint32_t x433; + fiat_secp384r1_uint1 x434; + uint32_t x435; + fiat_secp384r1_uint1 x436; + uint32_t x437; + fiat_secp384r1_uint1 x438; + uint32_t x439; + fiat_secp384r1_uint1 x440; + uint32_t x441; + fiat_secp384r1_uint1 x442; + uint32_t x443; + uint32_t x444; + fiat_secp384r1_uint1 x445; + uint32_t x446; + fiat_secp384r1_uint1 x447; + uint32_t x448; + fiat_secp384r1_uint1 x449; + uint32_t x450; + fiat_secp384r1_uint1 x451; + uint32_t x452; + fiat_secp384r1_uint1 x453; + uint32_t x454; + fiat_secp384r1_uint1 x455; + uint32_t x456; + fiat_secp384r1_uint1 x457; + uint32_t x458; + fiat_secp384r1_uint1 x459; + uint32_t x460; + fiat_secp384r1_uint1 x461; + uint32_t x462; + fiat_secp384r1_uint1 x463; + uint32_t x464; + fiat_secp384r1_uint1 x465; + uint32_t x466; + fiat_secp384r1_uint1 x467; + uint32_t x468; + fiat_secp384r1_uint1 
x469; + uint32_t x470; + uint32_t x471; + uint32_t x472; + uint32_t x473; + uint32_t x474; + uint32_t x475; + uint32_t x476; + uint32_t x477; + uint32_t x478; + uint32_t x479; + uint32_t x480; + uint32_t x481; + uint32_t x482; + uint32_t x483; + uint32_t x484; + uint32_t x485; + uint32_t x486; + uint32_t x487; + uint32_t x488; + uint32_t x489; + uint32_t x490; + fiat_secp384r1_uint1 x491; + uint32_t x492; + fiat_secp384r1_uint1 x493; + uint32_t x494; + fiat_secp384r1_uint1 x495; + uint32_t x496; + fiat_secp384r1_uint1 x497; + uint32_t x498; + fiat_secp384r1_uint1 x499; + uint32_t x500; + fiat_secp384r1_uint1 x501; + uint32_t x502; + fiat_secp384r1_uint1 x503; + uint32_t x504; + fiat_secp384r1_uint1 x505; + uint32_t x506; + uint32_t x507; + fiat_secp384r1_uint1 x508; + uint32_t x509; + fiat_secp384r1_uint1 x510; + uint32_t x511; + fiat_secp384r1_uint1 x512; + uint32_t x513; + fiat_secp384r1_uint1 x514; + uint32_t x515; + fiat_secp384r1_uint1 x516; + uint32_t x517; + fiat_secp384r1_uint1 x518; + uint32_t x519; + fiat_secp384r1_uint1 x520; + uint32_t x521; + fiat_secp384r1_uint1 x522; + uint32_t x523; + fiat_secp384r1_uint1 x524; + uint32_t x525; + fiat_secp384r1_uint1 x526; + uint32_t x527; + fiat_secp384r1_uint1 x528; + uint32_t x529; + fiat_secp384r1_uint1 x530; + uint32_t x531; + fiat_secp384r1_uint1 x532; + uint32_t x533; + uint32_t x534; + uint32_t x535; + uint32_t x536; + uint32_t x537; + uint32_t x538; + uint32_t x539; + uint32_t x540; + uint32_t x541; + uint32_t x542; + uint32_t x543; + uint32_t x544; + uint32_t x545; + uint32_t x546; + uint32_t x547; + uint32_t x548; + uint32_t x549; + uint32_t x550; + uint32_t x551; + uint32_t x552; + uint32_t x553; + uint32_t x554; + uint32_t x555; + uint32_t x556; + uint32_t x557; + uint32_t x558; + fiat_secp384r1_uint1 x559; + uint32_t x560; + fiat_secp384r1_uint1 x561; + uint32_t x562; + fiat_secp384r1_uint1 x563; + uint32_t x564; + fiat_secp384r1_uint1 x565; + uint32_t x566; + fiat_secp384r1_uint1 x567; + uint32_t 
x568; + fiat_secp384r1_uint1 x569; + uint32_t x570; + fiat_secp384r1_uint1 x571; + uint32_t x572; + fiat_secp384r1_uint1 x573; + uint32_t x574; + fiat_secp384r1_uint1 x575; + uint32_t x576; + fiat_secp384r1_uint1 x577; + uint32_t x578; + fiat_secp384r1_uint1 x579; + uint32_t x580; + uint32_t x581; + fiat_secp384r1_uint1 x582; + uint32_t x583; + fiat_secp384r1_uint1 x584; + uint32_t x585; + fiat_secp384r1_uint1 x586; + uint32_t x587; + fiat_secp384r1_uint1 x588; + uint32_t x589; + fiat_secp384r1_uint1 x590; + uint32_t x591; + fiat_secp384r1_uint1 x592; + uint32_t x593; + fiat_secp384r1_uint1 x594; + uint32_t x595; + fiat_secp384r1_uint1 x596; + uint32_t x597; + fiat_secp384r1_uint1 x598; + uint32_t x599; + fiat_secp384r1_uint1 x600; + uint32_t x601; + fiat_secp384r1_uint1 x602; + uint32_t x603; + fiat_secp384r1_uint1 x604; + uint32_t x605; + fiat_secp384r1_uint1 x606; + uint32_t x607; + uint32_t x608; + uint32_t x609; + uint32_t x610; + uint32_t x611; + uint32_t x612; + uint32_t x613; + uint32_t x614; + uint32_t x615; + uint32_t x616; + uint32_t x617; + uint32_t x618; + uint32_t x619; + uint32_t x620; + uint32_t x621; + uint32_t x622; + uint32_t x623; + uint32_t x624; + uint32_t x625; + uint32_t x626; + uint32_t x627; + fiat_secp384r1_uint1 x628; + uint32_t x629; + fiat_secp384r1_uint1 x630; + uint32_t x631; + fiat_secp384r1_uint1 x632; + uint32_t x633; + fiat_secp384r1_uint1 x634; + uint32_t x635; + fiat_secp384r1_uint1 x636; + uint32_t x637; + fiat_secp384r1_uint1 x638; + uint32_t x639; + fiat_secp384r1_uint1 x640; + uint32_t x641; + fiat_secp384r1_uint1 x642; + uint32_t x643; + uint32_t x644; + fiat_secp384r1_uint1 x645; + uint32_t x646; + fiat_secp384r1_uint1 x647; + uint32_t x648; + fiat_secp384r1_uint1 x649; + uint32_t x650; + fiat_secp384r1_uint1 x651; + uint32_t x652; + fiat_secp384r1_uint1 x653; + uint32_t x654; + fiat_secp384r1_uint1 x655; + uint32_t x656; + fiat_secp384r1_uint1 x657; + uint32_t x658; + fiat_secp384r1_uint1 x659; + uint32_t x660; + 
fiat_secp384r1_uint1 x661; + uint32_t x662; + fiat_secp384r1_uint1 x663; + uint32_t x664; + fiat_secp384r1_uint1 x665; + uint32_t x666; + fiat_secp384r1_uint1 x667; + uint32_t x668; + fiat_secp384r1_uint1 x669; + uint32_t x670; + uint32_t x671; + uint32_t x672; + uint32_t x673; + uint32_t x674; + uint32_t x675; + uint32_t x676; + uint32_t x677; + uint32_t x678; + uint32_t x679; + uint32_t x680; + uint32_t x681; + uint32_t x682; + uint32_t x683; + uint32_t x684; + uint32_t x685; + uint32_t x686; + uint32_t x687; + uint32_t x688; + uint32_t x689; + uint32_t x690; + uint32_t x691; + uint32_t x692; + uint32_t x693; + uint32_t x694; + uint32_t x695; + fiat_secp384r1_uint1 x696; + uint32_t x697; + fiat_secp384r1_uint1 x698; + uint32_t x699; + fiat_secp384r1_uint1 x700; + uint32_t x701; + fiat_secp384r1_uint1 x702; + uint32_t x703; + fiat_secp384r1_uint1 x704; + uint32_t x705; + fiat_secp384r1_uint1 x706; + uint32_t x707; + fiat_secp384r1_uint1 x708; + uint32_t x709; + fiat_secp384r1_uint1 x710; + uint32_t x711; + fiat_secp384r1_uint1 x712; + uint32_t x713; + fiat_secp384r1_uint1 x714; + uint32_t x715; + fiat_secp384r1_uint1 x716; + uint32_t x717; + uint32_t x718; + fiat_secp384r1_uint1 x719; + uint32_t x720; + fiat_secp384r1_uint1 x721; + uint32_t x722; + fiat_secp384r1_uint1 x723; + uint32_t x724; + fiat_secp384r1_uint1 x725; + uint32_t x726; + fiat_secp384r1_uint1 x727; + uint32_t x728; + fiat_secp384r1_uint1 x729; + uint32_t x730; + fiat_secp384r1_uint1 x731; + uint32_t x732; + fiat_secp384r1_uint1 x733; + uint32_t x734; + fiat_secp384r1_uint1 x735; + uint32_t x736; + fiat_secp384r1_uint1 x737; + uint32_t x738; + fiat_secp384r1_uint1 x739; + uint32_t x740; + fiat_secp384r1_uint1 x741; + uint32_t x742; + fiat_secp384r1_uint1 x743; + uint32_t x744; + uint32_t x745; + uint32_t x746; + uint32_t x747; + uint32_t x748; + uint32_t x749; + uint32_t x750; + uint32_t x751; + uint32_t x752; + uint32_t x753; + uint32_t x754; + uint32_t x755; + uint32_t x756; + uint32_t x757; + 
uint32_t x758; + uint32_t x759; + uint32_t x760; + uint32_t x761; + uint32_t x762; + uint32_t x763; + uint32_t x764; + fiat_secp384r1_uint1 x765; + uint32_t x766; + fiat_secp384r1_uint1 x767; + uint32_t x768; + fiat_secp384r1_uint1 x769; + uint32_t x770; + fiat_secp384r1_uint1 x771; + uint32_t x772; + fiat_secp384r1_uint1 x773; + uint32_t x774; + fiat_secp384r1_uint1 x775; + uint32_t x776; + fiat_secp384r1_uint1 x777; + uint32_t x778; + fiat_secp384r1_uint1 x779; + uint32_t x780; + uint32_t x781; + fiat_secp384r1_uint1 x782; + uint32_t x783; + fiat_secp384r1_uint1 x784; + uint32_t x785; + fiat_secp384r1_uint1 x786; + uint32_t x787; + fiat_secp384r1_uint1 x788; + uint32_t x789; + fiat_secp384r1_uint1 x790; + uint32_t x791; + fiat_secp384r1_uint1 x792; + uint32_t x793; + fiat_secp384r1_uint1 x794; + uint32_t x795; + fiat_secp384r1_uint1 x796; + uint32_t x797; + fiat_secp384r1_uint1 x798; + uint32_t x799; + fiat_secp384r1_uint1 x800; + uint32_t x801; + fiat_secp384r1_uint1 x802; + uint32_t x803; + fiat_secp384r1_uint1 x804; + uint32_t x805; + fiat_secp384r1_uint1 x806; + uint32_t x807; + uint32_t x808; + uint32_t x809; + uint32_t x810; + uint32_t x811; + uint32_t x812; + uint32_t x813; + uint32_t x814; + uint32_t x815; + uint32_t x816; + uint32_t x817; + uint32_t x818; + uint32_t x819; + uint32_t x820; + uint32_t x821; + uint32_t x822; + uint32_t x823; + uint32_t x824; + uint32_t x825; + uint32_t x826; + uint32_t x827; + uint32_t x828; + uint32_t x829; + uint32_t x830; + uint32_t x831; + uint32_t x832; + fiat_secp384r1_uint1 x833; + uint32_t x834; + fiat_secp384r1_uint1 x835; + uint32_t x836; + fiat_secp384r1_uint1 x837; + uint32_t x838; + fiat_secp384r1_uint1 x839; + uint32_t x840; + fiat_secp384r1_uint1 x841; + uint32_t x842; + fiat_secp384r1_uint1 x843; + uint32_t x844; + fiat_secp384r1_uint1 x845; + uint32_t x846; + fiat_secp384r1_uint1 x847; + uint32_t x848; + fiat_secp384r1_uint1 x849; + uint32_t x850; + fiat_secp384r1_uint1 x851; + uint32_t x852; + 
fiat_secp384r1_uint1 x853; + uint32_t x854; + uint32_t x855; + fiat_secp384r1_uint1 x856; + uint32_t x857; + fiat_secp384r1_uint1 x858; + uint32_t x859; + fiat_secp384r1_uint1 x860; + uint32_t x861; + fiat_secp384r1_uint1 x862; + uint32_t x863; + fiat_secp384r1_uint1 x864; + uint32_t x865; + fiat_secp384r1_uint1 x866; + uint32_t x867; + fiat_secp384r1_uint1 x868; + uint32_t x869; + fiat_secp384r1_uint1 x870; + uint32_t x871; + fiat_secp384r1_uint1 x872; + uint32_t x873; + fiat_secp384r1_uint1 x874; + uint32_t x875; + fiat_secp384r1_uint1 x876; + uint32_t x877; + fiat_secp384r1_uint1 x878; + uint32_t x879; + fiat_secp384r1_uint1 x880; + uint32_t x881; + uint32_t x882; + uint32_t x883; + uint32_t x884; + uint32_t x885; + uint32_t x886; + uint32_t x887; + uint32_t x888; + uint32_t x889; + uint32_t x890; + uint32_t x891; + uint32_t x892; + uint32_t x893; + uint32_t x894; + uint32_t x895; + uint32_t x896; + uint32_t x897; + uint32_t x898; + uint32_t x899; + uint32_t x900; + uint32_t x901; + fiat_secp384r1_uint1 x902; + uint32_t x903; + fiat_secp384r1_uint1 x904; + uint32_t x905; + fiat_secp384r1_uint1 x906; + uint32_t x907; + fiat_secp384r1_uint1 x908; + uint32_t x909; + fiat_secp384r1_uint1 x910; + uint32_t x911; + fiat_secp384r1_uint1 x912; + uint32_t x913; + fiat_secp384r1_uint1 x914; + uint32_t x915; + fiat_secp384r1_uint1 x916; + uint32_t x917; + uint32_t x918; + fiat_secp384r1_uint1 x919; + uint32_t x920; + fiat_secp384r1_uint1 x921; + uint32_t x922; + fiat_secp384r1_uint1 x923; + uint32_t x924; + fiat_secp384r1_uint1 x925; + uint32_t x926; + fiat_secp384r1_uint1 x927; + uint32_t x928; + fiat_secp384r1_uint1 x929; + uint32_t x930; + fiat_secp384r1_uint1 x931; + uint32_t x932; + fiat_secp384r1_uint1 x933; + uint32_t x934; + fiat_secp384r1_uint1 x935; + uint32_t x936; + fiat_secp384r1_uint1 x937; + uint32_t x938; + fiat_secp384r1_uint1 x939; + uint32_t x940; + fiat_secp384r1_uint1 x941; + uint32_t x942; + fiat_secp384r1_uint1 x943; + uint32_t x944; + uint32_t x945; 
+ uint32_t x946; + uint32_t x947; + uint32_t x948; + uint32_t x949; + uint32_t x950; + uint32_t x951; + uint32_t x952; + uint32_t x953; + uint32_t x954; + uint32_t x955; + uint32_t x956; + uint32_t x957; + uint32_t x958; + uint32_t x959; + uint32_t x960; + uint32_t x961; + uint32_t x962; + uint32_t x963; + uint32_t x964; + uint32_t x965; + uint32_t x966; + uint32_t x967; + uint32_t x968; + uint32_t x969; + fiat_secp384r1_uint1 x970; + uint32_t x971; + fiat_secp384r1_uint1 x972; + uint32_t x973; + fiat_secp384r1_uint1 x974; + uint32_t x975; + fiat_secp384r1_uint1 x976; + uint32_t x977; + fiat_secp384r1_uint1 x978; + uint32_t x979; + fiat_secp384r1_uint1 x980; + uint32_t x981; + fiat_secp384r1_uint1 x982; + uint32_t x983; + fiat_secp384r1_uint1 x984; + uint32_t x985; + fiat_secp384r1_uint1 x986; + uint32_t x987; + fiat_secp384r1_uint1 x988; + uint32_t x989; + fiat_secp384r1_uint1 x990; + uint32_t x991; + uint32_t x992; + fiat_secp384r1_uint1 x993; + uint32_t x994; + fiat_secp384r1_uint1 x995; + uint32_t x996; + fiat_secp384r1_uint1 x997; + uint32_t x998; + fiat_secp384r1_uint1 x999; + uint32_t x1000; + fiat_secp384r1_uint1 x1001; + uint32_t x1002; + fiat_secp384r1_uint1 x1003; + uint32_t x1004; + fiat_secp384r1_uint1 x1005; + uint32_t x1006; + fiat_secp384r1_uint1 x1007; + uint32_t x1008; + fiat_secp384r1_uint1 x1009; + uint32_t x1010; + fiat_secp384r1_uint1 x1011; + uint32_t x1012; + fiat_secp384r1_uint1 x1013; + uint32_t x1014; + fiat_secp384r1_uint1 x1015; + uint32_t x1016; + fiat_secp384r1_uint1 x1017; + uint32_t x1018; + uint32_t x1019; + uint32_t x1020; + uint32_t x1021; + uint32_t x1022; + uint32_t x1023; + uint32_t x1024; + uint32_t x1025; + uint32_t x1026; + uint32_t x1027; + uint32_t x1028; + uint32_t x1029; + uint32_t x1030; + uint32_t x1031; + uint32_t x1032; + uint32_t x1033; + uint32_t x1034; + uint32_t x1035; + uint32_t x1036; + uint32_t x1037; + uint32_t x1038; + fiat_secp384r1_uint1 x1039; + uint32_t x1040; + fiat_secp384r1_uint1 x1041; + uint32_t 
x1042; + fiat_secp384r1_uint1 x1043; + uint32_t x1044; + fiat_secp384r1_uint1 x1045; + uint32_t x1046; + fiat_secp384r1_uint1 x1047; + uint32_t x1048; + fiat_secp384r1_uint1 x1049; + uint32_t x1050; + fiat_secp384r1_uint1 x1051; + uint32_t x1052; + fiat_secp384r1_uint1 x1053; + uint32_t x1054; + uint32_t x1055; + fiat_secp384r1_uint1 x1056; + uint32_t x1057; + fiat_secp384r1_uint1 x1058; + uint32_t x1059; + fiat_secp384r1_uint1 x1060; + uint32_t x1061; + fiat_secp384r1_uint1 x1062; + uint32_t x1063; + fiat_secp384r1_uint1 x1064; + uint32_t x1065; + fiat_secp384r1_uint1 x1066; + uint32_t x1067; + fiat_secp384r1_uint1 x1068; + uint32_t x1069; + fiat_secp384r1_uint1 x1070; + uint32_t x1071; + fiat_secp384r1_uint1 x1072; + uint32_t x1073; + fiat_secp384r1_uint1 x1074; + uint32_t x1075; + fiat_secp384r1_uint1 x1076; + uint32_t x1077; + fiat_secp384r1_uint1 x1078; + uint32_t x1079; + fiat_secp384r1_uint1 x1080; + uint32_t x1081; + uint32_t x1082; + uint32_t x1083; + uint32_t x1084; + uint32_t x1085; + uint32_t x1086; + uint32_t x1087; + uint32_t x1088; + uint32_t x1089; + uint32_t x1090; + uint32_t x1091; + uint32_t x1092; + uint32_t x1093; + uint32_t x1094; + uint32_t x1095; + uint32_t x1096; + uint32_t x1097; + uint32_t x1098; + uint32_t x1099; + uint32_t x1100; + uint32_t x1101; + uint32_t x1102; + uint32_t x1103; + uint32_t x1104; + uint32_t x1105; + uint32_t x1106; + fiat_secp384r1_uint1 x1107; + uint32_t x1108; + fiat_secp384r1_uint1 x1109; + uint32_t x1110; + fiat_secp384r1_uint1 x1111; + uint32_t x1112; + fiat_secp384r1_uint1 x1113; + uint32_t x1114; + fiat_secp384r1_uint1 x1115; + uint32_t x1116; + fiat_secp384r1_uint1 x1117; + uint32_t x1118; + fiat_secp384r1_uint1 x1119; + uint32_t x1120; + fiat_secp384r1_uint1 x1121; + uint32_t x1122; + fiat_secp384r1_uint1 x1123; + uint32_t x1124; + fiat_secp384r1_uint1 x1125; + uint32_t x1126; + fiat_secp384r1_uint1 x1127; + uint32_t x1128; + uint32_t x1129; + fiat_secp384r1_uint1 x1130; + uint32_t x1131; + 
fiat_secp384r1_uint1 x1132; + uint32_t x1133; + fiat_secp384r1_uint1 x1134; + uint32_t x1135; + fiat_secp384r1_uint1 x1136; + uint32_t x1137; + fiat_secp384r1_uint1 x1138; + uint32_t x1139; + fiat_secp384r1_uint1 x1140; + uint32_t x1141; + fiat_secp384r1_uint1 x1142; + uint32_t x1143; + fiat_secp384r1_uint1 x1144; + uint32_t x1145; + fiat_secp384r1_uint1 x1146; + uint32_t x1147; + fiat_secp384r1_uint1 x1148; + uint32_t x1149; + fiat_secp384r1_uint1 x1150; + uint32_t x1151; + fiat_secp384r1_uint1 x1152; + uint32_t x1153; + fiat_secp384r1_uint1 x1154; + uint32_t x1155; + uint32_t x1156; + uint32_t x1157; + uint32_t x1158; + uint32_t x1159; + uint32_t x1160; + uint32_t x1161; + uint32_t x1162; + uint32_t x1163; + uint32_t x1164; + uint32_t x1165; + uint32_t x1166; + uint32_t x1167; + uint32_t x1168; + uint32_t x1169; + uint32_t x1170; + uint32_t x1171; + uint32_t x1172; + uint32_t x1173; + uint32_t x1174; + uint32_t x1175; + fiat_secp384r1_uint1 x1176; + uint32_t x1177; + fiat_secp384r1_uint1 x1178; + uint32_t x1179; + fiat_secp384r1_uint1 x1180; + uint32_t x1181; + fiat_secp384r1_uint1 x1182; + uint32_t x1183; + fiat_secp384r1_uint1 x1184; + uint32_t x1185; + fiat_secp384r1_uint1 x1186; + uint32_t x1187; + fiat_secp384r1_uint1 x1188; + uint32_t x1189; + fiat_secp384r1_uint1 x1190; + uint32_t x1191; + uint32_t x1192; + fiat_secp384r1_uint1 x1193; + uint32_t x1194; + fiat_secp384r1_uint1 x1195; + uint32_t x1196; + fiat_secp384r1_uint1 x1197; + uint32_t x1198; + fiat_secp384r1_uint1 x1199; + uint32_t x1200; + fiat_secp384r1_uint1 x1201; + uint32_t x1202; + fiat_secp384r1_uint1 x1203; + uint32_t x1204; + fiat_secp384r1_uint1 x1205; + uint32_t x1206; + fiat_secp384r1_uint1 x1207; + uint32_t x1208; + fiat_secp384r1_uint1 x1209; + uint32_t x1210; + fiat_secp384r1_uint1 x1211; + uint32_t x1212; + fiat_secp384r1_uint1 x1213; + uint32_t x1214; + fiat_secp384r1_uint1 x1215; + uint32_t x1216; + fiat_secp384r1_uint1 x1217; + uint32_t x1218; + uint32_t x1219; + uint32_t x1220; + 
uint32_t x1221; + uint32_t x1222; + uint32_t x1223; + uint32_t x1224; + uint32_t x1225; + uint32_t x1226; + uint32_t x1227; + uint32_t x1228; + uint32_t x1229; + uint32_t x1230; + uint32_t x1231; + uint32_t x1232; + uint32_t x1233; + uint32_t x1234; + uint32_t x1235; + uint32_t x1236; + uint32_t x1237; + uint32_t x1238; + uint32_t x1239; + uint32_t x1240; + uint32_t x1241; + uint32_t x1242; + uint32_t x1243; + fiat_secp384r1_uint1 x1244; + uint32_t x1245; + fiat_secp384r1_uint1 x1246; + uint32_t x1247; + fiat_secp384r1_uint1 x1248; + uint32_t x1249; + fiat_secp384r1_uint1 x1250; + uint32_t x1251; + fiat_secp384r1_uint1 x1252; + uint32_t x1253; + fiat_secp384r1_uint1 x1254; + uint32_t x1255; + fiat_secp384r1_uint1 x1256; + uint32_t x1257; + fiat_secp384r1_uint1 x1258; + uint32_t x1259; + fiat_secp384r1_uint1 x1260; + uint32_t x1261; + fiat_secp384r1_uint1 x1262; + uint32_t x1263; + fiat_secp384r1_uint1 x1264; + uint32_t x1265; + uint32_t x1266; + fiat_secp384r1_uint1 x1267; + uint32_t x1268; + fiat_secp384r1_uint1 x1269; + uint32_t x1270; + fiat_secp384r1_uint1 x1271; + uint32_t x1272; + fiat_secp384r1_uint1 x1273; + uint32_t x1274; + fiat_secp384r1_uint1 x1275; + uint32_t x1276; + fiat_secp384r1_uint1 x1277; + uint32_t x1278; + fiat_secp384r1_uint1 x1279; + uint32_t x1280; + fiat_secp384r1_uint1 x1281; + uint32_t x1282; + fiat_secp384r1_uint1 x1283; + uint32_t x1284; + fiat_secp384r1_uint1 x1285; + uint32_t x1286; + fiat_secp384r1_uint1 x1287; + uint32_t x1288; + fiat_secp384r1_uint1 x1289; + uint32_t x1290; + fiat_secp384r1_uint1 x1291; + uint32_t x1292; + uint32_t x1293; + uint32_t x1294; + uint32_t x1295; + uint32_t x1296; + uint32_t x1297; + uint32_t x1298; + uint32_t x1299; + uint32_t x1300; + uint32_t x1301; + uint32_t x1302; + uint32_t x1303; + uint32_t x1304; + uint32_t x1305; + uint32_t x1306; + uint32_t x1307; + uint32_t x1308; + uint32_t x1309; + uint32_t x1310; + uint32_t x1311; + uint32_t x1312; + fiat_secp384r1_uint1 x1313; + uint32_t x1314; + 
fiat_secp384r1_uint1 x1315; + uint32_t x1316; + fiat_secp384r1_uint1 x1317; + uint32_t x1318; + fiat_secp384r1_uint1 x1319; + uint32_t x1320; + fiat_secp384r1_uint1 x1321; + uint32_t x1322; + fiat_secp384r1_uint1 x1323; + uint32_t x1324; + fiat_secp384r1_uint1 x1325; + uint32_t x1326; + fiat_secp384r1_uint1 x1327; + uint32_t x1328; + uint32_t x1329; + fiat_secp384r1_uint1 x1330; + uint32_t x1331; + fiat_secp384r1_uint1 x1332; + uint32_t x1333; + fiat_secp384r1_uint1 x1334; + uint32_t x1335; + fiat_secp384r1_uint1 x1336; + uint32_t x1337; + fiat_secp384r1_uint1 x1338; + uint32_t x1339; + fiat_secp384r1_uint1 x1340; + uint32_t x1341; + fiat_secp384r1_uint1 x1342; + uint32_t x1343; + fiat_secp384r1_uint1 x1344; + uint32_t x1345; + fiat_secp384r1_uint1 x1346; + uint32_t x1347; + fiat_secp384r1_uint1 x1348; + uint32_t x1349; + fiat_secp384r1_uint1 x1350; + uint32_t x1351; + fiat_secp384r1_uint1 x1352; + uint32_t x1353; + fiat_secp384r1_uint1 x1354; + uint32_t x1355; + uint32_t x1356; + uint32_t x1357; + uint32_t x1358; + uint32_t x1359; + uint32_t x1360; + uint32_t x1361; + uint32_t x1362; + uint32_t x1363; + uint32_t x1364; + uint32_t x1365; + uint32_t x1366; + uint32_t x1367; + uint32_t x1368; + uint32_t x1369; + uint32_t x1370; + uint32_t x1371; + uint32_t x1372; + uint32_t x1373; + uint32_t x1374; + uint32_t x1375; + uint32_t x1376; + uint32_t x1377; + uint32_t x1378; + uint32_t x1379; + uint32_t x1380; + fiat_secp384r1_uint1 x1381; + uint32_t x1382; + fiat_secp384r1_uint1 x1383; + uint32_t x1384; + fiat_secp384r1_uint1 x1385; + uint32_t x1386; + fiat_secp384r1_uint1 x1387; + uint32_t x1388; + fiat_secp384r1_uint1 x1389; + uint32_t x1390; + fiat_secp384r1_uint1 x1391; + uint32_t x1392; + fiat_secp384r1_uint1 x1393; + uint32_t x1394; + fiat_secp384r1_uint1 x1395; + uint32_t x1396; + fiat_secp384r1_uint1 x1397; + uint32_t x1398; + fiat_secp384r1_uint1 x1399; + uint32_t x1400; + fiat_secp384r1_uint1 x1401; + uint32_t x1402; + uint32_t x1403; + fiat_secp384r1_uint1 
x1404; + uint32_t x1405; + fiat_secp384r1_uint1 x1406; + uint32_t x1407; + fiat_secp384r1_uint1 x1408; + uint32_t x1409; + fiat_secp384r1_uint1 x1410; + uint32_t x1411; + fiat_secp384r1_uint1 x1412; + uint32_t x1413; + fiat_secp384r1_uint1 x1414; + uint32_t x1415; + fiat_secp384r1_uint1 x1416; + uint32_t x1417; + fiat_secp384r1_uint1 x1418; + uint32_t x1419; + fiat_secp384r1_uint1 x1420; + uint32_t x1421; + fiat_secp384r1_uint1 x1422; + uint32_t x1423; + fiat_secp384r1_uint1 x1424; + uint32_t x1425; + fiat_secp384r1_uint1 x1426; + uint32_t x1427; + fiat_secp384r1_uint1 x1428; + uint32_t x1429; + uint32_t x1430; + uint32_t x1431; + uint32_t x1432; + uint32_t x1433; + uint32_t x1434; + uint32_t x1435; + uint32_t x1436; + uint32_t x1437; + uint32_t x1438; + uint32_t x1439; + uint32_t x1440; + uint32_t x1441; + uint32_t x1442; + uint32_t x1443; + uint32_t x1444; + uint32_t x1445; + uint32_t x1446; + uint32_t x1447; + uint32_t x1448; + uint32_t x1449; + fiat_secp384r1_uint1 x1450; + uint32_t x1451; + fiat_secp384r1_uint1 x1452; + uint32_t x1453; + fiat_secp384r1_uint1 x1454; + uint32_t x1455; + fiat_secp384r1_uint1 x1456; + uint32_t x1457; + fiat_secp384r1_uint1 x1458; + uint32_t x1459; + fiat_secp384r1_uint1 x1460; + uint32_t x1461; + fiat_secp384r1_uint1 x1462; + uint32_t x1463; + fiat_secp384r1_uint1 x1464; + uint32_t x1465; + uint32_t x1466; + fiat_secp384r1_uint1 x1467; + uint32_t x1468; + fiat_secp384r1_uint1 x1469; + uint32_t x1470; + fiat_secp384r1_uint1 x1471; + uint32_t x1472; + fiat_secp384r1_uint1 x1473; + uint32_t x1474; + fiat_secp384r1_uint1 x1475; + uint32_t x1476; + fiat_secp384r1_uint1 x1477; + uint32_t x1478; + fiat_secp384r1_uint1 x1479; + uint32_t x1480; + fiat_secp384r1_uint1 x1481; + uint32_t x1482; + fiat_secp384r1_uint1 x1483; + uint32_t x1484; + fiat_secp384r1_uint1 x1485; + uint32_t x1486; + fiat_secp384r1_uint1 x1487; + uint32_t x1488; + fiat_secp384r1_uint1 x1489; + uint32_t x1490; + fiat_secp384r1_uint1 x1491; + uint32_t x1492; + uint32_t 
x1493; + uint32_t x1494; + uint32_t x1495; + uint32_t x1496; + uint32_t x1497; + uint32_t x1498; + uint32_t x1499; + uint32_t x1500; + uint32_t x1501; + uint32_t x1502; + uint32_t x1503; + uint32_t x1504; + uint32_t x1505; + uint32_t x1506; + uint32_t x1507; + uint32_t x1508; + uint32_t x1509; + uint32_t x1510; + uint32_t x1511; + uint32_t x1512; + uint32_t x1513; + uint32_t x1514; + uint32_t x1515; + uint32_t x1516; + uint32_t x1517; + fiat_secp384r1_uint1 x1518; + uint32_t x1519; + fiat_secp384r1_uint1 x1520; + uint32_t x1521; + fiat_secp384r1_uint1 x1522; + uint32_t x1523; + fiat_secp384r1_uint1 x1524; + uint32_t x1525; + fiat_secp384r1_uint1 x1526; + uint32_t x1527; + fiat_secp384r1_uint1 x1528; + uint32_t x1529; + fiat_secp384r1_uint1 x1530; + uint32_t x1531; + fiat_secp384r1_uint1 x1532; + uint32_t x1533; + fiat_secp384r1_uint1 x1534; + uint32_t x1535; + fiat_secp384r1_uint1 x1536; + uint32_t x1537; + fiat_secp384r1_uint1 x1538; + uint32_t x1539; + uint32_t x1540; + fiat_secp384r1_uint1 x1541; + uint32_t x1542; + fiat_secp384r1_uint1 x1543; + uint32_t x1544; + fiat_secp384r1_uint1 x1545; + uint32_t x1546; + fiat_secp384r1_uint1 x1547; + uint32_t x1548; + fiat_secp384r1_uint1 x1549; + uint32_t x1550; + fiat_secp384r1_uint1 x1551; + uint32_t x1552; + fiat_secp384r1_uint1 x1553; + uint32_t x1554; + fiat_secp384r1_uint1 x1555; + uint32_t x1556; + fiat_secp384r1_uint1 x1557; + uint32_t x1558; + fiat_secp384r1_uint1 x1559; + uint32_t x1560; + fiat_secp384r1_uint1 x1561; + uint32_t x1562; + fiat_secp384r1_uint1 x1563; + uint32_t x1564; + fiat_secp384r1_uint1 x1565; + uint32_t x1566; + uint32_t x1567; + uint32_t x1568; + uint32_t x1569; + uint32_t x1570; + uint32_t x1571; + uint32_t x1572; + uint32_t x1573; + uint32_t x1574; + uint32_t x1575; + uint32_t x1576; + uint32_t x1577; + uint32_t x1578; + uint32_t x1579; + uint32_t x1580; + uint32_t x1581; + uint32_t x1582; + uint32_t x1583; + uint32_t x1584; + uint32_t x1585; + uint32_t x1586; + fiat_secp384r1_uint1 x1587; 
+ uint32_t x1588; + fiat_secp384r1_uint1 x1589; + uint32_t x1590; + fiat_secp384r1_uint1 x1591; + uint32_t x1592; + fiat_secp384r1_uint1 x1593; + uint32_t x1594; + fiat_secp384r1_uint1 x1595; + uint32_t x1596; + fiat_secp384r1_uint1 x1597; + uint32_t x1598; + fiat_secp384r1_uint1 x1599; + uint32_t x1600; + fiat_secp384r1_uint1 x1601; + uint32_t x1602; + uint32_t x1603; + fiat_secp384r1_uint1 x1604; + uint32_t x1605; + fiat_secp384r1_uint1 x1606; + uint32_t x1607; + fiat_secp384r1_uint1 x1608; + uint32_t x1609; + fiat_secp384r1_uint1 x1610; + uint32_t x1611; + fiat_secp384r1_uint1 x1612; + uint32_t x1613; + fiat_secp384r1_uint1 x1614; + uint32_t x1615; + fiat_secp384r1_uint1 x1616; + uint32_t x1617; + fiat_secp384r1_uint1 x1618; + uint32_t x1619; + fiat_secp384r1_uint1 x1620; + uint32_t x1621; + fiat_secp384r1_uint1 x1622; + uint32_t x1623; + fiat_secp384r1_uint1 x1624; + uint32_t x1625; + fiat_secp384r1_uint1 x1626; + uint32_t x1627; + fiat_secp384r1_uint1 x1628; + uint32_t x1629; + uint32_t x1630; + fiat_secp384r1_uint1 x1631; + uint32_t x1632; + fiat_secp384r1_uint1 x1633; + uint32_t x1634; + fiat_secp384r1_uint1 x1635; + uint32_t x1636; + fiat_secp384r1_uint1 x1637; + uint32_t x1638; + fiat_secp384r1_uint1 x1639; + uint32_t x1640; + fiat_secp384r1_uint1 x1641; + uint32_t x1642; + fiat_secp384r1_uint1 x1643; + uint32_t x1644; + fiat_secp384r1_uint1 x1645; + uint32_t x1646; + fiat_secp384r1_uint1 x1647; + uint32_t x1648; + fiat_secp384r1_uint1 x1649; + uint32_t x1650; + fiat_secp384r1_uint1 x1651; + uint32_t x1652; + fiat_secp384r1_uint1 x1653; + uint32_t x1654; + fiat_secp384r1_uint1 x1655; + uint32_t x1656; + uint32_t x1657; + uint32_t x1658; + uint32_t x1659; + uint32_t x1660; + uint32_t x1661; + uint32_t x1662; + uint32_t x1663; + uint32_t x1664; + uint32_t x1665; + uint32_t x1666; + uint32_t x1667; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[4]); + x5 = (arg1[5]); + x6 = (arg1[6]); + x7 = (arg1[7]); + x8 = (arg1[8]); + x9 = (arg1[9]); + 
x10 = (arg1[10]); + x11 = (arg1[11]); + x12 = (arg1[0]); + fiat_secp384r1_mulx_u32(&x13, &x14, x12, (arg1[11])); + fiat_secp384r1_mulx_u32(&x15, &x16, x12, (arg1[10])); + fiat_secp384r1_mulx_u32(&x17, &x18, x12, (arg1[9])); + fiat_secp384r1_mulx_u32(&x19, &x20, x12, (arg1[8])); + fiat_secp384r1_mulx_u32(&x21, &x22, x12, (arg1[7])); + fiat_secp384r1_mulx_u32(&x23, &x24, x12, (arg1[6])); + fiat_secp384r1_mulx_u32(&x25, &x26, x12, (arg1[5])); + fiat_secp384r1_mulx_u32(&x27, &x28, x12, (arg1[4])); + fiat_secp384r1_mulx_u32(&x29, &x30, x12, (arg1[3])); + fiat_secp384r1_mulx_u32(&x31, &x32, x12, (arg1[2])); + fiat_secp384r1_mulx_u32(&x33, &x34, x12, (arg1[1])); + fiat_secp384r1_mulx_u32(&x35, &x36, x12, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x37, &x38, 0x0, x36, x33); + fiat_secp384r1_addcarryx_u32(&x39, &x40, x38, x34, x31); + fiat_secp384r1_addcarryx_u32(&x41, &x42, x40, x32, x29); + fiat_secp384r1_addcarryx_u32(&x43, &x44, x42, x30, x27); + fiat_secp384r1_addcarryx_u32(&x45, &x46, x44, x28, x25); + fiat_secp384r1_addcarryx_u32(&x47, &x48, x46, x26, x23); + fiat_secp384r1_addcarryx_u32(&x49, &x50, x48, x24, x21); + fiat_secp384r1_addcarryx_u32(&x51, &x52, x50, x22, x19); + fiat_secp384r1_addcarryx_u32(&x53, &x54, x52, x20, x17); + fiat_secp384r1_addcarryx_u32(&x55, &x56, x54, x18, x15); + fiat_secp384r1_addcarryx_u32(&x57, &x58, x56, x16, x13); + x59 = (x58 + x14); + fiat_secp384r1_mulx_u32(&x60, &x61, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x62, &x63, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x64, &x65, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x66, &x67, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x68, &x69, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x70, &x71, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x72, &x73, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x74, &x75, x35, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x76, &x77, x35, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x78, &x79, x35, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x80, &x81, 0x0, x77, x74); + fiat_secp384r1_addcarryx_u32(&x82, &x83, x81, x75, x72); + fiat_secp384r1_addcarryx_u32(&x84, &x85, x83, x73, x70); + fiat_secp384r1_addcarryx_u32(&x86, &x87, x85, x71, x68); + fiat_secp384r1_addcarryx_u32(&x88, &x89, x87, x69, x66); + fiat_secp384r1_addcarryx_u32(&x90, &x91, x89, x67, x64); + fiat_secp384r1_addcarryx_u32(&x92, &x93, x91, x65, x62); + fiat_secp384r1_addcarryx_u32(&x94, &x95, x93, x63, x60); + x96 = (x95 + x61); + fiat_secp384r1_addcarryx_u32(&x97, &x98, 0x0, x35, x78); + fiat_secp384r1_addcarryx_u32(&x99, &x100, x98, x37, x79); + fiat_secp384r1_addcarryx_u32(&x101, &x102, x100, x39, 0x0); + fiat_secp384r1_addcarryx_u32(&x103, &x104, x102, x41, x76); + fiat_secp384r1_addcarryx_u32(&x105, &x106, x104, x43, x80); + fiat_secp384r1_addcarryx_u32(&x107, &x108, x106, x45, x82); + fiat_secp384r1_addcarryx_u32(&x109, &x110, x108, x47, x84); + fiat_secp384r1_addcarryx_u32(&x111, &x112, x110, x49, x86); + fiat_secp384r1_addcarryx_u32(&x113, &x114, x112, x51, x88); + fiat_secp384r1_addcarryx_u32(&x115, &x116, x114, x53, x90); + fiat_secp384r1_addcarryx_u32(&x117, &x118, x116, x55, x92); + fiat_secp384r1_addcarryx_u32(&x119, &x120, x118, x57, x94); + fiat_secp384r1_addcarryx_u32(&x121, &x122, x120, x59, x96); + fiat_secp384r1_mulx_u32(&x123, &x124, x1, (arg1[11])); + fiat_secp384r1_mulx_u32(&x125, &x126, x1, (arg1[10])); + fiat_secp384r1_mulx_u32(&x127, &x128, x1, (arg1[9])); + fiat_secp384r1_mulx_u32(&x129, &x130, x1, (arg1[8])); + fiat_secp384r1_mulx_u32(&x131, &x132, x1, (arg1[7])); + fiat_secp384r1_mulx_u32(&x133, &x134, x1, (arg1[6])); + fiat_secp384r1_mulx_u32(&x135, &x136, x1, (arg1[5])); + fiat_secp384r1_mulx_u32(&x137, &x138, x1, (arg1[4])); + fiat_secp384r1_mulx_u32(&x139, &x140, x1, (arg1[3])); + fiat_secp384r1_mulx_u32(&x141, &x142, x1, (arg1[2])); + fiat_secp384r1_mulx_u32(&x143, &x144, x1, (arg1[1])); + 
fiat_secp384r1_mulx_u32(&x145, &x146, x1, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x147, &x148, 0x0, x146, x143); + fiat_secp384r1_addcarryx_u32(&x149, &x150, x148, x144, x141); + fiat_secp384r1_addcarryx_u32(&x151, &x152, x150, x142, x139); + fiat_secp384r1_addcarryx_u32(&x153, &x154, x152, x140, x137); + fiat_secp384r1_addcarryx_u32(&x155, &x156, x154, x138, x135); + fiat_secp384r1_addcarryx_u32(&x157, &x158, x156, x136, x133); + fiat_secp384r1_addcarryx_u32(&x159, &x160, x158, x134, x131); + fiat_secp384r1_addcarryx_u32(&x161, &x162, x160, x132, x129); + fiat_secp384r1_addcarryx_u32(&x163, &x164, x162, x130, x127); + fiat_secp384r1_addcarryx_u32(&x165, &x166, x164, x128, x125); + fiat_secp384r1_addcarryx_u32(&x167, &x168, x166, x126, x123); + x169 = (x168 + x124); + fiat_secp384r1_addcarryx_u32(&x170, &x171, 0x0, x99, x145); + fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x101, x147); + fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x103, x149); + fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x105, x151); + fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x107, x153); + fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x109, x155); + fiat_secp384r1_addcarryx_u32(&x182, &x183, x181, x111, x157); + fiat_secp384r1_addcarryx_u32(&x184, &x185, x183, x113, x159); + fiat_secp384r1_addcarryx_u32(&x186, &x187, x185, x115, x161); + fiat_secp384r1_addcarryx_u32(&x188, &x189, x187, x117, x163); + fiat_secp384r1_addcarryx_u32(&x190, &x191, x189, x119, x165); + fiat_secp384r1_addcarryx_u32(&x192, &x193, x191, x121, x167); + fiat_secp384r1_addcarryx_u32(&x194, &x195, x193, x122, x169); + fiat_secp384r1_mulx_u32(&x196, &x197, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x198, &x199, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x200, &x201, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x202, &x203, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x204, &x205, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x206, &x207, 
x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x208, &x209, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x210, &x211, x170, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x212, &x213, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x214, &x215, x170, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x216, &x217, 0x0, x213, x210); + fiat_secp384r1_addcarryx_u32(&x218, &x219, x217, x211, x208); + fiat_secp384r1_addcarryx_u32(&x220, &x221, x219, x209, x206); + fiat_secp384r1_addcarryx_u32(&x222, &x223, x221, x207, x204); + fiat_secp384r1_addcarryx_u32(&x224, &x225, x223, x205, x202); + fiat_secp384r1_addcarryx_u32(&x226, &x227, x225, x203, x200); + fiat_secp384r1_addcarryx_u32(&x228, &x229, x227, x201, x198); + fiat_secp384r1_addcarryx_u32(&x230, &x231, x229, x199, x196); + x232 = (x231 + x197); + fiat_secp384r1_addcarryx_u32(&x233, &x234, 0x0, x170, x214); + fiat_secp384r1_addcarryx_u32(&x235, &x236, x234, x172, x215); + fiat_secp384r1_addcarryx_u32(&x237, &x238, x236, x174, 0x0); + fiat_secp384r1_addcarryx_u32(&x239, &x240, x238, x176, x212); + fiat_secp384r1_addcarryx_u32(&x241, &x242, x240, x178, x216); + fiat_secp384r1_addcarryx_u32(&x243, &x244, x242, x180, x218); + fiat_secp384r1_addcarryx_u32(&x245, &x246, x244, x182, x220); + fiat_secp384r1_addcarryx_u32(&x247, &x248, x246, x184, x222); + fiat_secp384r1_addcarryx_u32(&x249, &x250, x248, x186, x224); + fiat_secp384r1_addcarryx_u32(&x251, &x252, x250, x188, x226); + fiat_secp384r1_addcarryx_u32(&x253, &x254, x252, x190, x228); + fiat_secp384r1_addcarryx_u32(&x255, &x256, x254, x192, x230); + fiat_secp384r1_addcarryx_u32(&x257, &x258, x256, x194, x232); + x259 = ((uint32_t)x258 + x195); + fiat_secp384r1_mulx_u32(&x260, &x261, x2, (arg1[11])); + fiat_secp384r1_mulx_u32(&x262, &x263, x2, (arg1[10])); + fiat_secp384r1_mulx_u32(&x264, &x265, x2, (arg1[9])); + fiat_secp384r1_mulx_u32(&x266, &x267, x2, (arg1[8])); + fiat_secp384r1_mulx_u32(&x268, &x269, x2, (arg1[7])); + 
fiat_secp384r1_mulx_u32(&x270, &x271, x2, (arg1[6])); + fiat_secp384r1_mulx_u32(&x272, &x273, x2, (arg1[5])); + fiat_secp384r1_mulx_u32(&x274, &x275, x2, (arg1[4])); + fiat_secp384r1_mulx_u32(&x276, &x277, x2, (arg1[3])); + fiat_secp384r1_mulx_u32(&x278, &x279, x2, (arg1[2])); + fiat_secp384r1_mulx_u32(&x280, &x281, x2, (arg1[1])); + fiat_secp384r1_mulx_u32(&x282, &x283, x2, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x284, &x285, 0x0, x283, x280); + fiat_secp384r1_addcarryx_u32(&x286, &x287, x285, x281, x278); + fiat_secp384r1_addcarryx_u32(&x288, &x289, x287, x279, x276); + fiat_secp384r1_addcarryx_u32(&x290, &x291, x289, x277, x274); + fiat_secp384r1_addcarryx_u32(&x292, &x293, x291, x275, x272); + fiat_secp384r1_addcarryx_u32(&x294, &x295, x293, x273, x270); + fiat_secp384r1_addcarryx_u32(&x296, &x297, x295, x271, x268); + fiat_secp384r1_addcarryx_u32(&x298, &x299, x297, x269, x266); + fiat_secp384r1_addcarryx_u32(&x300, &x301, x299, x267, x264); + fiat_secp384r1_addcarryx_u32(&x302, &x303, x301, x265, x262); + fiat_secp384r1_addcarryx_u32(&x304, &x305, x303, x263, x260); + x306 = (x305 + x261); + fiat_secp384r1_addcarryx_u32(&x307, &x308, 0x0, x235, x282); + fiat_secp384r1_addcarryx_u32(&x309, &x310, x308, x237, x284); + fiat_secp384r1_addcarryx_u32(&x311, &x312, x310, x239, x286); + fiat_secp384r1_addcarryx_u32(&x313, &x314, x312, x241, x288); + fiat_secp384r1_addcarryx_u32(&x315, &x316, x314, x243, x290); + fiat_secp384r1_addcarryx_u32(&x317, &x318, x316, x245, x292); + fiat_secp384r1_addcarryx_u32(&x319, &x320, x318, x247, x294); + fiat_secp384r1_addcarryx_u32(&x321, &x322, x320, x249, x296); + fiat_secp384r1_addcarryx_u32(&x323, &x324, x322, x251, x298); + fiat_secp384r1_addcarryx_u32(&x325, &x326, x324, x253, x300); + fiat_secp384r1_addcarryx_u32(&x327, &x328, x326, x255, x302); + fiat_secp384r1_addcarryx_u32(&x329, &x330, x328, x257, x304); + fiat_secp384r1_addcarryx_u32(&x331, &x332, x330, x259, x306); + fiat_secp384r1_mulx_u32(&x333, &x334, x307, 
UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x335, &x336, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x337, &x338, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x339, &x340, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x341, &x342, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x343, &x344, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x345, &x346, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x347, &x348, x307, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x349, &x350, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x351, &x352, x307, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x353, &x354, 0x0, x350, x347); + fiat_secp384r1_addcarryx_u32(&x355, &x356, x354, x348, x345); + fiat_secp384r1_addcarryx_u32(&x357, &x358, x356, x346, x343); + fiat_secp384r1_addcarryx_u32(&x359, &x360, x358, x344, x341); + fiat_secp384r1_addcarryx_u32(&x361, &x362, x360, x342, x339); + fiat_secp384r1_addcarryx_u32(&x363, &x364, x362, x340, x337); + fiat_secp384r1_addcarryx_u32(&x365, &x366, x364, x338, x335); + fiat_secp384r1_addcarryx_u32(&x367, &x368, x366, x336, x333); + x369 = (x368 + x334); + fiat_secp384r1_addcarryx_u32(&x370, &x371, 0x0, x307, x351); + fiat_secp384r1_addcarryx_u32(&x372, &x373, x371, x309, x352); + fiat_secp384r1_addcarryx_u32(&x374, &x375, x373, x311, 0x0); + fiat_secp384r1_addcarryx_u32(&x376, &x377, x375, x313, x349); + fiat_secp384r1_addcarryx_u32(&x378, &x379, x377, x315, x353); + fiat_secp384r1_addcarryx_u32(&x380, &x381, x379, x317, x355); + fiat_secp384r1_addcarryx_u32(&x382, &x383, x381, x319, x357); + fiat_secp384r1_addcarryx_u32(&x384, &x385, x383, x321, x359); + fiat_secp384r1_addcarryx_u32(&x386, &x387, x385, x323, x361); + fiat_secp384r1_addcarryx_u32(&x388, &x389, x387, x325, x363); + fiat_secp384r1_addcarryx_u32(&x390, &x391, x389, x327, x365); + fiat_secp384r1_addcarryx_u32(&x392, &x393, x391, x329, x367); + fiat_secp384r1_addcarryx_u32(&x394, &x395, x393, 
x331, x369); + x396 = ((uint32_t)x395 + x332); + fiat_secp384r1_mulx_u32(&x397, &x398, x3, (arg1[11])); + fiat_secp384r1_mulx_u32(&x399, &x400, x3, (arg1[10])); + fiat_secp384r1_mulx_u32(&x401, &x402, x3, (arg1[9])); + fiat_secp384r1_mulx_u32(&x403, &x404, x3, (arg1[8])); + fiat_secp384r1_mulx_u32(&x405, &x406, x3, (arg1[7])); + fiat_secp384r1_mulx_u32(&x407, &x408, x3, (arg1[6])); + fiat_secp384r1_mulx_u32(&x409, &x410, x3, (arg1[5])); + fiat_secp384r1_mulx_u32(&x411, &x412, x3, (arg1[4])); + fiat_secp384r1_mulx_u32(&x413, &x414, x3, (arg1[3])); + fiat_secp384r1_mulx_u32(&x415, &x416, x3, (arg1[2])); + fiat_secp384r1_mulx_u32(&x417, &x418, x3, (arg1[1])); + fiat_secp384r1_mulx_u32(&x419, &x420, x3, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x421, &x422, 0x0, x420, x417); + fiat_secp384r1_addcarryx_u32(&x423, &x424, x422, x418, x415); + fiat_secp384r1_addcarryx_u32(&x425, &x426, x424, x416, x413); + fiat_secp384r1_addcarryx_u32(&x427, &x428, x426, x414, x411); + fiat_secp384r1_addcarryx_u32(&x429, &x430, x428, x412, x409); + fiat_secp384r1_addcarryx_u32(&x431, &x432, x430, x410, x407); + fiat_secp384r1_addcarryx_u32(&x433, &x434, x432, x408, x405); + fiat_secp384r1_addcarryx_u32(&x435, &x436, x434, x406, x403); + fiat_secp384r1_addcarryx_u32(&x437, &x438, x436, x404, x401); + fiat_secp384r1_addcarryx_u32(&x439, &x440, x438, x402, x399); + fiat_secp384r1_addcarryx_u32(&x441, &x442, x440, x400, x397); + x443 = (x442 + x398); + fiat_secp384r1_addcarryx_u32(&x444, &x445, 0x0, x372, x419); + fiat_secp384r1_addcarryx_u32(&x446, &x447, x445, x374, x421); + fiat_secp384r1_addcarryx_u32(&x448, &x449, x447, x376, x423); + fiat_secp384r1_addcarryx_u32(&x450, &x451, x449, x378, x425); + fiat_secp384r1_addcarryx_u32(&x452, &x453, x451, x380, x427); + fiat_secp384r1_addcarryx_u32(&x454, &x455, x453, x382, x429); + fiat_secp384r1_addcarryx_u32(&x456, &x457, x455, x384, x431); + fiat_secp384r1_addcarryx_u32(&x458, &x459, x457, x386, x433); + fiat_secp384r1_addcarryx_u32(&x460, 
&x461, x459, x388, x435); + fiat_secp384r1_addcarryx_u32(&x462, &x463, x461, x390, x437); + fiat_secp384r1_addcarryx_u32(&x464, &x465, x463, x392, x439); + fiat_secp384r1_addcarryx_u32(&x466, &x467, x465, x394, x441); + fiat_secp384r1_addcarryx_u32(&x468, &x469, x467, x396, x443); + fiat_secp384r1_mulx_u32(&x470, &x471, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x472, &x473, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x474, &x475, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x476, &x477, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x478, &x479, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x480, &x481, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x482, &x483, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x484, &x485, x444, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x486, &x487, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x488, &x489, x444, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x490, &x491, 0x0, x487, x484); + fiat_secp384r1_addcarryx_u32(&x492, &x493, x491, x485, x482); + fiat_secp384r1_addcarryx_u32(&x494, &x495, x493, x483, x480); + fiat_secp384r1_addcarryx_u32(&x496, &x497, x495, x481, x478); + fiat_secp384r1_addcarryx_u32(&x498, &x499, x497, x479, x476); + fiat_secp384r1_addcarryx_u32(&x500, &x501, x499, x477, x474); + fiat_secp384r1_addcarryx_u32(&x502, &x503, x501, x475, x472); + fiat_secp384r1_addcarryx_u32(&x504, &x505, x503, x473, x470); + x506 = (x505 + x471); + fiat_secp384r1_addcarryx_u32(&x507, &x508, 0x0, x444, x488); + fiat_secp384r1_addcarryx_u32(&x509, &x510, x508, x446, x489); + fiat_secp384r1_addcarryx_u32(&x511, &x512, x510, x448, 0x0); + fiat_secp384r1_addcarryx_u32(&x513, &x514, x512, x450, x486); + fiat_secp384r1_addcarryx_u32(&x515, &x516, x514, x452, x490); + fiat_secp384r1_addcarryx_u32(&x517, &x518, x516, x454, x492); + fiat_secp384r1_addcarryx_u32(&x519, &x520, x518, x456, x494); + fiat_secp384r1_addcarryx_u32(&x521, &x522, 
x520, x458, x496); + fiat_secp384r1_addcarryx_u32(&x523, &x524, x522, x460, x498); + fiat_secp384r1_addcarryx_u32(&x525, &x526, x524, x462, x500); + fiat_secp384r1_addcarryx_u32(&x527, &x528, x526, x464, x502); + fiat_secp384r1_addcarryx_u32(&x529, &x530, x528, x466, x504); + fiat_secp384r1_addcarryx_u32(&x531, &x532, x530, x468, x506); + x533 = ((uint32_t)x532 + x469); + fiat_secp384r1_mulx_u32(&x534, &x535, x4, (arg1[11])); + fiat_secp384r1_mulx_u32(&x536, &x537, x4, (arg1[10])); + fiat_secp384r1_mulx_u32(&x538, &x539, x4, (arg1[9])); + fiat_secp384r1_mulx_u32(&x540, &x541, x4, (arg1[8])); + fiat_secp384r1_mulx_u32(&x542, &x543, x4, (arg1[7])); + fiat_secp384r1_mulx_u32(&x544, &x545, x4, (arg1[6])); + fiat_secp384r1_mulx_u32(&x546, &x547, x4, (arg1[5])); + fiat_secp384r1_mulx_u32(&x548, &x549, x4, (arg1[4])); + fiat_secp384r1_mulx_u32(&x550, &x551, x4, (arg1[3])); + fiat_secp384r1_mulx_u32(&x552, &x553, x4, (arg1[2])); + fiat_secp384r1_mulx_u32(&x554, &x555, x4, (arg1[1])); + fiat_secp384r1_mulx_u32(&x556, &x557, x4, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x558, &x559, 0x0, x557, x554); + fiat_secp384r1_addcarryx_u32(&x560, &x561, x559, x555, x552); + fiat_secp384r1_addcarryx_u32(&x562, &x563, x561, x553, x550); + fiat_secp384r1_addcarryx_u32(&x564, &x565, x563, x551, x548); + fiat_secp384r1_addcarryx_u32(&x566, &x567, x565, x549, x546); + fiat_secp384r1_addcarryx_u32(&x568, &x569, x567, x547, x544); + fiat_secp384r1_addcarryx_u32(&x570, &x571, x569, x545, x542); + fiat_secp384r1_addcarryx_u32(&x572, &x573, x571, x543, x540); + fiat_secp384r1_addcarryx_u32(&x574, &x575, x573, x541, x538); + fiat_secp384r1_addcarryx_u32(&x576, &x577, x575, x539, x536); + fiat_secp384r1_addcarryx_u32(&x578, &x579, x577, x537, x534); + x580 = (x579 + x535); + fiat_secp384r1_addcarryx_u32(&x581, &x582, 0x0, x509, x556); + fiat_secp384r1_addcarryx_u32(&x583, &x584, x582, x511, x558); + fiat_secp384r1_addcarryx_u32(&x585, &x586, x584, x513, x560); + 
fiat_secp384r1_addcarryx_u32(&x587, &x588, x586, x515, x562); + fiat_secp384r1_addcarryx_u32(&x589, &x590, x588, x517, x564); + fiat_secp384r1_addcarryx_u32(&x591, &x592, x590, x519, x566); + fiat_secp384r1_addcarryx_u32(&x593, &x594, x592, x521, x568); + fiat_secp384r1_addcarryx_u32(&x595, &x596, x594, x523, x570); + fiat_secp384r1_addcarryx_u32(&x597, &x598, x596, x525, x572); + fiat_secp384r1_addcarryx_u32(&x599, &x600, x598, x527, x574); + fiat_secp384r1_addcarryx_u32(&x601, &x602, x600, x529, x576); + fiat_secp384r1_addcarryx_u32(&x603, &x604, x602, x531, x578); + fiat_secp384r1_addcarryx_u32(&x605, &x606, x604, x533, x580); + fiat_secp384r1_mulx_u32(&x607, &x608, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x609, &x610, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x611, &x612, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x613, &x614, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x615, &x616, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x617, &x618, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x619, &x620, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x621, &x622, x581, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x623, &x624, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x625, &x626, x581, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x627, &x628, 0x0, x624, x621); + fiat_secp384r1_addcarryx_u32(&x629, &x630, x628, x622, x619); + fiat_secp384r1_addcarryx_u32(&x631, &x632, x630, x620, x617); + fiat_secp384r1_addcarryx_u32(&x633, &x634, x632, x618, x615); + fiat_secp384r1_addcarryx_u32(&x635, &x636, x634, x616, x613); + fiat_secp384r1_addcarryx_u32(&x637, &x638, x636, x614, x611); + fiat_secp384r1_addcarryx_u32(&x639, &x640, x638, x612, x609); + fiat_secp384r1_addcarryx_u32(&x641, &x642, x640, x610, x607); + x643 = (x642 + x608); + fiat_secp384r1_addcarryx_u32(&x644, &x645, 0x0, x581, x625); + fiat_secp384r1_addcarryx_u32(&x646, &x647, x645, x583, x626); + 
fiat_secp384r1_addcarryx_u32(&x648, &x649, x647, x585, 0x0); + fiat_secp384r1_addcarryx_u32(&x650, &x651, x649, x587, x623); + fiat_secp384r1_addcarryx_u32(&x652, &x653, x651, x589, x627); + fiat_secp384r1_addcarryx_u32(&x654, &x655, x653, x591, x629); + fiat_secp384r1_addcarryx_u32(&x656, &x657, x655, x593, x631); + fiat_secp384r1_addcarryx_u32(&x658, &x659, x657, x595, x633); + fiat_secp384r1_addcarryx_u32(&x660, &x661, x659, x597, x635); + fiat_secp384r1_addcarryx_u32(&x662, &x663, x661, x599, x637); + fiat_secp384r1_addcarryx_u32(&x664, &x665, x663, x601, x639); + fiat_secp384r1_addcarryx_u32(&x666, &x667, x665, x603, x641); + fiat_secp384r1_addcarryx_u32(&x668, &x669, x667, x605, x643); + x670 = ((uint32_t)x669 + x606); + fiat_secp384r1_mulx_u32(&x671, &x672, x5, (arg1[11])); + fiat_secp384r1_mulx_u32(&x673, &x674, x5, (arg1[10])); + fiat_secp384r1_mulx_u32(&x675, &x676, x5, (arg1[9])); + fiat_secp384r1_mulx_u32(&x677, &x678, x5, (arg1[8])); + fiat_secp384r1_mulx_u32(&x679, &x680, x5, (arg1[7])); + fiat_secp384r1_mulx_u32(&x681, &x682, x5, (arg1[6])); + fiat_secp384r1_mulx_u32(&x683, &x684, x5, (arg1[5])); + fiat_secp384r1_mulx_u32(&x685, &x686, x5, (arg1[4])); + fiat_secp384r1_mulx_u32(&x687, &x688, x5, (arg1[3])); + fiat_secp384r1_mulx_u32(&x689, &x690, x5, (arg1[2])); + fiat_secp384r1_mulx_u32(&x691, &x692, x5, (arg1[1])); + fiat_secp384r1_mulx_u32(&x693, &x694, x5, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x695, &x696, 0x0, x694, x691); + fiat_secp384r1_addcarryx_u32(&x697, &x698, x696, x692, x689); + fiat_secp384r1_addcarryx_u32(&x699, &x700, x698, x690, x687); + fiat_secp384r1_addcarryx_u32(&x701, &x702, x700, x688, x685); + fiat_secp384r1_addcarryx_u32(&x703, &x704, x702, x686, x683); + fiat_secp384r1_addcarryx_u32(&x705, &x706, x704, x684, x681); + fiat_secp384r1_addcarryx_u32(&x707, &x708, x706, x682, x679); + fiat_secp384r1_addcarryx_u32(&x709, &x710, x708, x680, x677); + fiat_secp384r1_addcarryx_u32(&x711, &x712, x710, x678, x675); + 
fiat_secp384r1_addcarryx_u32(&x713, &x714, x712, x676, x673); + fiat_secp384r1_addcarryx_u32(&x715, &x716, x714, x674, x671); + x717 = (x716 + x672); + fiat_secp384r1_addcarryx_u32(&x718, &x719, 0x0, x646, x693); + fiat_secp384r1_addcarryx_u32(&x720, &x721, x719, x648, x695); + fiat_secp384r1_addcarryx_u32(&x722, &x723, x721, x650, x697); + fiat_secp384r1_addcarryx_u32(&x724, &x725, x723, x652, x699); + fiat_secp384r1_addcarryx_u32(&x726, &x727, x725, x654, x701); + fiat_secp384r1_addcarryx_u32(&x728, &x729, x727, x656, x703); + fiat_secp384r1_addcarryx_u32(&x730, &x731, x729, x658, x705); + fiat_secp384r1_addcarryx_u32(&x732, &x733, x731, x660, x707); + fiat_secp384r1_addcarryx_u32(&x734, &x735, x733, x662, x709); + fiat_secp384r1_addcarryx_u32(&x736, &x737, x735, x664, x711); + fiat_secp384r1_addcarryx_u32(&x738, &x739, x737, x666, x713); + fiat_secp384r1_addcarryx_u32(&x740, &x741, x739, x668, x715); + fiat_secp384r1_addcarryx_u32(&x742, &x743, x741, x670, x717); + fiat_secp384r1_mulx_u32(&x744, &x745, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x746, &x747, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x748, &x749, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x750, &x751, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x752, &x753, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x754, &x755, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x756, &x757, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x758, &x759, x718, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x760, &x761, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x762, &x763, x718, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x764, &x765, 0x0, x761, x758); + fiat_secp384r1_addcarryx_u32(&x766, &x767, x765, x759, x756); + fiat_secp384r1_addcarryx_u32(&x768, &x769, x767, x757, x754); + fiat_secp384r1_addcarryx_u32(&x770, &x771, x769, x755, x752); + fiat_secp384r1_addcarryx_u32(&x772, &x773, x771, x753, x750); + 
fiat_secp384r1_addcarryx_u32(&x774, &x775, x773, x751, x748); + fiat_secp384r1_addcarryx_u32(&x776, &x777, x775, x749, x746); + fiat_secp384r1_addcarryx_u32(&x778, &x779, x777, x747, x744); + x780 = (x779 + x745); + fiat_secp384r1_addcarryx_u32(&x781, &x782, 0x0, x718, x762); + fiat_secp384r1_addcarryx_u32(&x783, &x784, x782, x720, x763); + fiat_secp384r1_addcarryx_u32(&x785, &x786, x784, x722, 0x0); + fiat_secp384r1_addcarryx_u32(&x787, &x788, x786, x724, x760); + fiat_secp384r1_addcarryx_u32(&x789, &x790, x788, x726, x764); + fiat_secp384r1_addcarryx_u32(&x791, &x792, x790, x728, x766); + fiat_secp384r1_addcarryx_u32(&x793, &x794, x792, x730, x768); + fiat_secp384r1_addcarryx_u32(&x795, &x796, x794, x732, x770); + fiat_secp384r1_addcarryx_u32(&x797, &x798, x796, x734, x772); + fiat_secp384r1_addcarryx_u32(&x799, &x800, x798, x736, x774); + fiat_secp384r1_addcarryx_u32(&x801, &x802, x800, x738, x776); + fiat_secp384r1_addcarryx_u32(&x803, &x804, x802, x740, x778); + fiat_secp384r1_addcarryx_u32(&x805, &x806, x804, x742, x780); + x807 = ((uint32_t)x806 + x743); + fiat_secp384r1_mulx_u32(&x808, &x809, x6, (arg1[11])); + fiat_secp384r1_mulx_u32(&x810, &x811, x6, (arg1[10])); + fiat_secp384r1_mulx_u32(&x812, &x813, x6, (arg1[9])); + fiat_secp384r1_mulx_u32(&x814, &x815, x6, (arg1[8])); + fiat_secp384r1_mulx_u32(&x816, &x817, x6, (arg1[7])); + fiat_secp384r1_mulx_u32(&x818, &x819, x6, (arg1[6])); + fiat_secp384r1_mulx_u32(&x820, &x821, x6, (arg1[5])); + fiat_secp384r1_mulx_u32(&x822, &x823, x6, (arg1[4])); + fiat_secp384r1_mulx_u32(&x824, &x825, x6, (arg1[3])); + fiat_secp384r1_mulx_u32(&x826, &x827, x6, (arg1[2])); + fiat_secp384r1_mulx_u32(&x828, &x829, x6, (arg1[1])); + fiat_secp384r1_mulx_u32(&x830, &x831, x6, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x832, &x833, 0x0, x831, x828); + fiat_secp384r1_addcarryx_u32(&x834, &x835, x833, x829, x826); + fiat_secp384r1_addcarryx_u32(&x836, &x837, x835, x827, x824); + fiat_secp384r1_addcarryx_u32(&x838, &x839, x837, x825, 
x822); + fiat_secp384r1_addcarryx_u32(&x840, &x841, x839, x823, x820); + fiat_secp384r1_addcarryx_u32(&x842, &x843, x841, x821, x818); + fiat_secp384r1_addcarryx_u32(&x844, &x845, x843, x819, x816); + fiat_secp384r1_addcarryx_u32(&x846, &x847, x845, x817, x814); + fiat_secp384r1_addcarryx_u32(&x848, &x849, x847, x815, x812); + fiat_secp384r1_addcarryx_u32(&x850, &x851, x849, x813, x810); + fiat_secp384r1_addcarryx_u32(&x852, &x853, x851, x811, x808); + x854 = (x853 + x809); + fiat_secp384r1_addcarryx_u32(&x855, &x856, 0x0, x783, x830); + fiat_secp384r1_addcarryx_u32(&x857, &x858, x856, x785, x832); + fiat_secp384r1_addcarryx_u32(&x859, &x860, x858, x787, x834); + fiat_secp384r1_addcarryx_u32(&x861, &x862, x860, x789, x836); + fiat_secp384r1_addcarryx_u32(&x863, &x864, x862, x791, x838); + fiat_secp384r1_addcarryx_u32(&x865, &x866, x864, x793, x840); + fiat_secp384r1_addcarryx_u32(&x867, &x868, x866, x795, x842); + fiat_secp384r1_addcarryx_u32(&x869, &x870, x868, x797, x844); + fiat_secp384r1_addcarryx_u32(&x871, &x872, x870, x799, x846); + fiat_secp384r1_addcarryx_u32(&x873, &x874, x872, x801, x848); + fiat_secp384r1_addcarryx_u32(&x875, &x876, x874, x803, x850); + fiat_secp384r1_addcarryx_u32(&x877, &x878, x876, x805, x852); + fiat_secp384r1_addcarryx_u32(&x879, &x880, x878, x807, x854); + fiat_secp384r1_mulx_u32(&x881, &x882, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x883, &x884, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x885, &x886, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x887, &x888, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x889, &x890, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x891, &x892, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x893, &x894, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x895, &x896, x855, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x897, &x898, x855, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x899, &x900, x855, UINT32_C(0xffffffff)); 
+ fiat_secp384r1_addcarryx_u32(&x901, &x902, 0x0, x898, x895); + fiat_secp384r1_addcarryx_u32(&x903, &x904, x902, x896, x893); + fiat_secp384r1_addcarryx_u32(&x905, &x906, x904, x894, x891); + fiat_secp384r1_addcarryx_u32(&x907, &x908, x906, x892, x889); + fiat_secp384r1_addcarryx_u32(&x909, &x910, x908, x890, x887); + fiat_secp384r1_addcarryx_u32(&x911, &x912, x910, x888, x885); + fiat_secp384r1_addcarryx_u32(&x913, &x914, x912, x886, x883); + fiat_secp384r1_addcarryx_u32(&x915, &x916, x914, x884, x881); + x917 = (x916 + x882); + fiat_secp384r1_addcarryx_u32(&x918, &x919, 0x0, x855, x899); + fiat_secp384r1_addcarryx_u32(&x920, &x921, x919, x857, x900); + fiat_secp384r1_addcarryx_u32(&x922, &x923, x921, x859, 0x0); + fiat_secp384r1_addcarryx_u32(&x924, &x925, x923, x861, x897); + fiat_secp384r1_addcarryx_u32(&x926, &x927, x925, x863, x901); + fiat_secp384r1_addcarryx_u32(&x928, &x929, x927, x865, x903); + fiat_secp384r1_addcarryx_u32(&x930, &x931, x929, x867, x905); + fiat_secp384r1_addcarryx_u32(&x932, &x933, x931, x869, x907); + fiat_secp384r1_addcarryx_u32(&x934, &x935, x933, x871, x909); + fiat_secp384r1_addcarryx_u32(&x936, &x937, x935, x873, x911); + fiat_secp384r1_addcarryx_u32(&x938, &x939, x937, x875, x913); + fiat_secp384r1_addcarryx_u32(&x940, &x941, x939, x877, x915); + fiat_secp384r1_addcarryx_u32(&x942, &x943, x941, x879, x917); + x944 = ((uint32_t)x943 + x880); + fiat_secp384r1_mulx_u32(&x945, &x946, x7, (arg1[11])); + fiat_secp384r1_mulx_u32(&x947, &x948, x7, (arg1[10])); + fiat_secp384r1_mulx_u32(&x949, &x950, x7, (arg1[9])); + fiat_secp384r1_mulx_u32(&x951, &x952, x7, (arg1[8])); + fiat_secp384r1_mulx_u32(&x953, &x954, x7, (arg1[7])); + fiat_secp384r1_mulx_u32(&x955, &x956, x7, (arg1[6])); + fiat_secp384r1_mulx_u32(&x957, &x958, x7, (arg1[5])); + fiat_secp384r1_mulx_u32(&x959, &x960, x7, (arg1[4])); + fiat_secp384r1_mulx_u32(&x961, &x962, x7, (arg1[3])); + fiat_secp384r1_mulx_u32(&x963, &x964, x7, (arg1[2])); + fiat_secp384r1_mulx_u32(&x965, 
&x966, x7, (arg1[1])); + fiat_secp384r1_mulx_u32(&x967, &x968, x7, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x969, &x970, 0x0, x968, x965); + fiat_secp384r1_addcarryx_u32(&x971, &x972, x970, x966, x963); + fiat_secp384r1_addcarryx_u32(&x973, &x974, x972, x964, x961); + fiat_secp384r1_addcarryx_u32(&x975, &x976, x974, x962, x959); + fiat_secp384r1_addcarryx_u32(&x977, &x978, x976, x960, x957); + fiat_secp384r1_addcarryx_u32(&x979, &x980, x978, x958, x955); + fiat_secp384r1_addcarryx_u32(&x981, &x982, x980, x956, x953); + fiat_secp384r1_addcarryx_u32(&x983, &x984, x982, x954, x951); + fiat_secp384r1_addcarryx_u32(&x985, &x986, x984, x952, x949); + fiat_secp384r1_addcarryx_u32(&x987, &x988, x986, x950, x947); + fiat_secp384r1_addcarryx_u32(&x989, &x990, x988, x948, x945); + x991 = (x990 + x946); + fiat_secp384r1_addcarryx_u32(&x992, &x993, 0x0, x920, x967); + fiat_secp384r1_addcarryx_u32(&x994, &x995, x993, x922, x969); + fiat_secp384r1_addcarryx_u32(&x996, &x997, x995, x924, x971); + fiat_secp384r1_addcarryx_u32(&x998, &x999, x997, x926, x973); + fiat_secp384r1_addcarryx_u32(&x1000, &x1001, x999, x928, x975); + fiat_secp384r1_addcarryx_u32(&x1002, &x1003, x1001, x930, x977); + fiat_secp384r1_addcarryx_u32(&x1004, &x1005, x1003, x932, x979); + fiat_secp384r1_addcarryx_u32(&x1006, &x1007, x1005, x934, x981); + fiat_secp384r1_addcarryx_u32(&x1008, &x1009, x1007, x936, x983); + fiat_secp384r1_addcarryx_u32(&x1010, &x1011, x1009, x938, x985); + fiat_secp384r1_addcarryx_u32(&x1012, &x1013, x1011, x940, x987); + fiat_secp384r1_addcarryx_u32(&x1014, &x1015, x1013, x942, x989); + fiat_secp384r1_addcarryx_u32(&x1016, &x1017, x1015, x944, x991); + fiat_secp384r1_mulx_u32(&x1018, &x1019, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1020, &x1021, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1022, &x1023, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1024, &x1025, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1026, &x1027, x992, 
UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1028, &x1029, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1030, &x1031, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1032, &x1033, x992, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1034, &x1035, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1036, &x1037, x992, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1038, &x1039, 0x0, x1035, x1032); + fiat_secp384r1_addcarryx_u32(&x1040, &x1041, x1039, x1033, x1030); + fiat_secp384r1_addcarryx_u32(&x1042, &x1043, x1041, x1031, x1028); + fiat_secp384r1_addcarryx_u32(&x1044, &x1045, x1043, x1029, x1026); + fiat_secp384r1_addcarryx_u32(&x1046, &x1047, x1045, x1027, x1024); + fiat_secp384r1_addcarryx_u32(&x1048, &x1049, x1047, x1025, x1022); + fiat_secp384r1_addcarryx_u32(&x1050, &x1051, x1049, x1023, x1020); + fiat_secp384r1_addcarryx_u32(&x1052, &x1053, x1051, x1021, x1018); + x1054 = (x1053 + x1019); + fiat_secp384r1_addcarryx_u32(&x1055, &x1056, 0x0, x992, x1036); + fiat_secp384r1_addcarryx_u32(&x1057, &x1058, x1056, x994, x1037); + fiat_secp384r1_addcarryx_u32(&x1059, &x1060, x1058, x996, 0x0); + fiat_secp384r1_addcarryx_u32(&x1061, &x1062, x1060, x998, x1034); + fiat_secp384r1_addcarryx_u32(&x1063, &x1064, x1062, x1000, x1038); + fiat_secp384r1_addcarryx_u32(&x1065, &x1066, x1064, x1002, x1040); + fiat_secp384r1_addcarryx_u32(&x1067, &x1068, x1066, x1004, x1042); + fiat_secp384r1_addcarryx_u32(&x1069, &x1070, x1068, x1006, x1044); + fiat_secp384r1_addcarryx_u32(&x1071, &x1072, x1070, x1008, x1046); + fiat_secp384r1_addcarryx_u32(&x1073, &x1074, x1072, x1010, x1048); + fiat_secp384r1_addcarryx_u32(&x1075, &x1076, x1074, x1012, x1050); + fiat_secp384r1_addcarryx_u32(&x1077, &x1078, x1076, x1014, x1052); + fiat_secp384r1_addcarryx_u32(&x1079, &x1080, x1078, x1016, x1054); + x1081 = ((uint32_t)x1080 + x1017); + fiat_secp384r1_mulx_u32(&x1082, &x1083, x8, (arg1[11])); + fiat_secp384r1_mulx_u32(&x1084, &x1085, x8, (arg1[10])); 
+ fiat_secp384r1_mulx_u32(&x1086, &x1087, x8, (arg1[9])); + fiat_secp384r1_mulx_u32(&x1088, &x1089, x8, (arg1[8])); + fiat_secp384r1_mulx_u32(&x1090, &x1091, x8, (arg1[7])); + fiat_secp384r1_mulx_u32(&x1092, &x1093, x8, (arg1[6])); + fiat_secp384r1_mulx_u32(&x1094, &x1095, x8, (arg1[5])); + fiat_secp384r1_mulx_u32(&x1096, &x1097, x8, (arg1[4])); + fiat_secp384r1_mulx_u32(&x1098, &x1099, x8, (arg1[3])); + fiat_secp384r1_mulx_u32(&x1100, &x1101, x8, (arg1[2])); + fiat_secp384r1_mulx_u32(&x1102, &x1103, x8, (arg1[1])); + fiat_secp384r1_mulx_u32(&x1104, &x1105, x8, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x1106, &x1107, 0x0, x1105, x1102); + fiat_secp384r1_addcarryx_u32(&x1108, &x1109, x1107, x1103, x1100); + fiat_secp384r1_addcarryx_u32(&x1110, &x1111, x1109, x1101, x1098); + fiat_secp384r1_addcarryx_u32(&x1112, &x1113, x1111, x1099, x1096); + fiat_secp384r1_addcarryx_u32(&x1114, &x1115, x1113, x1097, x1094); + fiat_secp384r1_addcarryx_u32(&x1116, &x1117, x1115, x1095, x1092); + fiat_secp384r1_addcarryx_u32(&x1118, &x1119, x1117, x1093, x1090); + fiat_secp384r1_addcarryx_u32(&x1120, &x1121, x1119, x1091, x1088); + fiat_secp384r1_addcarryx_u32(&x1122, &x1123, x1121, x1089, x1086); + fiat_secp384r1_addcarryx_u32(&x1124, &x1125, x1123, x1087, x1084); + fiat_secp384r1_addcarryx_u32(&x1126, &x1127, x1125, x1085, x1082); + x1128 = (x1127 + x1083); + fiat_secp384r1_addcarryx_u32(&x1129, &x1130, 0x0, x1057, x1104); + fiat_secp384r1_addcarryx_u32(&x1131, &x1132, x1130, x1059, x1106); + fiat_secp384r1_addcarryx_u32(&x1133, &x1134, x1132, x1061, x1108); + fiat_secp384r1_addcarryx_u32(&x1135, &x1136, x1134, x1063, x1110); + fiat_secp384r1_addcarryx_u32(&x1137, &x1138, x1136, x1065, x1112); + fiat_secp384r1_addcarryx_u32(&x1139, &x1140, x1138, x1067, x1114); + fiat_secp384r1_addcarryx_u32(&x1141, &x1142, x1140, x1069, x1116); + fiat_secp384r1_addcarryx_u32(&x1143, &x1144, x1142, x1071, x1118); + fiat_secp384r1_addcarryx_u32(&x1145, &x1146, x1144, x1073, x1120); + 
fiat_secp384r1_addcarryx_u32(&x1147, &x1148, x1146, x1075, x1122); + fiat_secp384r1_addcarryx_u32(&x1149, &x1150, x1148, x1077, x1124); + fiat_secp384r1_addcarryx_u32(&x1151, &x1152, x1150, x1079, x1126); + fiat_secp384r1_addcarryx_u32(&x1153, &x1154, x1152, x1081, x1128); + fiat_secp384r1_mulx_u32(&x1155, &x1156, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1157, &x1158, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1159, &x1160, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1161, &x1162, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1163, &x1164, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1165, &x1166, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1167, &x1168, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1169, &x1170, x1129, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1171, &x1172, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1173, &x1174, x1129, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1175, &x1176, 0x0, x1172, x1169); + fiat_secp384r1_addcarryx_u32(&x1177, &x1178, x1176, x1170, x1167); + fiat_secp384r1_addcarryx_u32(&x1179, &x1180, x1178, x1168, x1165); + fiat_secp384r1_addcarryx_u32(&x1181, &x1182, x1180, x1166, x1163); + fiat_secp384r1_addcarryx_u32(&x1183, &x1184, x1182, x1164, x1161); + fiat_secp384r1_addcarryx_u32(&x1185, &x1186, x1184, x1162, x1159); + fiat_secp384r1_addcarryx_u32(&x1187, &x1188, x1186, x1160, x1157); + fiat_secp384r1_addcarryx_u32(&x1189, &x1190, x1188, x1158, x1155); + x1191 = (x1190 + x1156); + fiat_secp384r1_addcarryx_u32(&x1192, &x1193, 0x0, x1129, x1173); + fiat_secp384r1_addcarryx_u32(&x1194, &x1195, x1193, x1131, x1174); + fiat_secp384r1_addcarryx_u32(&x1196, &x1197, x1195, x1133, 0x0); + fiat_secp384r1_addcarryx_u32(&x1198, &x1199, x1197, x1135, x1171); + fiat_secp384r1_addcarryx_u32(&x1200, &x1201, x1199, x1137, x1175); + fiat_secp384r1_addcarryx_u32(&x1202, &x1203, x1201, x1139, x1177); + 
fiat_secp384r1_addcarryx_u32(&x1204, &x1205, x1203, x1141, x1179); + fiat_secp384r1_addcarryx_u32(&x1206, &x1207, x1205, x1143, x1181); + fiat_secp384r1_addcarryx_u32(&x1208, &x1209, x1207, x1145, x1183); + fiat_secp384r1_addcarryx_u32(&x1210, &x1211, x1209, x1147, x1185); + fiat_secp384r1_addcarryx_u32(&x1212, &x1213, x1211, x1149, x1187); + fiat_secp384r1_addcarryx_u32(&x1214, &x1215, x1213, x1151, x1189); + fiat_secp384r1_addcarryx_u32(&x1216, &x1217, x1215, x1153, x1191); + x1218 = ((uint32_t)x1217 + x1154); + fiat_secp384r1_mulx_u32(&x1219, &x1220, x9, (arg1[11])); + fiat_secp384r1_mulx_u32(&x1221, &x1222, x9, (arg1[10])); + fiat_secp384r1_mulx_u32(&x1223, &x1224, x9, (arg1[9])); + fiat_secp384r1_mulx_u32(&x1225, &x1226, x9, (arg1[8])); + fiat_secp384r1_mulx_u32(&x1227, &x1228, x9, (arg1[7])); + fiat_secp384r1_mulx_u32(&x1229, &x1230, x9, (arg1[6])); + fiat_secp384r1_mulx_u32(&x1231, &x1232, x9, (arg1[5])); + fiat_secp384r1_mulx_u32(&x1233, &x1234, x9, (arg1[4])); + fiat_secp384r1_mulx_u32(&x1235, &x1236, x9, (arg1[3])); + fiat_secp384r1_mulx_u32(&x1237, &x1238, x9, (arg1[2])); + fiat_secp384r1_mulx_u32(&x1239, &x1240, x9, (arg1[1])); + fiat_secp384r1_mulx_u32(&x1241, &x1242, x9, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x1243, &x1244, 0x0, x1242, x1239); + fiat_secp384r1_addcarryx_u32(&x1245, &x1246, x1244, x1240, x1237); + fiat_secp384r1_addcarryx_u32(&x1247, &x1248, x1246, x1238, x1235); + fiat_secp384r1_addcarryx_u32(&x1249, &x1250, x1248, x1236, x1233); + fiat_secp384r1_addcarryx_u32(&x1251, &x1252, x1250, x1234, x1231); + fiat_secp384r1_addcarryx_u32(&x1253, &x1254, x1252, x1232, x1229); + fiat_secp384r1_addcarryx_u32(&x1255, &x1256, x1254, x1230, x1227); + fiat_secp384r1_addcarryx_u32(&x1257, &x1258, x1256, x1228, x1225); + fiat_secp384r1_addcarryx_u32(&x1259, &x1260, x1258, x1226, x1223); + fiat_secp384r1_addcarryx_u32(&x1261, &x1262, x1260, x1224, x1221); + fiat_secp384r1_addcarryx_u32(&x1263, &x1264, x1262, x1222, x1219); + x1265 = (x1264 + x1220); 
+ fiat_secp384r1_addcarryx_u32(&x1266, &x1267, 0x0, x1194, x1241); + fiat_secp384r1_addcarryx_u32(&x1268, &x1269, x1267, x1196, x1243); + fiat_secp384r1_addcarryx_u32(&x1270, &x1271, x1269, x1198, x1245); + fiat_secp384r1_addcarryx_u32(&x1272, &x1273, x1271, x1200, x1247); + fiat_secp384r1_addcarryx_u32(&x1274, &x1275, x1273, x1202, x1249); + fiat_secp384r1_addcarryx_u32(&x1276, &x1277, x1275, x1204, x1251); + fiat_secp384r1_addcarryx_u32(&x1278, &x1279, x1277, x1206, x1253); + fiat_secp384r1_addcarryx_u32(&x1280, &x1281, x1279, x1208, x1255); + fiat_secp384r1_addcarryx_u32(&x1282, &x1283, x1281, x1210, x1257); + fiat_secp384r1_addcarryx_u32(&x1284, &x1285, x1283, x1212, x1259); + fiat_secp384r1_addcarryx_u32(&x1286, &x1287, x1285, x1214, x1261); + fiat_secp384r1_addcarryx_u32(&x1288, &x1289, x1287, x1216, x1263); + fiat_secp384r1_addcarryx_u32(&x1290, &x1291, x1289, x1218, x1265); + fiat_secp384r1_mulx_u32(&x1292, &x1293, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1294, &x1295, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1296, &x1297, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1298, &x1299, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1300, &x1301, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1302, &x1303, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1304, &x1305, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1306, &x1307, x1266, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1308, &x1309, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1310, &x1311, x1266, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1312, &x1313, 0x0, x1309, x1306); + fiat_secp384r1_addcarryx_u32(&x1314, &x1315, x1313, x1307, x1304); + fiat_secp384r1_addcarryx_u32(&x1316, &x1317, x1315, x1305, x1302); + fiat_secp384r1_addcarryx_u32(&x1318, &x1319, x1317, x1303, x1300); + fiat_secp384r1_addcarryx_u32(&x1320, &x1321, x1319, x1301, x1298); + fiat_secp384r1_addcarryx_u32(&x1322, 
&x1323, x1321, x1299, x1296); + fiat_secp384r1_addcarryx_u32(&x1324, &x1325, x1323, x1297, x1294); + fiat_secp384r1_addcarryx_u32(&x1326, &x1327, x1325, x1295, x1292); + x1328 = (x1327 + x1293); + fiat_secp384r1_addcarryx_u32(&x1329, &x1330, 0x0, x1266, x1310); + fiat_secp384r1_addcarryx_u32(&x1331, &x1332, x1330, x1268, x1311); + fiat_secp384r1_addcarryx_u32(&x1333, &x1334, x1332, x1270, 0x0); + fiat_secp384r1_addcarryx_u32(&x1335, &x1336, x1334, x1272, x1308); + fiat_secp384r1_addcarryx_u32(&x1337, &x1338, x1336, x1274, x1312); + fiat_secp384r1_addcarryx_u32(&x1339, &x1340, x1338, x1276, x1314); + fiat_secp384r1_addcarryx_u32(&x1341, &x1342, x1340, x1278, x1316); + fiat_secp384r1_addcarryx_u32(&x1343, &x1344, x1342, x1280, x1318); + fiat_secp384r1_addcarryx_u32(&x1345, &x1346, x1344, x1282, x1320); + fiat_secp384r1_addcarryx_u32(&x1347, &x1348, x1346, x1284, x1322); + fiat_secp384r1_addcarryx_u32(&x1349, &x1350, x1348, x1286, x1324); + fiat_secp384r1_addcarryx_u32(&x1351, &x1352, x1350, x1288, x1326); + fiat_secp384r1_addcarryx_u32(&x1353, &x1354, x1352, x1290, x1328); + x1355 = ((uint32_t)x1354 + x1291); + fiat_secp384r1_mulx_u32(&x1356, &x1357, x10, (arg1[11])); + fiat_secp384r1_mulx_u32(&x1358, &x1359, x10, (arg1[10])); + fiat_secp384r1_mulx_u32(&x1360, &x1361, x10, (arg1[9])); + fiat_secp384r1_mulx_u32(&x1362, &x1363, x10, (arg1[8])); + fiat_secp384r1_mulx_u32(&x1364, &x1365, x10, (arg1[7])); + fiat_secp384r1_mulx_u32(&x1366, &x1367, x10, (arg1[6])); + fiat_secp384r1_mulx_u32(&x1368, &x1369, x10, (arg1[5])); + fiat_secp384r1_mulx_u32(&x1370, &x1371, x10, (arg1[4])); + fiat_secp384r1_mulx_u32(&x1372, &x1373, x10, (arg1[3])); + fiat_secp384r1_mulx_u32(&x1374, &x1375, x10, (arg1[2])); + fiat_secp384r1_mulx_u32(&x1376, &x1377, x10, (arg1[1])); + fiat_secp384r1_mulx_u32(&x1378, &x1379, x10, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x1380, &x1381, 0x0, x1379, x1376); + fiat_secp384r1_addcarryx_u32(&x1382, &x1383, x1381, x1377, x1374); + 
fiat_secp384r1_addcarryx_u32(&x1384, &x1385, x1383, x1375, x1372); + fiat_secp384r1_addcarryx_u32(&x1386, &x1387, x1385, x1373, x1370); + fiat_secp384r1_addcarryx_u32(&x1388, &x1389, x1387, x1371, x1368); + fiat_secp384r1_addcarryx_u32(&x1390, &x1391, x1389, x1369, x1366); + fiat_secp384r1_addcarryx_u32(&x1392, &x1393, x1391, x1367, x1364); + fiat_secp384r1_addcarryx_u32(&x1394, &x1395, x1393, x1365, x1362); + fiat_secp384r1_addcarryx_u32(&x1396, &x1397, x1395, x1363, x1360); + fiat_secp384r1_addcarryx_u32(&x1398, &x1399, x1397, x1361, x1358); + fiat_secp384r1_addcarryx_u32(&x1400, &x1401, x1399, x1359, x1356); + x1402 = (x1401 + x1357); + fiat_secp384r1_addcarryx_u32(&x1403, &x1404, 0x0, x1331, x1378); + fiat_secp384r1_addcarryx_u32(&x1405, &x1406, x1404, x1333, x1380); + fiat_secp384r1_addcarryx_u32(&x1407, &x1408, x1406, x1335, x1382); + fiat_secp384r1_addcarryx_u32(&x1409, &x1410, x1408, x1337, x1384); + fiat_secp384r1_addcarryx_u32(&x1411, &x1412, x1410, x1339, x1386); + fiat_secp384r1_addcarryx_u32(&x1413, &x1414, x1412, x1341, x1388); + fiat_secp384r1_addcarryx_u32(&x1415, &x1416, x1414, x1343, x1390); + fiat_secp384r1_addcarryx_u32(&x1417, &x1418, x1416, x1345, x1392); + fiat_secp384r1_addcarryx_u32(&x1419, &x1420, x1418, x1347, x1394); + fiat_secp384r1_addcarryx_u32(&x1421, &x1422, x1420, x1349, x1396); + fiat_secp384r1_addcarryx_u32(&x1423, &x1424, x1422, x1351, x1398); + fiat_secp384r1_addcarryx_u32(&x1425, &x1426, x1424, x1353, x1400); + fiat_secp384r1_addcarryx_u32(&x1427, &x1428, x1426, x1355, x1402); + fiat_secp384r1_mulx_u32(&x1429, &x1430, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1431, &x1432, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1433, &x1434, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1435, &x1436, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1437, &x1438, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1439, &x1440, x1403, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x1441, &x1442, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1443, &x1444, x1403, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1445, &x1446, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1447, &x1448, x1403, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1449, &x1450, 0x0, x1446, x1443); + fiat_secp384r1_addcarryx_u32(&x1451, &x1452, x1450, x1444, x1441); + fiat_secp384r1_addcarryx_u32(&x1453, &x1454, x1452, x1442, x1439); + fiat_secp384r1_addcarryx_u32(&x1455, &x1456, x1454, x1440, x1437); + fiat_secp384r1_addcarryx_u32(&x1457, &x1458, x1456, x1438, x1435); + fiat_secp384r1_addcarryx_u32(&x1459, &x1460, x1458, x1436, x1433); + fiat_secp384r1_addcarryx_u32(&x1461, &x1462, x1460, x1434, x1431); + fiat_secp384r1_addcarryx_u32(&x1463, &x1464, x1462, x1432, x1429); + x1465 = (x1464 + x1430); + fiat_secp384r1_addcarryx_u32(&x1466, &x1467, 0x0, x1403, x1447); + fiat_secp384r1_addcarryx_u32(&x1468, &x1469, x1467, x1405, x1448); + fiat_secp384r1_addcarryx_u32(&x1470, &x1471, x1469, x1407, 0x0); + fiat_secp384r1_addcarryx_u32(&x1472, &x1473, x1471, x1409, x1445); + fiat_secp384r1_addcarryx_u32(&x1474, &x1475, x1473, x1411, x1449); + fiat_secp384r1_addcarryx_u32(&x1476, &x1477, x1475, x1413, x1451); + fiat_secp384r1_addcarryx_u32(&x1478, &x1479, x1477, x1415, x1453); + fiat_secp384r1_addcarryx_u32(&x1480, &x1481, x1479, x1417, x1455); + fiat_secp384r1_addcarryx_u32(&x1482, &x1483, x1481, x1419, x1457); + fiat_secp384r1_addcarryx_u32(&x1484, &x1485, x1483, x1421, x1459); + fiat_secp384r1_addcarryx_u32(&x1486, &x1487, x1485, x1423, x1461); + fiat_secp384r1_addcarryx_u32(&x1488, &x1489, x1487, x1425, x1463); + fiat_secp384r1_addcarryx_u32(&x1490, &x1491, x1489, x1427, x1465); + x1492 = ((uint32_t)x1491 + x1428); + fiat_secp384r1_mulx_u32(&x1493, &x1494, x11, (arg1[11])); + fiat_secp384r1_mulx_u32(&x1495, &x1496, x11, (arg1[10])); + fiat_secp384r1_mulx_u32(&x1497, &x1498, x11, (arg1[9])); + 
fiat_secp384r1_mulx_u32(&x1499, &x1500, x11, (arg1[8])); + fiat_secp384r1_mulx_u32(&x1501, &x1502, x11, (arg1[7])); + fiat_secp384r1_mulx_u32(&x1503, &x1504, x11, (arg1[6])); + fiat_secp384r1_mulx_u32(&x1505, &x1506, x11, (arg1[5])); + fiat_secp384r1_mulx_u32(&x1507, &x1508, x11, (arg1[4])); + fiat_secp384r1_mulx_u32(&x1509, &x1510, x11, (arg1[3])); + fiat_secp384r1_mulx_u32(&x1511, &x1512, x11, (arg1[2])); + fiat_secp384r1_mulx_u32(&x1513, &x1514, x11, (arg1[1])); + fiat_secp384r1_mulx_u32(&x1515, &x1516, x11, (arg1[0])); + fiat_secp384r1_addcarryx_u32(&x1517, &x1518, 0x0, x1516, x1513); + fiat_secp384r1_addcarryx_u32(&x1519, &x1520, x1518, x1514, x1511); + fiat_secp384r1_addcarryx_u32(&x1521, &x1522, x1520, x1512, x1509); + fiat_secp384r1_addcarryx_u32(&x1523, &x1524, x1522, x1510, x1507); + fiat_secp384r1_addcarryx_u32(&x1525, &x1526, x1524, x1508, x1505); + fiat_secp384r1_addcarryx_u32(&x1527, &x1528, x1526, x1506, x1503); + fiat_secp384r1_addcarryx_u32(&x1529, &x1530, x1528, x1504, x1501); + fiat_secp384r1_addcarryx_u32(&x1531, &x1532, x1530, x1502, x1499); + fiat_secp384r1_addcarryx_u32(&x1533, &x1534, x1532, x1500, x1497); + fiat_secp384r1_addcarryx_u32(&x1535, &x1536, x1534, x1498, x1495); + fiat_secp384r1_addcarryx_u32(&x1537, &x1538, x1536, x1496, x1493); + x1539 = (x1538 + x1494); + fiat_secp384r1_addcarryx_u32(&x1540, &x1541, 0x0, x1468, x1515); + fiat_secp384r1_addcarryx_u32(&x1542, &x1543, x1541, x1470, x1517); + fiat_secp384r1_addcarryx_u32(&x1544, &x1545, x1543, x1472, x1519); + fiat_secp384r1_addcarryx_u32(&x1546, &x1547, x1545, x1474, x1521); + fiat_secp384r1_addcarryx_u32(&x1548, &x1549, x1547, x1476, x1523); + fiat_secp384r1_addcarryx_u32(&x1550, &x1551, x1549, x1478, x1525); + fiat_secp384r1_addcarryx_u32(&x1552, &x1553, x1551, x1480, x1527); + fiat_secp384r1_addcarryx_u32(&x1554, &x1555, x1553, x1482, x1529); + fiat_secp384r1_addcarryx_u32(&x1556, &x1557, x1555, x1484, x1531); + fiat_secp384r1_addcarryx_u32(&x1558, &x1559, x1557, x1486, 
x1533); + fiat_secp384r1_addcarryx_u32(&x1560, &x1561, x1559, x1488, x1535); + fiat_secp384r1_addcarryx_u32(&x1562, &x1563, x1561, x1490, x1537); + fiat_secp384r1_addcarryx_u32(&x1564, &x1565, x1563, x1492, x1539); + fiat_secp384r1_mulx_u32(&x1566, &x1567, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1568, &x1569, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1570, &x1571, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1572, &x1573, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1574, &x1575, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1576, &x1577, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1578, &x1579, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1580, &x1581, x1540, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1582, &x1583, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1584, &x1585, x1540, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1586, &x1587, 0x0, x1583, x1580); + fiat_secp384r1_addcarryx_u32(&x1588, &x1589, x1587, x1581, x1578); + fiat_secp384r1_addcarryx_u32(&x1590, &x1591, x1589, x1579, x1576); + fiat_secp384r1_addcarryx_u32(&x1592, &x1593, x1591, x1577, x1574); + fiat_secp384r1_addcarryx_u32(&x1594, &x1595, x1593, x1575, x1572); + fiat_secp384r1_addcarryx_u32(&x1596, &x1597, x1595, x1573, x1570); + fiat_secp384r1_addcarryx_u32(&x1598, &x1599, x1597, x1571, x1568); + fiat_secp384r1_addcarryx_u32(&x1600, &x1601, x1599, x1569, x1566); + x1602 = (x1601 + x1567); + fiat_secp384r1_addcarryx_u32(&x1603, &x1604, 0x0, x1540, x1584); + fiat_secp384r1_addcarryx_u32(&x1605, &x1606, x1604, x1542, x1585); + fiat_secp384r1_addcarryx_u32(&x1607, &x1608, x1606, x1544, 0x0); + fiat_secp384r1_addcarryx_u32(&x1609, &x1610, x1608, x1546, x1582); + fiat_secp384r1_addcarryx_u32(&x1611, &x1612, x1610, x1548, x1586); + fiat_secp384r1_addcarryx_u32(&x1613, &x1614, x1612, x1550, x1588); + fiat_secp384r1_addcarryx_u32(&x1615, &x1616, x1614, x1552, x1590); + 
fiat_secp384r1_addcarryx_u32(&x1617, &x1618, x1616, x1554, x1592); + fiat_secp384r1_addcarryx_u32(&x1619, &x1620, x1618, x1556, x1594); + fiat_secp384r1_addcarryx_u32(&x1621, &x1622, x1620, x1558, x1596); + fiat_secp384r1_addcarryx_u32(&x1623, &x1624, x1622, x1560, x1598); + fiat_secp384r1_addcarryx_u32(&x1625, &x1626, x1624, x1562, x1600); + fiat_secp384r1_addcarryx_u32(&x1627, &x1628, x1626, x1564, x1602); + x1629 = ((uint32_t)x1628 + x1565); + fiat_secp384r1_subborrowx_u32(&x1630, &x1631, 0x0, x1605, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1632, &x1633, x1631, x1607, 0x0); + fiat_secp384r1_subborrowx_u32(&x1634, &x1635, x1633, x1609, 0x0); + fiat_secp384r1_subborrowx_u32(&x1636, &x1637, x1635, x1611, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1638, &x1639, x1637, x1613, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x1640, &x1641, x1639, x1615, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1642, &x1643, x1641, x1617, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1644, &x1645, x1643, x1619, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1646, &x1647, x1645, x1621, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1648, &x1649, x1647, x1623, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1650, &x1651, x1649, x1625, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1652, &x1653, x1651, x1627, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1654, &x1655, x1653, x1629, 0x0); + fiat_secp384r1_cmovznz_u32(&x1656, x1655, x1630, x1605); + fiat_secp384r1_cmovznz_u32(&x1657, x1655, x1632, x1607); + fiat_secp384r1_cmovznz_u32(&x1658, x1655, x1634, x1609); + fiat_secp384r1_cmovznz_u32(&x1659, x1655, x1636, x1611); + fiat_secp384r1_cmovznz_u32(&x1660, x1655, x1638, x1613); + fiat_secp384r1_cmovznz_u32(&x1661, x1655, x1640, x1615); + fiat_secp384r1_cmovznz_u32(&x1662, x1655, x1642, x1617); + fiat_secp384r1_cmovznz_u32(&x1663, x1655, x1644, x1619); + 
fiat_secp384r1_cmovznz_u32(&x1664, x1655, x1646, x1621); + fiat_secp384r1_cmovznz_u32(&x1665, x1655, x1648, x1623); + fiat_secp384r1_cmovznz_u32(&x1666, x1655, x1650, x1625); + fiat_secp384r1_cmovznz_u32(&x1667, x1655, x1652, x1627); + out1[0] = x1656; + out1[1] = x1657; + out1[2] = x1658; + out1[3] = x1659; + out1[4] = x1660; + out1[5] = x1661; + out1[6] = x1662; + out1[7] = x1663; + out1[8] = x1664; + out1[9] = x1665; + out1[10] = x1666; + out1[11] = x1667; +} + +/* + * The function fiat_secp384r1_add adds two field elements in the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * Method: the 12 uint32_t limbs are added with a carry chain (x1..x23, carry out x24); the constant limbs 0xffffffff, 0, 0, 0xffffffff, 0xfffffffe, 0xffffffff (x7), least-significant first (the limbs of 2^384 - 2^128 - 2^96 + 2^32 - 1), are then subtracted with a borrow chain; the final borrow x50 is the one flag passed to every cmovznz call that picks an output limb. + */ +static void +fiat_secp384r1_add( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) +{ + uint32_t x1; + fiat_secp384r1_uint1 x2; + uint32_t x3; + fiat_secp384r1_uint1 x4; + uint32_t x5; + fiat_secp384r1_uint1 x6; + uint32_t x7; + fiat_secp384r1_uint1 x8; + uint32_t x9; + fiat_secp384r1_uint1 x10; + uint32_t x11; + fiat_secp384r1_uint1 x12; + uint32_t x13; + fiat_secp384r1_uint1 x14; + uint32_t x15; + fiat_secp384r1_uint1 x16; + uint32_t x17; + fiat_secp384r1_uint1 x18; + uint32_t x19; + fiat_secp384r1_uint1 x20; + uint32_t x21; + fiat_secp384r1_uint1 x22; + uint32_t x23; + fiat_secp384r1_uint1 x24; + uint32_t x25; + fiat_secp384r1_uint1 x26; + uint32_t x27; + fiat_secp384r1_uint1 x28; + uint32_t x29; + fiat_secp384r1_uint1 x30; + uint32_t x31; + fiat_secp384r1_uint1 x32; + uint32_t x33; + fiat_secp384r1_uint1 x34; + uint32_t x35; + fiat_secp384r1_uint1 x36; + uint32_t x37; + fiat_secp384r1_uint1 x38; + uint32_t x39; + fiat_secp384r1_uint1 x40; + uint32_t x41; + fiat_secp384r1_uint1 x42; + uint32_t x43; + fiat_secp384r1_uint1 x44; + uint32_t x45; + fiat_secp384r1_uint1 x46; 
+ uint32_t x47; + fiat_secp384r1_uint1 x48; + uint32_t x49; + fiat_secp384r1_uint1 x50; + uint32_t x51; + uint32_t x52; + uint32_t x53; + uint32_t x54; + uint32_t x55; + uint32_t x56; + uint32_t x57; + uint32_t x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + fiat_secp384r1_addcarryx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); /* limb 0 starts the chain with carry-in 0x0; each later call is fed the previous call's carry output */ + fiat_secp384r1_addcarryx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1])); + fiat_secp384r1_addcarryx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2])); + fiat_secp384r1_addcarryx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3])); + fiat_secp384r1_addcarryx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4])); + fiat_secp384r1_addcarryx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5])); + fiat_secp384r1_addcarryx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6])); + fiat_secp384r1_addcarryx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7])); + fiat_secp384r1_addcarryx_u32(&x17, &x18, x16, (arg1[8]), (arg2[8])); + fiat_secp384r1_addcarryx_u32(&x19, &x20, x18, (arg1[9]), (arg2[9])); + fiat_secp384r1_addcarryx_u32(&x21, &x22, x20, (arg1[10]), (arg2[10])); + fiat_secp384r1_addcarryx_u32(&x23, &x24, x22, (arg1[11]), (arg2[11])); /* x24: carry out of the top limb */ + fiat_secp384r1_subborrowx_u32(&x25, &x26, 0x0, x1, UINT32_C(0xffffffff)); /* trial subtraction of the constant limbs starts here with borrow-in 0x0 */ + fiat_secp384r1_subborrowx_u32(&x27, &x28, x26, x3, 0x0); + fiat_secp384r1_subborrowx_u32(&x29, &x30, x28, x5, 0x0); + fiat_secp384r1_subborrowx_u32(&x31, &x32, x30, x7, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x33, &x34, x32, x9, UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x35, &x36, x34, x11, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x37, &x38, x36, x13, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x39, &x40, x38, x15, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x41, &x42, x40, x17, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x43, &x44, x42, x19, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x45, &x46, x44, x21, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x47, &x48, x46, x23, 
UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x49, &x50, x48, x24, 0x0); /* the sum's carry x24 is run through the borrow chain as a 13th limb; x50 is the final borrow */ + fiat_secp384r1_cmovznz_u32(&x51, x50, x25, x1); /* chooses between difference limb x25 and sum limb x1 based on x50; NOTE(review): cmovznz is defined outside this span - presumably a set flag keeps the unreduced sum, confirm */ + fiat_secp384r1_cmovznz_u32(&x52, x50, x27, x3); + fiat_secp384r1_cmovznz_u32(&x53, x50, x29, x5); + fiat_secp384r1_cmovznz_u32(&x54, x50, x31, x7); + fiat_secp384r1_cmovznz_u32(&x55, x50, x33, x9); + fiat_secp384r1_cmovznz_u32(&x56, x50, x35, x11); + fiat_secp384r1_cmovznz_u32(&x57, x50, x37, x13); + fiat_secp384r1_cmovznz_u32(&x58, x50, x39, x15); + fiat_secp384r1_cmovznz_u32(&x59, x50, x41, x17); + fiat_secp384r1_cmovznz_u32(&x60, x50, x43, x19); + fiat_secp384r1_cmovznz_u32(&x61, x50, x45, x21); + fiat_secp384r1_cmovznz_u32(&x62, x50, x47, x23); + out1[0] = x51; + out1[1] = x52; + out1[2] = x53; + out1[3] = x54; + out1[4] = x55; + out1[5] = x56; + out1[6] = x57; + out1[7] = x58; + out1[8] = x59; + out1[9] = x60; + out1[10] = x61; + out1[11] = x62; +} + +/* + * The function fiat_secp384r1_sub subtracts two field elements in the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * Method: arg2 is subtracted from arg1 limb by limb with a borrow chain (x1..x23, final borrow x24); cmovznz derives the word x25 from x24 out of the pair 0x0 / 0xffffffff; the limbs x25, 0, 0, x25, x25 & 0xfffffffe, x25 (x7) are then added back with a carry chain. NOTE(review): presumably x25 is 0xffffffff exactly when the subtraction borrowed, so that m is added back - confirm against cmovznz. + */ +static void +fiat_secp384r1_sub( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) +{ + uint32_t x1; + fiat_secp384r1_uint1 x2; + uint32_t x3; + fiat_secp384r1_uint1 x4; + uint32_t x5; + fiat_secp384r1_uint1 x6; + uint32_t x7; + fiat_secp384r1_uint1 x8; + uint32_t x9; + fiat_secp384r1_uint1 x10; + uint32_t x11; + fiat_secp384r1_uint1 x12; + uint32_t x13; + fiat_secp384r1_uint1 x14; + uint32_t x15; + fiat_secp384r1_uint1 x16; + uint32_t x17; + fiat_secp384r1_uint1 x18; + uint32_t x19; + fiat_secp384r1_uint1 x20; + uint32_t x21; + fiat_secp384r1_uint1 x22; + uint32_t x23; + fiat_secp384r1_uint1 x24; + uint32_t x25; + uint32_t x26; + 
fiat_secp384r1_uint1 x27; + uint32_t x28; + fiat_secp384r1_uint1 x29; + uint32_t x30; + fiat_secp384r1_uint1 x31; + uint32_t x32; + fiat_secp384r1_uint1 x33; + uint32_t x34; + fiat_secp384r1_uint1 x35; + uint32_t x36; + fiat_secp384r1_uint1 x37; + uint32_t x38; + fiat_secp384r1_uint1 x39; + uint32_t x40; + fiat_secp384r1_uint1 x41; + uint32_t x42; + fiat_secp384r1_uint1 x43; + uint32_t x44; + fiat_secp384r1_uint1 x45; + uint32_t x46; + fiat_secp384r1_uint1 x47; + uint32_t x48; + fiat_secp384r1_uint1 x49; + fiat_secp384r1_subborrowx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); /* limb 0 starts with borrow-in 0x0; each later call is fed the previous call's borrow output */ + fiat_secp384r1_subborrowx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1])); + fiat_secp384r1_subborrowx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2])); + fiat_secp384r1_subborrowx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3])); + fiat_secp384r1_subborrowx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4])); + fiat_secp384r1_subborrowx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5])); + fiat_secp384r1_subborrowx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6])); + fiat_secp384r1_subborrowx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7])); + fiat_secp384r1_subborrowx_u32(&x17, &x18, x16, (arg1[8]), (arg2[8])); + fiat_secp384r1_subborrowx_u32(&x19, &x20, x18, (arg1[9]), (arg2[9])); + fiat_secp384r1_subborrowx_u32(&x21, &x22, x20, (arg1[10]), (arg2[10])); + fiat_secp384r1_subborrowx_u32(&x23, &x24, x22, (arg1[11]), (arg2[11])); + fiat_secp384r1_cmovznz_u32(&x25, x24, 0x0, UINT32_C(0xffffffff)); /* x25: mask word derived from the final borrow x24 */ + fiat_secp384r1_addcarryx_u32(&x26, &x27, 0x0, x1, x25); + fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, x3, 0x0); + fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, x5, 0x0); + fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, x7, x25); + fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, x9, + (x25 & UINT32_C(0xfffffffe))); /* limb 4 is the only limb whose addend is masked down to 0xfffffffe */ + fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, x11, x25); + fiat_secp384r1_addcarryx_u32(&x38, &x39, x37, x13, x25); + fiat_secp384r1_addcarryx_u32(&x40, &x41, x39, x15, x25); + fiat_secp384r1_addcarryx_u32(&x42, &x43, x41, x17, x25); + 
fiat_secp384r1_addcarryx_u32(&x44, &x45, x43, x19, x25); + fiat_secp384r1_addcarryx_u32(&x46, &x47, x45, x21, x25); + fiat_secp384r1_addcarryx_u32(&x48, &x49, x47, x23, x25); /* the last carry x49 is not read afterwards */ + out1[0] = x26; + out1[1] = x28; + out1[2] = x30; + out1[3] = x32; + out1[4] = x34; + out1[5] = x36; + out1[6] = x38; + out1[7] = x40; + out1[8] = x42; + out1[9] = x44; + out1[10] = x46; + out1[11] = x48; +} + +/* + * The function fiat_secp384r1_opp negates a field element in the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m + * 0 ≤ eval out1 < m + * Method: same statement sequence as fiat_secp384r1_sub with the literal 0x0 as every minuend limb: arg1 is subtracted from zero with a borrow chain (final borrow x24), cmovznz derives the mask word x25 from x24, and the x25-masked constant limbs are added back with a carry chain. NOTE(review): presumably x25 is 0xffffffff exactly when the subtraction borrowed - confirm against cmovznz. + */ +static void +fiat_secp384r1_opp( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) +{ + uint32_t x1; + fiat_secp384r1_uint1 x2; + uint32_t x3; + fiat_secp384r1_uint1 x4; + uint32_t x5; + fiat_secp384r1_uint1 x6; + uint32_t x7; + fiat_secp384r1_uint1 x8; + uint32_t x9; + fiat_secp384r1_uint1 x10; + uint32_t x11; + fiat_secp384r1_uint1 x12; + uint32_t x13; + fiat_secp384r1_uint1 x14; + uint32_t x15; + fiat_secp384r1_uint1 x16; + uint32_t x17; + fiat_secp384r1_uint1 x18; + uint32_t x19; + fiat_secp384r1_uint1 x20; + uint32_t x21; + fiat_secp384r1_uint1 x22; + uint32_t x23; + fiat_secp384r1_uint1 x24; + uint32_t x25; + uint32_t x26; + fiat_secp384r1_uint1 x27; + uint32_t x28; + fiat_secp384r1_uint1 x29; + uint32_t x30; + fiat_secp384r1_uint1 x31; + uint32_t x32; + fiat_secp384r1_uint1 x33; + uint32_t x34; + fiat_secp384r1_uint1 x35; + uint32_t x36; + fiat_secp384r1_uint1 x37; + uint32_t x38; + fiat_secp384r1_uint1 x39; + uint32_t x40; + fiat_secp384r1_uint1 x41; + uint32_t x42; + fiat_secp384r1_uint1 x43; + uint32_t x44; + fiat_secp384r1_uint1 x45; + uint32_t x46; + fiat_secp384r1_uint1 x47; + uint32_t x48; + fiat_secp384r1_uint1 x49; + fiat_secp384r1_subborrowx_u32(&x1, &x2, 0x0, 0x0, (arg1[0])); /* borrow-in 0x0 and minuend 0x0; later calls chain the previous borrow */ + fiat_secp384r1_subborrowx_u32(&x3, &x4, x2, 0x0, (arg1[1])); + 
fiat_secp384r1_subborrowx_u32(&x5, &x6, x4, 0x0, (arg1[2])); + fiat_secp384r1_subborrowx_u32(&x7, &x8, x6, 0x0, (arg1[3])); + fiat_secp384r1_subborrowx_u32(&x9, &x10, x8, 0x0, (arg1[4])); + fiat_secp384r1_subborrowx_u32(&x11, &x12, x10, 0x0, (arg1[5])); + fiat_secp384r1_subborrowx_u32(&x13, &x14, x12, 0x0, (arg1[6])); + fiat_secp384r1_subborrowx_u32(&x15, &x16, x14, 0x0, (arg1[7])); + fiat_secp384r1_subborrowx_u32(&x17, &x18, x16, 0x0, (arg1[8])); + fiat_secp384r1_subborrowx_u32(&x19, &x20, x18, 0x0, (arg1[9])); + fiat_secp384r1_subborrowx_u32(&x21, &x22, x20, 0x0, (arg1[10])); + fiat_secp384r1_subborrowx_u32(&x23, &x24, x22, 0x0, (arg1[11])); + fiat_secp384r1_cmovznz_u32(&x25, x24, 0x0, UINT32_C(0xffffffff)); /* x25: mask word derived from the final borrow x24 */ + fiat_secp384r1_addcarryx_u32(&x26, &x27, 0x0, x1, x25); + fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, x3, 0x0); + fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, x5, 0x0); + fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, x7, x25); + fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, x9, + (x25 & UINT32_C(0xfffffffe))); /* limb 4 is the only limb whose addend is masked down to 0xfffffffe */ + fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, x11, x25); + fiat_secp384r1_addcarryx_u32(&x38, &x39, x37, x13, x25); + fiat_secp384r1_addcarryx_u32(&x40, &x41, x39, x15, x25); + fiat_secp384r1_addcarryx_u32(&x42, &x43, x41, x17, x25); + fiat_secp384r1_addcarryx_u32(&x44, &x45, x43, x19, x25); + fiat_secp384r1_addcarryx_u32(&x46, &x47, x45, x21, x25); + fiat_secp384r1_addcarryx_u32(&x48, &x49, x47, x23, x25); /* the last carry x49 is not read afterwards */ + out1[0] = x26; + out1[1] = x28; + out1[2] = x30; + out1[3] = x32; + out1[4] = x34; + out1[5] = x36; + out1[6] = x38; + out1[7] = x40; + out1[8] = x42; + out1[9] = x44; + out1[10] = x46; + out1[11] = x48; +} + +/* + * The function fiat_secp384r1_from_montgomery translates a field element out of the Montgomery domain. 
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval out1 mod m = (eval arg1 * ((2^32)⁻¹ mod m)^12) mod m + * 0 ≤ eval out1 < m + * + */ +static void +fiat_secp384r1_from_montgomery( + fiat_secp384r1_non_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) +{ + uint32_t x1; + uint32_t x2; + uint32_t x3; + uint32_t x4; + uint32_t x5; + uint32_t x6; + uint32_t x7; + uint32_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint32_t x16; + uint32_t x17; + uint32_t x18; + uint32_t x19; + uint32_t x20; + uint32_t x21; + uint32_t x22; + fiat_secp384r1_uint1 x23; + uint32_t x24; + fiat_secp384r1_uint1 x25; + uint32_t x26; + fiat_secp384r1_uint1 x27; + uint32_t x28; + fiat_secp384r1_uint1 x29; + uint32_t x30; + fiat_secp384r1_uint1 x31; + uint32_t x32; + fiat_secp384r1_uint1 x33; + uint32_t x34; + fiat_secp384r1_uint1 x35; + uint32_t x36; + fiat_secp384r1_uint1 x37; + uint32_t x38; + fiat_secp384r1_uint1 x39; + uint32_t x40; + fiat_secp384r1_uint1 x41; + uint32_t x42; + uint32_t x43; + uint32_t x44; + uint32_t x45; + uint32_t x46; + uint32_t x47; + uint32_t x48; + uint32_t x49; + uint32_t x50; + uint32_t x51; + uint32_t x52; + uint32_t x53; + uint32_t x54; + uint32_t x55; + uint32_t x56; + uint32_t x57; + uint32_t x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + fiat_secp384r1_uint1 x63; + uint32_t x64; + fiat_secp384r1_uint1 x65; + uint32_t x66; + fiat_secp384r1_uint1 x67; + uint32_t x68; + fiat_secp384r1_uint1 x69; + uint32_t x70; + fiat_secp384r1_uint1 x71; + uint32_t x72; + fiat_secp384r1_uint1 x73; + uint32_t x74; + fiat_secp384r1_uint1 x75; + uint32_t x76; + fiat_secp384r1_uint1 x77; + uint32_t x78; + fiat_secp384r1_uint1 x79; + uint32_t x80; + fiat_secp384r1_uint1 x81; + uint32_t x82; + fiat_secp384r1_uint1 x83; + uint32_t x84; + fiat_secp384r1_uint1 x85; + uint32_t x86; + fiat_secp384r1_uint1 x87; + uint32_t 
x88; + fiat_secp384r1_uint1 x89; + uint32_t x90; + fiat_secp384r1_uint1 x91; + uint32_t x92; + fiat_secp384r1_uint1 x93; + uint32_t x94; + fiat_secp384r1_uint1 x95; + uint32_t x96; + fiat_secp384r1_uint1 x97; + uint32_t x98; + fiat_secp384r1_uint1 x99; + uint32_t x100; + fiat_secp384r1_uint1 x101; + uint32_t x102; + fiat_secp384r1_uint1 x103; + uint32_t x104; + fiat_secp384r1_uint1 x105; + uint32_t x106; + fiat_secp384r1_uint1 x107; + uint32_t x108; + fiat_secp384r1_uint1 x109; + uint32_t x110; + fiat_secp384r1_uint1 x111; + uint32_t x112; + fiat_secp384r1_uint1 x113; + uint32_t x114; + fiat_secp384r1_uint1 x115; + uint32_t x116; + fiat_secp384r1_uint1 x117; + uint32_t x118; + fiat_secp384r1_uint1 x119; + uint32_t x120; + fiat_secp384r1_uint1 x121; + uint32_t x122; + fiat_secp384r1_uint1 x123; + uint32_t x124; + fiat_secp384r1_uint1 x125; + uint32_t x126; + fiat_secp384r1_uint1 x127; + uint32_t x128; + uint32_t x129; + uint32_t x130; + uint32_t x131; + uint32_t x132; + uint32_t x133; + uint32_t x134; + uint32_t x135; + uint32_t x136; + uint32_t x137; + uint32_t x138; + uint32_t x139; + uint32_t x140; + uint32_t x141; + uint32_t x142; + uint32_t x143; + uint32_t x144; + uint32_t x145; + uint32_t x146; + uint32_t x147; + uint32_t x148; + fiat_secp384r1_uint1 x149; + uint32_t x150; + fiat_secp384r1_uint1 x151; + uint32_t x152; + fiat_secp384r1_uint1 x153; + uint32_t x154; + fiat_secp384r1_uint1 x155; + uint32_t x156; + fiat_secp384r1_uint1 x157; + uint32_t x158; + fiat_secp384r1_uint1 x159; + uint32_t x160; + fiat_secp384r1_uint1 x161; + uint32_t x162; + fiat_secp384r1_uint1 x163; + uint32_t x164; + fiat_secp384r1_uint1 x165; + uint32_t x166; + fiat_secp384r1_uint1 x167; + uint32_t x168; + fiat_secp384r1_uint1 x169; + uint32_t x170; + fiat_secp384r1_uint1 x171; + uint32_t x172; + fiat_secp384r1_uint1 x173; + uint32_t x174; + fiat_secp384r1_uint1 x175; + uint32_t x176; + fiat_secp384r1_uint1 x177; + uint32_t x178; + fiat_secp384r1_uint1 x179; + uint32_t x180; + 
fiat_secp384r1_uint1 x181; + uint32_t x182; + fiat_secp384r1_uint1 x183; + uint32_t x184; + fiat_secp384r1_uint1 x185; + uint32_t x186; + fiat_secp384r1_uint1 x187; + uint32_t x188; + fiat_secp384r1_uint1 x189; + uint32_t x190; + fiat_secp384r1_uint1 x191; + uint32_t x192; + fiat_secp384r1_uint1 x193; + uint32_t x194; + fiat_secp384r1_uint1 x195; + uint32_t x196; + fiat_secp384r1_uint1 x197; + uint32_t x198; + fiat_secp384r1_uint1 x199; + uint32_t x200; + fiat_secp384r1_uint1 x201; + uint32_t x202; + fiat_secp384r1_uint1 x203; + uint32_t x204; + fiat_secp384r1_uint1 x205; + uint32_t x206; + fiat_secp384r1_uint1 x207; + uint32_t x208; + fiat_secp384r1_uint1 x209; + uint32_t x210; + fiat_secp384r1_uint1 x211; + uint32_t x212; + fiat_secp384r1_uint1 x213; + uint32_t x214; + uint32_t x215; + uint32_t x216; + uint32_t x217; + uint32_t x218; + uint32_t x219; + uint32_t x220; + uint32_t x221; + uint32_t x222; + uint32_t x223; + uint32_t x224; + uint32_t x225; + uint32_t x226; + uint32_t x227; + uint32_t x228; + uint32_t x229; + uint32_t x230; + uint32_t x231; + uint32_t x232; + uint32_t x233; + uint32_t x234; + fiat_secp384r1_uint1 x235; + uint32_t x236; + fiat_secp384r1_uint1 x237; + uint32_t x238; + fiat_secp384r1_uint1 x239; + uint32_t x240; + fiat_secp384r1_uint1 x241; + uint32_t x242; + fiat_secp384r1_uint1 x243; + uint32_t x244; + fiat_secp384r1_uint1 x245; + uint32_t x246; + fiat_secp384r1_uint1 x247; + uint32_t x248; + fiat_secp384r1_uint1 x249; + uint32_t x250; + fiat_secp384r1_uint1 x251; + uint32_t x252; + fiat_secp384r1_uint1 x253; + uint32_t x254; + fiat_secp384r1_uint1 x255; + uint32_t x256; + fiat_secp384r1_uint1 x257; + uint32_t x258; + fiat_secp384r1_uint1 x259; + uint32_t x260; + fiat_secp384r1_uint1 x261; + uint32_t x262; + fiat_secp384r1_uint1 x263; + uint32_t x264; + fiat_secp384r1_uint1 x265; + uint32_t x266; + fiat_secp384r1_uint1 x267; + uint32_t x268; + fiat_secp384r1_uint1 x269; + uint32_t x270; + fiat_secp384r1_uint1 x271; + uint32_t x272; + 
fiat_secp384r1_uint1 x273; + uint32_t x274; + fiat_secp384r1_uint1 x275; + uint32_t x276; + fiat_secp384r1_uint1 x277; + uint32_t x278; + fiat_secp384r1_uint1 x279; + uint32_t x280; + fiat_secp384r1_uint1 x281; + uint32_t x282; + fiat_secp384r1_uint1 x283; + uint32_t x284; + fiat_secp384r1_uint1 x285; + uint32_t x286; + fiat_secp384r1_uint1 x287; + uint32_t x288; + fiat_secp384r1_uint1 x289; + uint32_t x290; + fiat_secp384r1_uint1 x291; + uint32_t x292; + fiat_secp384r1_uint1 x293; + uint32_t x294; + fiat_secp384r1_uint1 x295; + uint32_t x296; + fiat_secp384r1_uint1 x297; + uint32_t x298; + fiat_secp384r1_uint1 x299; + uint32_t x300; + uint32_t x301; + uint32_t x302; + uint32_t x303; + uint32_t x304; + uint32_t x305; + uint32_t x306; + uint32_t x307; + uint32_t x308; + uint32_t x309; + uint32_t x310; + uint32_t x311; + uint32_t x312; + uint32_t x313; + uint32_t x314; + uint32_t x315; + uint32_t x316; + uint32_t x317; + uint32_t x318; + uint32_t x319; + uint32_t x320; + fiat_secp384r1_uint1 x321; + uint32_t x322; + fiat_secp384r1_uint1 x323; + uint32_t x324; + fiat_secp384r1_uint1 x325; + uint32_t x326; + fiat_secp384r1_uint1 x327; + uint32_t x328; + fiat_secp384r1_uint1 x329; + uint32_t x330; + fiat_secp384r1_uint1 x331; + uint32_t x332; + fiat_secp384r1_uint1 x333; + uint32_t x334; + fiat_secp384r1_uint1 x335; + uint32_t x336; + fiat_secp384r1_uint1 x337; + uint32_t x338; + fiat_secp384r1_uint1 x339; + uint32_t x340; + fiat_secp384r1_uint1 x341; + uint32_t x342; + fiat_secp384r1_uint1 x343; + uint32_t x344; + fiat_secp384r1_uint1 x345; + uint32_t x346; + fiat_secp384r1_uint1 x347; + uint32_t x348; + fiat_secp384r1_uint1 x349; + uint32_t x350; + fiat_secp384r1_uint1 x351; + uint32_t x352; + fiat_secp384r1_uint1 x353; + uint32_t x354; + fiat_secp384r1_uint1 x355; + uint32_t x356; + fiat_secp384r1_uint1 x357; + uint32_t x358; + fiat_secp384r1_uint1 x359; + uint32_t x360; + fiat_secp384r1_uint1 x361; + uint32_t x362; + fiat_secp384r1_uint1 x363; + uint32_t x364; + 
fiat_secp384r1_uint1 x365; + uint32_t x366; + fiat_secp384r1_uint1 x367; + uint32_t x368; + fiat_secp384r1_uint1 x369; + uint32_t x370; + fiat_secp384r1_uint1 x371; + uint32_t x372; + fiat_secp384r1_uint1 x373; + uint32_t x374; + fiat_secp384r1_uint1 x375; + uint32_t x376; + fiat_secp384r1_uint1 x377; + uint32_t x378; + fiat_secp384r1_uint1 x379; + uint32_t x380; + fiat_secp384r1_uint1 x381; + uint32_t x382; + fiat_secp384r1_uint1 x383; + uint32_t x384; + fiat_secp384r1_uint1 x385; + uint32_t x386; + uint32_t x387; + uint32_t x388; + uint32_t x389; + uint32_t x390; + uint32_t x391; + uint32_t x392; + uint32_t x393; + uint32_t x394; + uint32_t x395; + uint32_t x396; + uint32_t x397; + uint32_t x398; + uint32_t x399; + uint32_t x400; + uint32_t x401; + uint32_t x402; + uint32_t x403; + uint32_t x404; + uint32_t x405; + uint32_t x406; + fiat_secp384r1_uint1 x407; + uint32_t x408; + fiat_secp384r1_uint1 x409; + uint32_t x410; + fiat_secp384r1_uint1 x411; + uint32_t x412; + fiat_secp384r1_uint1 x413; + uint32_t x414; + fiat_secp384r1_uint1 x415; + uint32_t x416; + fiat_secp384r1_uint1 x417; + uint32_t x418; + fiat_secp384r1_uint1 x419; + uint32_t x420; + fiat_secp384r1_uint1 x421; + uint32_t x422; + fiat_secp384r1_uint1 x423; + uint32_t x424; + fiat_secp384r1_uint1 x425; + uint32_t x426; + fiat_secp384r1_uint1 x427; + uint32_t x428; + fiat_secp384r1_uint1 x429; + uint32_t x430; + fiat_secp384r1_uint1 x431; + uint32_t x432; + fiat_secp384r1_uint1 x433; + uint32_t x434; + fiat_secp384r1_uint1 x435; + uint32_t x436; + fiat_secp384r1_uint1 x437; + uint32_t x438; + fiat_secp384r1_uint1 x439; + uint32_t x440; + fiat_secp384r1_uint1 x441; + uint32_t x442; + fiat_secp384r1_uint1 x443; + uint32_t x444; + fiat_secp384r1_uint1 x445; + uint32_t x446; + fiat_secp384r1_uint1 x447; + uint32_t x448; + fiat_secp384r1_uint1 x449; + uint32_t x450; + fiat_secp384r1_uint1 x451; + uint32_t x452; + fiat_secp384r1_uint1 x453; + uint32_t x454; + fiat_secp384r1_uint1 x455; + uint32_t x456; + 
fiat_secp384r1_uint1 x457; + uint32_t x458; + fiat_secp384r1_uint1 x459; + uint32_t x460; + fiat_secp384r1_uint1 x461; + uint32_t x462; + fiat_secp384r1_uint1 x463; + uint32_t x464; + fiat_secp384r1_uint1 x465; + uint32_t x466; + fiat_secp384r1_uint1 x467; + uint32_t x468; + fiat_secp384r1_uint1 x469; + uint32_t x470; + fiat_secp384r1_uint1 x471; + uint32_t x472; + uint32_t x473; + uint32_t x474; + uint32_t x475; + uint32_t x476; + uint32_t x477; + uint32_t x478; + uint32_t x479; + uint32_t x480; + uint32_t x481; + uint32_t x482; + uint32_t x483; + uint32_t x484; + uint32_t x485; + uint32_t x486; + uint32_t x487; + uint32_t x488; + uint32_t x489; + uint32_t x490; + uint32_t x491; + uint32_t x492; + fiat_secp384r1_uint1 x493; + uint32_t x494; + fiat_secp384r1_uint1 x495; + uint32_t x496; + fiat_secp384r1_uint1 x497; + uint32_t x498; + fiat_secp384r1_uint1 x499; + uint32_t x500; + fiat_secp384r1_uint1 x501; + uint32_t x502; + fiat_secp384r1_uint1 x503; + uint32_t x504; + fiat_secp384r1_uint1 x505; + uint32_t x506; + fiat_secp384r1_uint1 x507; + uint32_t x508; + fiat_secp384r1_uint1 x509; + uint32_t x510; + fiat_secp384r1_uint1 x511; + uint32_t x512; + fiat_secp384r1_uint1 x513; + uint32_t x514; + fiat_secp384r1_uint1 x515; + uint32_t x516; + fiat_secp384r1_uint1 x517; + uint32_t x518; + fiat_secp384r1_uint1 x519; + uint32_t x520; + fiat_secp384r1_uint1 x521; + uint32_t x522; + fiat_secp384r1_uint1 x523; + uint32_t x524; + fiat_secp384r1_uint1 x525; + uint32_t x526; + fiat_secp384r1_uint1 x527; + uint32_t x528; + fiat_secp384r1_uint1 x529; + uint32_t x530; + fiat_secp384r1_uint1 x531; + uint32_t x532; + fiat_secp384r1_uint1 x533; + uint32_t x534; + fiat_secp384r1_uint1 x535; + uint32_t x536; + fiat_secp384r1_uint1 x537; + uint32_t x538; + fiat_secp384r1_uint1 x539; + uint32_t x540; + fiat_secp384r1_uint1 x541; + uint32_t x542; + fiat_secp384r1_uint1 x543; + uint32_t x544; + fiat_secp384r1_uint1 x545; + uint32_t x546; + fiat_secp384r1_uint1 x547; + uint32_t x548; + 
fiat_secp384r1_uint1 x549; + uint32_t x550; + fiat_secp384r1_uint1 x551; + uint32_t x552; + fiat_secp384r1_uint1 x553; + uint32_t x554; + fiat_secp384r1_uint1 x555; + uint32_t x556; + fiat_secp384r1_uint1 x557; + uint32_t x558; + uint32_t x559; + uint32_t x560; + uint32_t x561; + uint32_t x562; + uint32_t x563; + uint32_t x564; + uint32_t x565; + uint32_t x566; + uint32_t x567; + uint32_t x568; + uint32_t x569; + uint32_t x570; + uint32_t x571; + uint32_t x572; + uint32_t x573; + uint32_t x574; + uint32_t x575; + uint32_t x576; + uint32_t x577; + uint32_t x578; + fiat_secp384r1_uint1 x579; + uint32_t x580; + fiat_secp384r1_uint1 x581; + uint32_t x582; + fiat_secp384r1_uint1 x583; + uint32_t x584; + fiat_secp384r1_uint1 x585; + uint32_t x586; + fiat_secp384r1_uint1 x587; + uint32_t x588; + fiat_secp384r1_uint1 x589; + uint32_t x590; + fiat_secp384r1_uint1 x591; + uint32_t x592; + fiat_secp384r1_uint1 x593; + uint32_t x594; + fiat_secp384r1_uint1 x595; + uint32_t x596; + fiat_secp384r1_uint1 x597; + uint32_t x598; + fiat_secp384r1_uint1 x599; + uint32_t x600; + fiat_secp384r1_uint1 x601; + uint32_t x602; + fiat_secp384r1_uint1 x603; + uint32_t x604; + fiat_secp384r1_uint1 x605; + uint32_t x606; + fiat_secp384r1_uint1 x607; + uint32_t x608; + fiat_secp384r1_uint1 x609; + uint32_t x610; + fiat_secp384r1_uint1 x611; + uint32_t x612; + fiat_secp384r1_uint1 x613; + uint32_t x614; + fiat_secp384r1_uint1 x615; + uint32_t x616; + fiat_secp384r1_uint1 x617; + uint32_t x618; + fiat_secp384r1_uint1 x619; + uint32_t x620; + fiat_secp384r1_uint1 x621; + uint32_t x622; + fiat_secp384r1_uint1 x623; + uint32_t x624; + fiat_secp384r1_uint1 x625; + uint32_t x626; + fiat_secp384r1_uint1 x627; + uint32_t x628; + fiat_secp384r1_uint1 x629; + uint32_t x630; + fiat_secp384r1_uint1 x631; + uint32_t x632; + fiat_secp384r1_uint1 x633; + uint32_t x634; + fiat_secp384r1_uint1 x635; + uint32_t x636; + fiat_secp384r1_uint1 x637; + uint32_t x638; + fiat_secp384r1_uint1 x639; + uint32_t x640; + 
fiat_secp384r1_uint1 x641; + uint32_t x642; + fiat_secp384r1_uint1 x643; + uint32_t x644; + uint32_t x645; + uint32_t x646; + uint32_t x647; + uint32_t x648; + uint32_t x649; + uint32_t x650; + uint32_t x651; + uint32_t x652; + uint32_t x653; + uint32_t x654; + uint32_t x655; + uint32_t x656; + uint32_t x657; + uint32_t x658; + uint32_t x659; + uint32_t x660; + uint32_t x661; + uint32_t x662; + uint32_t x663; + uint32_t x664; + fiat_secp384r1_uint1 x665; + uint32_t x666; + fiat_secp384r1_uint1 x667; + uint32_t x668; + fiat_secp384r1_uint1 x669; + uint32_t x670; + fiat_secp384r1_uint1 x671; + uint32_t x672; + fiat_secp384r1_uint1 x673; + uint32_t x674; + fiat_secp384r1_uint1 x675; + uint32_t x676; + fiat_secp384r1_uint1 x677; + uint32_t x678; + fiat_secp384r1_uint1 x679; + uint32_t x680; + fiat_secp384r1_uint1 x681; + uint32_t x682; + fiat_secp384r1_uint1 x683; + uint32_t x684; + fiat_secp384r1_uint1 x685; + uint32_t x686; + fiat_secp384r1_uint1 x687; + uint32_t x688; + fiat_secp384r1_uint1 x689; + uint32_t x690; + fiat_secp384r1_uint1 x691; + uint32_t x692; + fiat_secp384r1_uint1 x693; + uint32_t x694; + fiat_secp384r1_uint1 x695; + uint32_t x696; + fiat_secp384r1_uint1 x697; + uint32_t x698; + fiat_secp384r1_uint1 x699; + uint32_t x700; + fiat_secp384r1_uint1 x701; + uint32_t x702; + fiat_secp384r1_uint1 x703; + uint32_t x704; + fiat_secp384r1_uint1 x705; + uint32_t x706; + fiat_secp384r1_uint1 x707; + uint32_t x708; + fiat_secp384r1_uint1 x709; + uint32_t x710; + fiat_secp384r1_uint1 x711; + uint32_t x712; + fiat_secp384r1_uint1 x713; + uint32_t x714; + fiat_secp384r1_uint1 x715; + uint32_t x716; + fiat_secp384r1_uint1 x717; + uint32_t x718; + fiat_secp384r1_uint1 x719; + uint32_t x720; + fiat_secp384r1_uint1 x721; + uint32_t x722; + fiat_secp384r1_uint1 x723; + uint32_t x724; + fiat_secp384r1_uint1 x725; + uint32_t x726; + fiat_secp384r1_uint1 x727; + uint32_t x728; + fiat_secp384r1_uint1 x729; + uint32_t x730; + uint32_t x731; + uint32_t x732; + uint32_t x733; 
+ uint32_t x734; + uint32_t x735; + uint32_t x736; + uint32_t x737; + uint32_t x738; + uint32_t x739; + uint32_t x740; + uint32_t x741; + uint32_t x742; + uint32_t x743; + uint32_t x744; + uint32_t x745; + uint32_t x746; + uint32_t x747; + uint32_t x748; + uint32_t x749; + uint32_t x750; + fiat_secp384r1_uint1 x751; + uint32_t x752; + fiat_secp384r1_uint1 x753; + uint32_t x754; + fiat_secp384r1_uint1 x755; + uint32_t x756; + fiat_secp384r1_uint1 x757; + uint32_t x758; + fiat_secp384r1_uint1 x759; + uint32_t x760; + fiat_secp384r1_uint1 x761; + uint32_t x762; + fiat_secp384r1_uint1 x763; + uint32_t x764; + fiat_secp384r1_uint1 x765; + uint32_t x766; + fiat_secp384r1_uint1 x767; + uint32_t x768; + fiat_secp384r1_uint1 x769; + uint32_t x770; + fiat_secp384r1_uint1 x771; + uint32_t x772; + fiat_secp384r1_uint1 x773; + uint32_t x774; + fiat_secp384r1_uint1 x775; + uint32_t x776; + fiat_secp384r1_uint1 x777; + uint32_t x778; + fiat_secp384r1_uint1 x779; + uint32_t x780; + fiat_secp384r1_uint1 x781; + uint32_t x782; + fiat_secp384r1_uint1 x783; + uint32_t x784; + fiat_secp384r1_uint1 x785; + uint32_t x786; + fiat_secp384r1_uint1 x787; + uint32_t x788; + fiat_secp384r1_uint1 x789; + uint32_t x790; + fiat_secp384r1_uint1 x791; + uint32_t x792; + fiat_secp384r1_uint1 x793; + uint32_t x794; + fiat_secp384r1_uint1 x795; + uint32_t x796; + fiat_secp384r1_uint1 x797; + uint32_t x798; + fiat_secp384r1_uint1 x799; + uint32_t x800; + fiat_secp384r1_uint1 x801; + uint32_t x802; + fiat_secp384r1_uint1 x803; + uint32_t x804; + fiat_secp384r1_uint1 x805; + uint32_t x806; + fiat_secp384r1_uint1 x807; + uint32_t x808; + fiat_secp384r1_uint1 x809; + uint32_t x810; + fiat_secp384r1_uint1 x811; + uint32_t x812; + fiat_secp384r1_uint1 x813; + uint32_t x814; + fiat_secp384r1_uint1 x815; + uint32_t x816; + uint32_t x817; + uint32_t x818; + uint32_t x819; + uint32_t x820; + uint32_t x821; + uint32_t x822; + uint32_t x823; + uint32_t x824; + uint32_t x825; + uint32_t x826; + uint32_t x827; + 
uint32_t x828; + uint32_t x829; + uint32_t x830; + uint32_t x831; + uint32_t x832; + uint32_t x833; + uint32_t x834; + uint32_t x835; + uint32_t x836; + fiat_secp384r1_uint1 x837; + uint32_t x838; + fiat_secp384r1_uint1 x839; + uint32_t x840; + fiat_secp384r1_uint1 x841; + uint32_t x842; + fiat_secp384r1_uint1 x843; + uint32_t x844; + fiat_secp384r1_uint1 x845; + uint32_t x846; + fiat_secp384r1_uint1 x847; + uint32_t x848; + fiat_secp384r1_uint1 x849; + uint32_t x850; + fiat_secp384r1_uint1 x851; + uint32_t x852; + fiat_secp384r1_uint1 x853; + uint32_t x854; + fiat_secp384r1_uint1 x855; + uint32_t x856; + fiat_secp384r1_uint1 x857; + uint32_t x858; + fiat_secp384r1_uint1 x859; + uint32_t x860; + fiat_secp384r1_uint1 x861; + uint32_t x862; + fiat_secp384r1_uint1 x863; + uint32_t x864; + fiat_secp384r1_uint1 x865; + uint32_t x866; + fiat_secp384r1_uint1 x867; + uint32_t x868; + fiat_secp384r1_uint1 x869; + uint32_t x870; + fiat_secp384r1_uint1 x871; + uint32_t x872; + fiat_secp384r1_uint1 x873; + uint32_t x874; + fiat_secp384r1_uint1 x875; + uint32_t x876; + fiat_secp384r1_uint1 x877; + uint32_t x878; + fiat_secp384r1_uint1 x879; + uint32_t x880; + fiat_secp384r1_uint1 x881; + uint32_t x882; + fiat_secp384r1_uint1 x883; + uint32_t x884; + fiat_secp384r1_uint1 x885; + uint32_t x886; + fiat_secp384r1_uint1 x887; + uint32_t x888; + fiat_secp384r1_uint1 x889; + uint32_t x890; + fiat_secp384r1_uint1 x891; + uint32_t x892; + fiat_secp384r1_uint1 x893; + uint32_t x894; + fiat_secp384r1_uint1 x895; + uint32_t x896; + fiat_secp384r1_uint1 x897; + uint32_t x898; + fiat_secp384r1_uint1 x899; + uint32_t x900; + fiat_secp384r1_uint1 x901; + uint32_t x902; + uint32_t x903; + uint32_t x904; + uint32_t x905; + uint32_t x906; + uint32_t x907; + uint32_t x908; + uint32_t x909; + uint32_t x910; + uint32_t x911; + uint32_t x912; + uint32_t x913; + uint32_t x914; + uint32_t x915; + uint32_t x916; + uint32_t x917; + uint32_t x918; + uint32_t x919; + uint32_t x920; + uint32_t x921; + 
uint32_t x922; + fiat_secp384r1_uint1 x923; + uint32_t x924; + fiat_secp384r1_uint1 x925; + uint32_t x926; + fiat_secp384r1_uint1 x927; + uint32_t x928; + fiat_secp384r1_uint1 x929; + uint32_t x930; + fiat_secp384r1_uint1 x931; + uint32_t x932; + fiat_secp384r1_uint1 x933; + uint32_t x934; + fiat_secp384r1_uint1 x935; + uint32_t x936; + fiat_secp384r1_uint1 x937; + uint32_t x938; + fiat_secp384r1_uint1 x939; + uint32_t x940; + fiat_secp384r1_uint1 x941; + uint32_t x942; + fiat_secp384r1_uint1 x943; + uint32_t x944; + fiat_secp384r1_uint1 x945; + uint32_t x946; + fiat_secp384r1_uint1 x947; + uint32_t x948; + fiat_secp384r1_uint1 x949; + uint32_t x950; + fiat_secp384r1_uint1 x951; + uint32_t x952; + fiat_secp384r1_uint1 x953; + uint32_t x954; + fiat_secp384r1_uint1 x955; + uint32_t x956; + fiat_secp384r1_uint1 x957; + uint32_t x958; + fiat_secp384r1_uint1 x959; + uint32_t x960; + fiat_secp384r1_uint1 x961; + uint32_t x962; + fiat_secp384r1_uint1 x963; + uint32_t x964; + fiat_secp384r1_uint1 x965; + uint32_t x966; + fiat_secp384r1_uint1 x967; + uint32_t x968; + fiat_secp384r1_uint1 x969; + uint32_t x970; + fiat_secp384r1_uint1 x971; + uint32_t x972; + fiat_secp384r1_uint1 x973; + uint32_t x974; + fiat_secp384r1_uint1 x975; + uint32_t x976; + fiat_secp384r1_uint1 x977; + uint32_t x978; + fiat_secp384r1_uint1 x979; + uint32_t x980; + fiat_secp384r1_uint1 x981; + uint32_t x982; + fiat_secp384r1_uint1 x983; + uint32_t x984; + fiat_secp384r1_uint1 x985; + uint32_t x986; + fiat_secp384r1_uint1 x987; + uint32_t x988; + fiat_secp384r1_uint1 x989; + uint32_t x990; + uint32_t x991; + uint32_t x992; + uint32_t x993; + uint32_t x994; + uint32_t x995; + uint32_t x996; + uint32_t x997; + uint32_t x998; + uint32_t x999; + uint32_t x1000; + uint32_t x1001; + x1 = (arg1[0]); + fiat_secp384r1_mulx_u32(&x2, &x3, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x4, &x5, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x6, &x7, x1, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x8, &x9, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x10, &x11, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x12, &x13, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x14, &x15, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x16, &x17, x1, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x18, &x19, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x20, &x21, x1, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x22, &x23, 0x0, x19, x16); + fiat_secp384r1_addcarryx_u32(&x24, &x25, x23, x17, x14); + fiat_secp384r1_addcarryx_u32(&x26, &x27, x25, x15, x12); + fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, x13, x10); + fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, x11, x8); + fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, x9, x6); + fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, x7, x4); + fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, x5, x2); + fiat_secp384r1_addcarryx_u32(&x38, &x39, 0x0, x1, x20); + fiat_secp384r1_addcarryx_u32(&x40, &x41, 0x0, (x39 + x21), (arg1[1])); + fiat_secp384r1_mulx_u32(&x42, &x43, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x44, &x45, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x46, &x47, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x48, &x49, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x50, &x51, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x52, &x53, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x54, &x55, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x56, &x57, x40, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x58, &x59, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x60, &x61, x40, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x62, &x63, 0x0, x59, x56); + fiat_secp384r1_addcarryx_u32(&x64, &x65, x63, x57, x54); + fiat_secp384r1_addcarryx_u32(&x66, &x67, x65, x55, x52); + fiat_secp384r1_addcarryx_u32(&x68, &x69, x67, x53, x50); + fiat_secp384r1_addcarryx_u32(&x70, &x71, x69, x51, 
x48); + fiat_secp384r1_addcarryx_u32(&x72, &x73, x71, x49, x46); + fiat_secp384r1_addcarryx_u32(&x74, &x75, x73, x47, x44); + fiat_secp384r1_addcarryx_u32(&x76, &x77, x75, x45, x42); + fiat_secp384r1_addcarryx_u32(&x78, &x79, 0x0, x40, x60); + fiat_secp384r1_addcarryx_u32(&x80, &x81, x79, x41, x61); + fiat_secp384r1_addcarryx_u32(&x82, &x83, x81, x18, 0x0); + fiat_secp384r1_addcarryx_u32(&x84, &x85, x83, x22, x58); + fiat_secp384r1_addcarryx_u32(&x86, &x87, x85, x24, x62); + fiat_secp384r1_addcarryx_u32(&x88, &x89, x87, x26, x64); + fiat_secp384r1_addcarryx_u32(&x90, &x91, x89, x28, x66); + fiat_secp384r1_addcarryx_u32(&x92, &x93, x91, x30, x68); + fiat_secp384r1_addcarryx_u32(&x94, &x95, x93, x32, x70); + fiat_secp384r1_addcarryx_u32(&x96, &x97, x95, x34, x72); + fiat_secp384r1_addcarryx_u32(&x98, &x99, x97, x36, x74); + fiat_secp384r1_addcarryx_u32(&x100, &x101, x99, (x37 + x3), x76); + fiat_secp384r1_addcarryx_u32(&x102, &x103, x101, 0x0, (x77 + x43)); + fiat_secp384r1_addcarryx_u32(&x104, &x105, 0x0, x80, (arg1[2])); + fiat_secp384r1_addcarryx_u32(&x106, &x107, x105, x82, 0x0); + fiat_secp384r1_addcarryx_u32(&x108, &x109, x107, x84, 0x0); + fiat_secp384r1_addcarryx_u32(&x110, &x111, x109, x86, 0x0); + fiat_secp384r1_addcarryx_u32(&x112, &x113, x111, x88, 0x0); + fiat_secp384r1_addcarryx_u32(&x114, &x115, x113, x90, 0x0); + fiat_secp384r1_addcarryx_u32(&x116, &x117, x115, x92, 0x0); + fiat_secp384r1_addcarryx_u32(&x118, &x119, x117, x94, 0x0); + fiat_secp384r1_addcarryx_u32(&x120, &x121, x119, x96, 0x0); + fiat_secp384r1_addcarryx_u32(&x122, &x123, x121, x98, 0x0); + fiat_secp384r1_addcarryx_u32(&x124, &x125, x123, x100, 0x0); + fiat_secp384r1_addcarryx_u32(&x126, &x127, x125, x102, 0x0); + fiat_secp384r1_mulx_u32(&x128, &x129, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x130, &x131, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x132, &x133, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x134, &x135, x104, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x136, &x137, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x138, &x139, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x140, &x141, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x142, &x143, x104, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x144, &x145, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x146, &x147, x104, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x148, &x149, 0x0, x145, x142); + fiat_secp384r1_addcarryx_u32(&x150, &x151, x149, x143, x140); + fiat_secp384r1_addcarryx_u32(&x152, &x153, x151, x141, x138); + fiat_secp384r1_addcarryx_u32(&x154, &x155, x153, x139, x136); + fiat_secp384r1_addcarryx_u32(&x156, &x157, x155, x137, x134); + fiat_secp384r1_addcarryx_u32(&x158, &x159, x157, x135, x132); + fiat_secp384r1_addcarryx_u32(&x160, &x161, x159, x133, x130); + fiat_secp384r1_addcarryx_u32(&x162, &x163, x161, x131, x128); + fiat_secp384r1_addcarryx_u32(&x164, &x165, 0x0, x104, x146); + fiat_secp384r1_addcarryx_u32(&x166, &x167, x165, x106, x147); + fiat_secp384r1_addcarryx_u32(&x168, &x169, x167, x108, 0x0); + fiat_secp384r1_addcarryx_u32(&x170, &x171, x169, x110, x144); + fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x112, x148); + fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x114, x150); + fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x116, x152); + fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x118, x154); + fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x120, x156); + fiat_secp384r1_addcarryx_u32(&x182, &x183, x181, x122, x158); + fiat_secp384r1_addcarryx_u32(&x184, &x185, x183, x124, x160); + fiat_secp384r1_addcarryx_u32(&x186, &x187, x185, x126, x162); + fiat_secp384r1_addcarryx_u32(&x188, &x189, x187, ((uint32_t)x127 + x103), + (x163 + x129)); + fiat_secp384r1_addcarryx_u32(&x190, &x191, 0x0, x166, (arg1[3])); + fiat_secp384r1_addcarryx_u32(&x192, &x193, x191, x168, 0x0); + fiat_secp384r1_addcarryx_u32(&x194, &x195, x193, x170, 0x0); + 
fiat_secp384r1_addcarryx_u32(&x196, &x197, x195, x172, 0x0); + fiat_secp384r1_addcarryx_u32(&x198, &x199, x197, x174, 0x0); + fiat_secp384r1_addcarryx_u32(&x200, &x201, x199, x176, 0x0); + fiat_secp384r1_addcarryx_u32(&x202, &x203, x201, x178, 0x0); + fiat_secp384r1_addcarryx_u32(&x204, &x205, x203, x180, 0x0); + fiat_secp384r1_addcarryx_u32(&x206, &x207, x205, x182, 0x0); + fiat_secp384r1_addcarryx_u32(&x208, &x209, x207, x184, 0x0); + fiat_secp384r1_addcarryx_u32(&x210, &x211, x209, x186, 0x0); + fiat_secp384r1_addcarryx_u32(&x212, &x213, x211, x188, 0x0); + fiat_secp384r1_mulx_u32(&x214, &x215, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x216, &x217, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x218, &x219, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x220, &x221, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x222, &x223, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x224, &x225, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x226, &x227, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x228, &x229, x190, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x230, &x231, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x232, &x233, x190, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x234, &x235, 0x0, x231, x228); + fiat_secp384r1_addcarryx_u32(&x236, &x237, x235, x229, x226); + fiat_secp384r1_addcarryx_u32(&x238, &x239, x237, x227, x224); + fiat_secp384r1_addcarryx_u32(&x240, &x241, x239, x225, x222); + fiat_secp384r1_addcarryx_u32(&x242, &x243, x241, x223, x220); + fiat_secp384r1_addcarryx_u32(&x244, &x245, x243, x221, x218); + fiat_secp384r1_addcarryx_u32(&x246, &x247, x245, x219, x216); + fiat_secp384r1_addcarryx_u32(&x248, &x249, x247, x217, x214); + fiat_secp384r1_addcarryx_u32(&x250, &x251, 0x0, x190, x232); + fiat_secp384r1_addcarryx_u32(&x252, &x253, x251, x192, x233); + fiat_secp384r1_addcarryx_u32(&x254, &x255, x253, x194, 0x0); + fiat_secp384r1_addcarryx_u32(&x256, 
&x257, x255, x196, x230); + fiat_secp384r1_addcarryx_u32(&x258, &x259, x257, x198, x234); + fiat_secp384r1_addcarryx_u32(&x260, &x261, x259, x200, x236); + fiat_secp384r1_addcarryx_u32(&x262, &x263, x261, x202, x238); + fiat_secp384r1_addcarryx_u32(&x264, &x265, x263, x204, x240); + fiat_secp384r1_addcarryx_u32(&x266, &x267, x265, x206, x242); + fiat_secp384r1_addcarryx_u32(&x268, &x269, x267, x208, x244); + fiat_secp384r1_addcarryx_u32(&x270, &x271, x269, x210, x246); + fiat_secp384r1_addcarryx_u32(&x272, &x273, x271, x212, x248); + fiat_secp384r1_addcarryx_u32(&x274, &x275, x273, ((uint32_t)x213 + x189), + (x249 + x215)); + fiat_secp384r1_addcarryx_u32(&x276, &x277, 0x0, x252, (arg1[4])); + fiat_secp384r1_addcarryx_u32(&x278, &x279, x277, x254, 0x0); + fiat_secp384r1_addcarryx_u32(&x280, &x281, x279, x256, 0x0); + fiat_secp384r1_addcarryx_u32(&x282, &x283, x281, x258, 0x0); + fiat_secp384r1_addcarryx_u32(&x284, &x285, x283, x260, 0x0); + fiat_secp384r1_addcarryx_u32(&x286, &x287, x285, x262, 0x0); + fiat_secp384r1_addcarryx_u32(&x288, &x289, x287, x264, 0x0); + fiat_secp384r1_addcarryx_u32(&x290, &x291, x289, x266, 0x0); + fiat_secp384r1_addcarryx_u32(&x292, &x293, x291, x268, 0x0); + fiat_secp384r1_addcarryx_u32(&x294, &x295, x293, x270, 0x0); + fiat_secp384r1_addcarryx_u32(&x296, &x297, x295, x272, 0x0); + fiat_secp384r1_addcarryx_u32(&x298, &x299, x297, x274, 0x0); + fiat_secp384r1_mulx_u32(&x300, &x301, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x302, &x303, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x304, &x305, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x306, &x307, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x308, &x309, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x310, &x311, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x312, &x313, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x314, &x315, x276, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x316, &x317, x276, 
UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x318, &x319, x276, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x320, &x321, 0x0, x317, x314); + fiat_secp384r1_addcarryx_u32(&x322, &x323, x321, x315, x312); + fiat_secp384r1_addcarryx_u32(&x324, &x325, x323, x313, x310); + fiat_secp384r1_addcarryx_u32(&x326, &x327, x325, x311, x308); + fiat_secp384r1_addcarryx_u32(&x328, &x329, x327, x309, x306); + fiat_secp384r1_addcarryx_u32(&x330, &x331, x329, x307, x304); + fiat_secp384r1_addcarryx_u32(&x332, &x333, x331, x305, x302); + fiat_secp384r1_addcarryx_u32(&x334, &x335, x333, x303, x300); + fiat_secp384r1_addcarryx_u32(&x336, &x337, 0x0, x276, x318); + fiat_secp384r1_addcarryx_u32(&x338, &x339, x337, x278, x319); + fiat_secp384r1_addcarryx_u32(&x340, &x341, x339, x280, 0x0); + fiat_secp384r1_addcarryx_u32(&x342, &x343, x341, x282, x316); + fiat_secp384r1_addcarryx_u32(&x344, &x345, x343, x284, x320); + fiat_secp384r1_addcarryx_u32(&x346, &x347, x345, x286, x322); + fiat_secp384r1_addcarryx_u32(&x348, &x349, x347, x288, x324); + fiat_secp384r1_addcarryx_u32(&x350, &x351, x349, x290, x326); + fiat_secp384r1_addcarryx_u32(&x352, &x353, x351, x292, x328); + fiat_secp384r1_addcarryx_u32(&x354, &x355, x353, x294, x330); + fiat_secp384r1_addcarryx_u32(&x356, &x357, x355, x296, x332); + fiat_secp384r1_addcarryx_u32(&x358, &x359, x357, x298, x334); + fiat_secp384r1_addcarryx_u32(&x360, &x361, x359, ((uint32_t)x299 + x275), + (x335 + x301)); + fiat_secp384r1_addcarryx_u32(&x362, &x363, 0x0, x338, (arg1[5])); + fiat_secp384r1_addcarryx_u32(&x364, &x365, x363, x340, 0x0); + fiat_secp384r1_addcarryx_u32(&x366, &x367, x365, x342, 0x0); + fiat_secp384r1_addcarryx_u32(&x368, &x369, x367, x344, 0x0); + fiat_secp384r1_addcarryx_u32(&x370, &x371, x369, x346, 0x0); + fiat_secp384r1_addcarryx_u32(&x372, &x373, x371, x348, 0x0); + fiat_secp384r1_addcarryx_u32(&x374, &x375, x373, x350, 0x0); + fiat_secp384r1_addcarryx_u32(&x376, &x377, x375, x352, 0x0); + 
fiat_secp384r1_addcarryx_u32(&x378, &x379, x377, x354, 0x0); + fiat_secp384r1_addcarryx_u32(&x380, &x381, x379, x356, 0x0); + fiat_secp384r1_addcarryx_u32(&x382, &x383, x381, x358, 0x0); + fiat_secp384r1_addcarryx_u32(&x384, &x385, x383, x360, 0x0); + fiat_secp384r1_mulx_u32(&x386, &x387, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x388, &x389, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x390, &x391, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x392, &x393, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x394, &x395, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x396, &x397, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x398, &x399, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x400, &x401, x362, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x402, &x403, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x404, &x405, x362, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x406, &x407, 0x0, x403, x400); + fiat_secp384r1_addcarryx_u32(&x408, &x409, x407, x401, x398); + fiat_secp384r1_addcarryx_u32(&x410, &x411, x409, x399, x396); + fiat_secp384r1_addcarryx_u32(&x412, &x413, x411, x397, x394); + fiat_secp384r1_addcarryx_u32(&x414, &x415, x413, x395, x392); + fiat_secp384r1_addcarryx_u32(&x416, &x417, x415, x393, x390); + fiat_secp384r1_addcarryx_u32(&x418, &x419, x417, x391, x388); + fiat_secp384r1_addcarryx_u32(&x420, &x421, x419, x389, x386); + fiat_secp384r1_addcarryx_u32(&x422, &x423, 0x0, x362, x404); + fiat_secp384r1_addcarryx_u32(&x424, &x425, x423, x364, x405); + fiat_secp384r1_addcarryx_u32(&x426, &x427, x425, x366, 0x0); + fiat_secp384r1_addcarryx_u32(&x428, &x429, x427, x368, x402); + fiat_secp384r1_addcarryx_u32(&x430, &x431, x429, x370, x406); + fiat_secp384r1_addcarryx_u32(&x432, &x433, x431, x372, x408); + fiat_secp384r1_addcarryx_u32(&x434, &x435, x433, x374, x410); + fiat_secp384r1_addcarryx_u32(&x436, &x437, x435, x376, x412); + fiat_secp384r1_addcarryx_u32(&x438, 
&x439, x437, x378, x414); + fiat_secp384r1_addcarryx_u32(&x440, &x441, x439, x380, x416); + fiat_secp384r1_addcarryx_u32(&x442, &x443, x441, x382, x418); + fiat_secp384r1_addcarryx_u32(&x444, &x445, x443, x384, x420); + fiat_secp384r1_addcarryx_u32(&x446, &x447, x445, ((uint32_t)x385 + x361), + (x421 + x387)); + fiat_secp384r1_addcarryx_u32(&x448, &x449, 0x0, x424, (arg1[6])); + fiat_secp384r1_addcarryx_u32(&x450, &x451, x449, x426, 0x0); + fiat_secp384r1_addcarryx_u32(&x452, &x453, x451, x428, 0x0); + fiat_secp384r1_addcarryx_u32(&x454, &x455, x453, x430, 0x0); + fiat_secp384r1_addcarryx_u32(&x456, &x457, x455, x432, 0x0); + fiat_secp384r1_addcarryx_u32(&x458, &x459, x457, x434, 0x0); + fiat_secp384r1_addcarryx_u32(&x460, &x461, x459, x436, 0x0); + fiat_secp384r1_addcarryx_u32(&x462, &x463, x461, x438, 0x0); + fiat_secp384r1_addcarryx_u32(&x464, &x465, x463, x440, 0x0); + fiat_secp384r1_addcarryx_u32(&x466, &x467, x465, x442, 0x0); + fiat_secp384r1_addcarryx_u32(&x468, &x469, x467, x444, 0x0); + fiat_secp384r1_addcarryx_u32(&x470, &x471, x469, x446, 0x0); + fiat_secp384r1_mulx_u32(&x472, &x473, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x474, &x475, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x476, &x477, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x478, &x479, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x480, &x481, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x482, &x483, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x484, &x485, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x486, &x487, x448, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x488, &x489, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x490, &x491, x448, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x492, &x493, 0x0, x489, x486); + fiat_secp384r1_addcarryx_u32(&x494, &x495, x493, x487, x484); + fiat_secp384r1_addcarryx_u32(&x496, &x497, x495, x485, x482); + fiat_secp384r1_addcarryx_u32(&x498, &x499, 
x497, x483, x480); + fiat_secp384r1_addcarryx_u32(&x500, &x501, x499, x481, x478); + fiat_secp384r1_addcarryx_u32(&x502, &x503, x501, x479, x476); + fiat_secp384r1_addcarryx_u32(&x504, &x505, x503, x477, x474); + fiat_secp384r1_addcarryx_u32(&x506, &x507, x505, x475, x472); + fiat_secp384r1_addcarryx_u32(&x508, &x509, 0x0, x448, x490); + fiat_secp384r1_addcarryx_u32(&x510, &x511, x509, x450, x491); + fiat_secp384r1_addcarryx_u32(&x512, &x513, x511, x452, 0x0); + fiat_secp384r1_addcarryx_u32(&x514, &x515, x513, x454, x488); + fiat_secp384r1_addcarryx_u32(&x516, &x517, x515, x456, x492); + fiat_secp384r1_addcarryx_u32(&x518, &x519, x517, x458, x494); + fiat_secp384r1_addcarryx_u32(&x520, &x521, x519, x460, x496); + fiat_secp384r1_addcarryx_u32(&x522, &x523, x521, x462, x498); + fiat_secp384r1_addcarryx_u32(&x524, &x525, x523, x464, x500); + fiat_secp384r1_addcarryx_u32(&x526, &x527, x525, x466, x502); + fiat_secp384r1_addcarryx_u32(&x528, &x529, x527, x468, x504); + fiat_secp384r1_addcarryx_u32(&x530, &x531, x529, x470, x506); + fiat_secp384r1_addcarryx_u32(&x532, &x533, x531, ((uint32_t)x471 + x447), + (x507 + x473)); + fiat_secp384r1_addcarryx_u32(&x534, &x535, 0x0, x510, (arg1[7])); + fiat_secp384r1_addcarryx_u32(&x536, &x537, x535, x512, 0x0); + fiat_secp384r1_addcarryx_u32(&x538, &x539, x537, x514, 0x0); + fiat_secp384r1_addcarryx_u32(&x540, &x541, x539, x516, 0x0); + fiat_secp384r1_addcarryx_u32(&x542, &x543, x541, x518, 0x0); + fiat_secp384r1_addcarryx_u32(&x544, &x545, x543, x520, 0x0); + fiat_secp384r1_addcarryx_u32(&x546, &x547, x545, x522, 0x0); + fiat_secp384r1_addcarryx_u32(&x548, &x549, x547, x524, 0x0); + fiat_secp384r1_addcarryx_u32(&x550, &x551, x549, x526, 0x0); + fiat_secp384r1_addcarryx_u32(&x552, &x553, x551, x528, 0x0); + fiat_secp384r1_addcarryx_u32(&x554, &x555, x553, x530, 0x0); + fiat_secp384r1_addcarryx_u32(&x556, &x557, x555, x532, 0x0); + fiat_secp384r1_mulx_u32(&x558, &x559, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x560, 
&x561, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x562, &x563, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x564, &x565, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x566, &x567, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x568, &x569, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x570, &x571, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x572, &x573, x534, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x574, &x575, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x576, &x577, x534, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x578, &x579, 0x0, x575, x572); + fiat_secp384r1_addcarryx_u32(&x580, &x581, x579, x573, x570); + fiat_secp384r1_addcarryx_u32(&x582, &x583, x581, x571, x568); + fiat_secp384r1_addcarryx_u32(&x584, &x585, x583, x569, x566); + fiat_secp384r1_addcarryx_u32(&x586, &x587, x585, x567, x564); + fiat_secp384r1_addcarryx_u32(&x588, &x589, x587, x565, x562); + fiat_secp384r1_addcarryx_u32(&x590, &x591, x589, x563, x560); + fiat_secp384r1_addcarryx_u32(&x592, &x593, x591, x561, x558); + fiat_secp384r1_addcarryx_u32(&x594, &x595, 0x0, x534, x576); + fiat_secp384r1_addcarryx_u32(&x596, &x597, x595, x536, x577); + fiat_secp384r1_addcarryx_u32(&x598, &x599, x597, x538, 0x0); + fiat_secp384r1_addcarryx_u32(&x600, &x601, x599, x540, x574); + fiat_secp384r1_addcarryx_u32(&x602, &x603, x601, x542, x578); + fiat_secp384r1_addcarryx_u32(&x604, &x605, x603, x544, x580); + fiat_secp384r1_addcarryx_u32(&x606, &x607, x605, x546, x582); + fiat_secp384r1_addcarryx_u32(&x608, &x609, x607, x548, x584); + fiat_secp384r1_addcarryx_u32(&x610, &x611, x609, x550, x586); + fiat_secp384r1_addcarryx_u32(&x612, &x613, x611, x552, x588); + fiat_secp384r1_addcarryx_u32(&x614, &x615, x613, x554, x590); + fiat_secp384r1_addcarryx_u32(&x616, &x617, x615, x556, x592); + fiat_secp384r1_addcarryx_u32(&x618, &x619, x617, ((uint32_t)x557 + x533), + (x593 + x559)); + fiat_secp384r1_addcarryx_u32(&x620, 
&x621, 0x0, x596, (arg1[8])); + fiat_secp384r1_addcarryx_u32(&x622, &x623, x621, x598, 0x0); + fiat_secp384r1_addcarryx_u32(&x624, &x625, x623, x600, 0x0); + fiat_secp384r1_addcarryx_u32(&x626, &x627, x625, x602, 0x0); + fiat_secp384r1_addcarryx_u32(&x628, &x629, x627, x604, 0x0); + fiat_secp384r1_addcarryx_u32(&x630, &x631, x629, x606, 0x0); + fiat_secp384r1_addcarryx_u32(&x632, &x633, x631, x608, 0x0); + fiat_secp384r1_addcarryx_u32(&x634, &x635, x633, x610, 0x0); + fiat_secp384r1_addcarryx_u32(&x636, &x637, x635, x612, 0x0); + fiat_secp384r1_addcarryx_u32(&x638, &x639, x637, x614, 0x0); + fiat_secp384r1_addcarryx_u32(&x640, &x641, x639, x616, 0x0); + fiat_secp384r1_addcarryx_u32(&x642, &x643, x641, x618, 0x0); + fiat_secp384r1_mulx_u32(&x644, &x645, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x646, &x647, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x648, &x649, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x650, &x651, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x652, &x653, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x654, &x655, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x656, &x657, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x658, &x659, x620, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x660, &x661, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x662, &x663, x620, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x664, &x665, 0x0, x661, x658); + fiat_secp384r1_addcarryx_u32(&x666, &x667, x665, x659, x656); + fiat_secp384r1_addcarryx_u32(&x668, &x669, x667, x657, x654); + fiat_secp384r1_addcarryx_u32(&x670, &x671, x669, x655, x652); + fiat_secp384r1_addcarryx_u32(&x672, &x673, x671, x653, x650); + fiat_secp384r1_addcarryx_u32(&x674, &x675, x673, x651, x648); + fiat_secp384r1_addcarryx_u32(&x676, &x677, x675, x649, x646); + fiat_secp384r1_addcarryx_u32(&x678, &x679, x677, x647, x644); + fiat_secp384r1_addcarryx_u32(&x680, &x681, 0x0, x620, x662); + 
fiat_secp384r1_addcarryx_u32(&x682, &x683, x681, x622, x663); + fiat_secp384r1_addcarryx_u32(&x684, &x685, x683, x624, 0x0); + fiat_secp384r1_addcarryx_u32(&x686, &x687, x685, x626, x660); + fiat_secp384r1_addcarryx_u32(&x688, &x689, x687, x628, x664); + fiat_secp384r1_addcarryx_u32(&x690, &x691, x689, x630, x666); + fiat_secp384r1_addcarryx_u32(&x692, &x693, x691, x632, x668); + fiat_secp384r1_addcarryx_u32(&x694, &x695, x693, x634, x670); + fiat_secp384r1_addcarryx_u32(&x696, &x697, x695, x636, x672); + fiat_secp384r1_addcarryx_u32(&x698, &x699, x697, x638, x674); + fiat_secp384r1_addcarryx_u32(&x700, &x701, x699, x640, x676); + fiat_secp384r1_addcarryx_u32(&x702, &x703, x701, x642, x678); + fiat_secp384r1_addcarryx_u32(&x704, &x705, x703, ((uint32_t)x643 + x619), + (x679 + x645)); + fiat_secp384r1_addcarryx_u32(&x706, &x707, 0x0, x682, (arg1[9])); + fiat_secp384r1_addcarryx_u32(&x708, &x709, x707, x684, 0x0); + fiat_secp384r1_addcarryx_u32(&x710, &x711, x709, x686, 0x0); + fiat_secp384r1_addcarryx_u32(&x712, &x713, x711, x688, 0x0); + fiat_secp384r1_addcarryx_u32(&x714, &x715, x713, x690, 0x0); + fiat_secp384r1_addcarryx_u32(&x716, &x717, x715, x692, 0x0); + fiat_secp384r1_addcarryx_u32(&x718, &x719, x717, x694, 0x0); + fiat_secp384r1_addcarryx_u32(&x720, &x721, x719, x696, 0x0); + fiat_secp384r1_addcarryx_u32(&x722, &x723, x721, x698, 0x0); + fiat_secp384r1_addcarryx_u32(&x724, &x725, x723, x700, 0x0); + fiat_secp384r1_addcarryx_u32(&x726, &x727, x725, x702, 0x0); + fiat_secp384r1_addcarryx_u32(&x728, &x729, x727, x704, 0x0); + fiat_secp384r1_mulx_u32(&x730, &x731, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x732, &x733, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x734, &x735, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x736, &x737, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x738, &x739, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x740, &x741, x706, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x742, &x743, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x744, &x745, x706, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x746, &x747, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x748, &x749, x706, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x750, &x751, 0x0, x747, x744); + fiat_secp384r1_addcarryx_u32(&x752, &x753, x751, x745, x742); + fiat_secp384r1_addcarryx_u32(&x754, &x755, x753, x743, x740); + fiat_secp384r1_addcarryx_u32(&x756, &x757, x755, x741, x738); + fiat_secp384r1_addcarryx_u32(&x758, &x759, x757, x739, x736); + fiat_secp384r1_addcarryx_u32(&x760, &x761, x759, x737, x734); + fiat_secp384r1_addcarryx_u32(&x762, &x763, x761, x735, x732); + fiat_secp384r1_addcarryx_u32(&x764, &x765, x763, x733, x730); + fiat_secp384r1_addcarryx_u32(&x766, &x767, 0x0, x706, x748); + fiat_secp384r1_addcarryx_u32(&x768, &x769, x767, x708, x749); + fiat_secp384r1_addcarryx_u32(&x770, &x771, x769, x710, 0x0); + fiat_secp384r1_addcarryx_u32(&x772, &x773, x771, x712, x746); + fiat_secp384r1_addcarryx_u32(&x774, &x775, x773, x714, x750); + fiat_secp384r1_addcarryx_u32(&x776, &x777, x775, x716, x752); + fiat_secp384r1_addcarryx_u32(&x778, &x779, x777, x718, x754); + fiat_secp384r1_addcarryx_u32(&x780, &x781, x779, x720, x756); + fiat_secp384r1_addcarryx_u32(&x782, &x783, x781, x722, x758); + fiat_secp384r1_addcarryx_u32(&x784, &x785, x783, x724, x760); + fiat_secp384r1_addcarryx_u32(&x786, &x787, x785, x726, x762); + fiat_secp384r1_addcarryx_u32(&x788, &x789, x787, x728, x764); + fiat_secp384r1_addcarryx_u32(&x790, &x791, x789, ((uint32_t)x729 + x705), + (x765 + x731)); + fiat_secp384r1_addcarryx_u32(&x792, &x793, 0x0, x768, (arg1[10])); + fiat_secp384r1_addcarryx_u32(&x794, &x795, x793, x770, 0x0); + fiat_secp384r1_addcarryx_u32(&x796, &x797, x795, x772, 0x0); + fiat_secp384r1_addcarryx_u32(&x798, &x799, x797, x774, 0x0); + fiat_secp384r1_addcarryx_u32(&x800, &x801, x799, x776, 0x0); + 
fiat_secp384r1_addcarryx_u32(&x802, &x803, x801, x778, 0x0); + fiat_secp384r1_addcarryx_u32(&x804, &x805, x803, x780, 0x0); + fiat_secp384r1_addcarryx_u32(&x806, &x807, x805, x782, 0x0); + fiat_secp384r1_addcarryx_u32(&x808, &x809, x807, x784, 0x0); + fiat_secp384r1_addcarryx_u32(&x810, &x811, x809, x786, 0x0); + fiat_secp384r1_addcarryx_u32(&x812, &x813, x811, x788, 0x0); + fiat_secp384r1_addcarryx_u32(&x814, &x815, x813, x790, 0x0); + fiat_secp384r1_mulx_u32(&x816, &x817, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x818, &x819, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x820, &x821, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x822, &x823, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x824, &x825, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x826, &x827, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x828, &x829, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x830, &x831, x792, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x832, &x833, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x834, &x835, x792, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x836, &x837, 0x0, x833, x830); + fiat_secp384r1_addcarryx_u32(&x838, &x839, x837, x831, x828); + fiat_secp384r1_addcarryx_u32(&x840, &x841, x839, x829, x826); + fiat_secp384r1_addcarryx_u32(&x842, &x843, x841, x827, x824); + fiat_secp384r1_addcarryx_u32(&x844, &x845, x843, x825, x822); + fiat_secp384r1_addcarryx_u32(&x846, &x847, x845, x823, x820); + fiat_secp384r1_addcarryx_u32(&x848, &x849, x847, x821, x818); + fiat_secp384r1_addcarryx_u32(&x850, &x851, x849, x819, x816); + fiat_secp384r1_addcarryx_u32(&x852, &x853, 0x0, x792, x834); + fiat_secp384r1_addcarryx_u32(&x854, &x855, x853, x794, x835); + fiat_secp384r1_addcarryx_u32(&x856, &x857, x855, x796, 0x0); + fiat_secp384r1_addcarryx_u32(&x858, &x859, x857, x798, x832); + fiat_secp384r1_addcarryx_u32(&x860, &x861, x859, x800, x836); + fiat_secp384r1_addcarryx_u32(&x862, 
&x863, x861, x802, x838); + fiat_secp384r1_addcarryx_u32(&x864, &x865, x863, x804, x840); + fiat_secp384r1_addcarryx_u32(&x866, &x867, x865, x806, x842); + fiat_secp384r1_addcarryx_u32(&x868, &x869, x867, x808, x844); + fiat_secp384r1_addcarryx_u32(&x870, &x871, x869, x810, x846); + fiat_secp384r1_addcarryx_u32(&x872, &x873, x871, x812, x848); + fiat_secp384r1_addcarryx_u32(&x874, &x875, x873, x814, x850); + fiat_secp384r1_addcarryx_u32(&x876, &x877, x875, ((uint32_t)x815 + x791), + (x851 + x817)); + fiat_secp384r1_addcarryx_u32(&x878, &x879, 0x0, x854, (arg1[11])); + fiat_secp384r1_addcarryx_u32(&x880, &x881, x879, x856, 0x0); + fiat_secp384r1_addcarryx_u32(&x882, &x883, x881, x858, 0x0); + fiat_secp384r1_addcarryx_u32(&x884, &x885, x883, x860, 0x0); + fiat_secp384r1_addcarryx_u32(&x886, &x887, x885, x862, 0x0); + fiat_secp384r1_addcarryx_u32(&x888, &x889, x887, x864, 0x0); + fiat_secp384r1_addcarryx_u32(&x890, &x891, x889, x866, 0x0); + fiat_secp384r1_addcarryx_u32(&x892, &x893, x891, x868, 0x0); + fiat_secp384r1_addcarryx_u32(&x894, &x895, x893, x870, 0x0); + fiat_secp384r1_addcarryx_u32(&x896, &x897, x895, x872, 0x0); + fiat_secp384r1_addcarryx_u32(&x898, &x899, x897, x874, 0x0); + fiat_secp384r1_addcarryx_u32(&x900, &x901, x899, x876, 0x0); + fiat_secp384r1_mulx_u32(&x902, &x903, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x904, &x905, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x906, &x907, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x908, &x909, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x910, &x911, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x912, &x913, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x914, &x915, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x916, &x917, x878, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x918, &x919, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x920, &x921, x878, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x922, 
&x923, 0x0, x919, x916); + fiat_secp384r1_addcarryx_u32(&x924, &x925, x923, x917, x914); + fiat_secp384r1_addcarryx_u32(&x926, &x927, x925, x915, x912); + fiat_secp384r1_addcarryx_u32(&x928, &x929, x927, x913, x910); + fiat_secp384r1_addcarryx_u32(&x930, &x931, x929, x911, x908); + fiat_secp384r1_addcarryx_u32(&x932, &x933, x931, x909, x906); + fiat_secp384r1_addcarryx_u32(&x934, &x935, x933, x907, x904); + fiat_secp384r1_addcarryx_u32(&x936, &x937, x935, x905, x902); + fiat_secp384r1_addcarryx_u32(&x938, &x939, 0x0, x878, x920); + fiat_secp384r1_addcarryx_u32(&x940, &x941, x939, x880, x921); + fiat_secp384r1_addcarryx_u32(&x942, &x943, x941, x882, 0x0); + fiat_secp384r1_addcarryx_u32(&x944, &x945, x943, x884, x918); + fiat_secp384r1_addcarryx_u32(&x946, &x947, x945, x886, x922); + fiat_secp384r1_addcarryx_u32(&x948, &x949, x947, x888, x924); + fiat_secp384r1_addcarryx_u32(&x950, &x951, x949, x890, x926); + fiat_secp384r1_addcarryx_u32(&x952, &x953, x951, x892, x928); + fiat_secp384r1_addcarryx_u32(&x954, &x955, x953, x894, x930); + fiat_secp384r1_addcarryx_u32(&x956, &x957, x955, x896, x932); + fiat_secp384r1_addcarryx_u32(&x958, &x959, x957, x898, x934); + fiat_secp384r1_addcarryx_u32(&x960, &x961, x959, x900, x936); + fiat_secp384r1_addcarryx_u32(&x962, &x963, x961, ((uint32_t)x901 + x877), + (x937 + x903)); + fiat_secp384r1_subborrowx_u32(&x964, &x965, 0x0, x940, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x966, &x967, x965, x942, 0x0); + fiat_secp384r1_subborrowx_u32(&x968, &x969, x967, x944, 0x0); + fiat_secp384r1_subborrowx_u32(&x970, &x971, x969, x946, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x972, &x973, x971, x948, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x974, &x975, x973, x950, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x976, &x977, x975, x952, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x978, &x979, x977, x954, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x980, 
&x981, x979, x956, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x982, &x983, x981, x958, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x984, &x985, x983, x960, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x986, &x987, x985, x962, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x988, &x989, x987, x963, 0x0); + fiat_secp384r1_cmovznz_u32(&x990, x989, x964, x940); + fiat_secp384r1_cmovznz_u32(&x991, x989, x966, x942); + fiat_secp384r1_cmovznz_u32(&x992, x989, x968, x944); + fiat_secp384r1_cmovznz_u32(&x993, x989, x970, x946); + fiat_secp384r1_cmovznz_u32(&x994, x989, x972, x948); + fiat_secp384r1_cmovznz_u32(&x995, x989, x974, x950); + fiat_secp384r1_cmovznz_u32(&x996, x989, x976, x952); + fiat_secp384r1_cmovznz_u32(&x997, x989, x978, x954); + fiat_secp384r1_cmovznz_u32(&x998, x989, x980, x956); + fiat_secp384r1_cmovznz_u32(&x999, x989, x982, x958); + fiat_secp384r1_cmovznz_u32(&x1000, x989, x984, x960); + fiat_secp384r1_cmovznz_u32(&x1001, x989, x986, x962); + out1[0] = x990; + out1[1] = x991; + out1[2] = x992; + out1[3] = x993; + out1[4] = x994; + out1[5] = x995; + out1[6] = x996; + out1[7] = x997; + out1[8] = x998; + out1[9] = x999; + out1[10] = x1000; + out1[11] = x1001; +} + +/* + * The function fiat_secp384r1_to_montgomery translates a field element into the Montgomery domain. 
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = eval arg1 mod m + * 0 ≤ eval out1 < m + * + */ +static void +fiat_secp384r1_to_montgomery( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_non_montgomery_domain_field_element arg1) +{ + uint32_t x1; + uint32_t x2; + uint32_t x3; + uint32_t x4; + uint32_t x5; + uint32_t x6; + uint32_t x7; + uint32_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint32_t x16; + uint32_t x17; + uint32_t x18; + uint32_t x19; + uint32_t x20; + uint32_t x21; + fiat_secp384r1_uint1 x22; + uint32_t x23; + uint32_t x24; + uint32_t x25; + uint32_t x26; + uint32_t x27; + uint32_t x28; + uint32_t x29; + uint32_t x30; + uint32_t x31; + uint32_t x32; + uint32_t x33; + uint32_t x34; + uint32_t x35; + uint32_t x36; + uint32_t x37; + uint32_t x38; + uint32_t x39; + uint32_t x40; + uint32_t x41; + uint32_t x42; + uint32_t x43; + fiat_secp384r1_uint1 x44; + uint32_t x45; + fiat_secp384r1_uint1 x46; + uint32_t x47; + fiat_secp384r1_uint1 x48; + uint32_t x49; + fiat_secp384r1_uint1 x50; + uint32_t x51; + fiat_secp384r1_uint1 x52; + uint32_t x53; + fiat_secp384r1_uint1 x54; + uint32_t x55; + fiat_secp384r1_uint1 x56; + uint32_t x57; + fiat_secp384r1_uint1 x58; + uint32_t x59; + fiat_secp384r1_uint1 x60; + uint32_t x61; + fiat_secp384r1_uint1 x62; + uint32_t x63; + fiat_secp384r1_uint1 x64; + uint32_t x65; + fiat_secp384r1_uint1 x66; + uint32_t x67; + fiat_secp384r1_uint1 x68; + uint32_t x69; + fiat_secp384r1_uint1 x70; + uint32_t x71; + fiat_secp384r1_uint1 x72; + uint32_t x73; + fiat_secp384r1_uint1 x74; + uint32_t x75; + fiat_secp384r1_uint1 x76; + uint32_t x77; + fiat_secp384r1_uint1 x78; + uint32_t x79; + fiat_secp384r1_uint1 x80; + uint32_t x81; + fiat_secp384r1_uint1 x82; + uint32_t x83; + uint32_t x84; + uint32_t x85; + uint32_t x86; + uint32_t x87; + uint32_t x88; + uint32_t x89; + uint32_t 
x90; + uint32_t x91; + fiat_secp384r1_uint1 x92; + uint32_t x93; + fiat_secp384r1_uint1 x94; + uint32_t x95; + fiat_secp384r1_uint1 x96; + uint32_t x97; + fiat_secp384r1_uint1 x98; + uint32_t x99; + fiat_secp384r1_uint1 x100; + uint32_t x101; + fiat_secp384r1_uint1 x102; + uint32_t x103; + fiat_secp384r1_uint1 x104; + uint32_t x105; + fiat_secp384r1_uint1 x106; + uint32_t x107; + fiat_secp384r1_uint1 x108; + uint32_t x109; + fiat_secp384r1_uint1 x110; + uint32_t x111; + fiat_secp384r1_uint1 x112; + uint32_t x113; + fiat_secp384r1_uint1 x114; + uint32_t x115; + fiat_secp384r1_uint1 x116; + uint32_t x117; + uint32_t x118; + uint32_t x119; + uint32_t x120; + uint32_t x121; + uint32_t x122; + uint32_t x123; + uint32_t x124; + uint32_t x125; + uint32_t x126; + uint32_t x127; + uint32_t x128; + uint32_t x129; + uint32_t x130; + uint32_t x131; + uint32_t x132; + uint32_t x133; + uint32_t x134; + uint32_t x135; + uint32_t x136; + uint32_t x137; + fiat_secp384r1_uint1 x138; + uint32_t x139; + fiat_secp384r1_uint1 x140; + uint32_t x141; + fiat_secp384r1_uint1 x142; + uint32_t x143; + fiat_secp384r1_uint1 x144; + uint32_t x145; + fiat_secp384r1_uint1 x146; + uint32_t x147; + fiat_secp384r1_uint1 x148; + uint32_t x149; + fiat_secp384r1_uint1 x150; + uint32_t x151; + fiat_secp384r1_uint1 x152; + uint32_t x153; + fiat_secp384r1_uint1 x154; + uint32_t x155; + fiat_secp384r1_uint1 x156; + uint32_t x157; + fiat_secp384r1_uint1 x158; + uint32_t x159; + fiat_secp384r1_uint1 x160; + uint32_t x161; + fiat_secp384r1_uint1 x162; + uint32_t x163; + fiat_secp384r1_uint1 x164; + uint32_t x165; + fiat_secp384r1_uint1 x166; + uint32_t x167; + fiat_secp384r1_uint1 x168; + uint32_t x169; + fiat_secp384r1_uint1 x170; + uint32_t x171; + fiat_secp384r1_uint1 x172; + uint32_t x173; + fiat_secp384r1_uint1 x174; + uint32_t x175; + fiat_secp384r1_uint1 x176; + uint32_t x177; + fiat_secp384r1_uint1 x178; + uint32_t x179; + uint32_t x180; + uint32_t x181; + uint32_t x182; + uint32_t x183; + uint32_t 
x184; + uint32_t x185; + uint32_t x186; + uint32_t x187; + fiat_secp384r1_uint1 x188; + uint32_t x189; + fiat_secp384r1_uint1 x190; + uint32_t x191; + fiat_secp384r1_uint1 x192; + uint32_t x193; + fiat_secp384r1_uint1 x194; + uint32_t x195; + fiat_secp384r1_uint1 x196; + uint32_t x197; + fiat_secp384r1_uint1 x198; + uint32_t x199; + fiat_secp384r1_uint1 x200; + uint32_t x201; + fiat_secp384r1_uint1 x202; + uint32_t x203; + fiat_secp384r1_uint1 x204; + uint32_t x205; + fiat_secp384r1_uint1 x206; + uint32_t x207; + fiat_secp384r1_uint1 x208; + uint32_t x209; + fiat_secp384r1_uint1 x210; + uint32_t x211; + fiat_secp384r1_uint1 x212; + uint32_t x213; + uint32_t x214; + uint32_t x215; + uint32_t x216; + uint32_t x217; + uint32_t x218; + uint32_t x219; + uint32_t x220; + uint32_t x221; + uint32_t x222; + uint32_t x223; + uint32_t x224; + uint32_t x225; + uint32_t x226; + uint32_t x227; + uint32_t x228; + uint32_t x229; + uint32_t x230; + uint32_t x231; + uint32_t x232; + uint32_t x233; + fiat_secp384r1_uint1 x234; + uint32_t x235; + fiat_secp384r1_uint1 x236; + uint32_t x237; + fiat_secp384r1_uint1 x238; + uint32_t x239; + fiat_secp384r1_uint1 x240; + uint32_t x241; + fiat_secp384r1_uint1 x242; + uint32_t x243; + fiat_secp384r1_uint1 x244; + uint32_t x245; + fiat_secp384r1_uint1 x246; + uint32_t x247; + fiat_secp384r1_uint1 x248; + uint32_t x249; + fiat_secp384r1_uint1 x250; + uint32_t x251; + fiat_secp384r1_uint1 x252; + uint32_t x253; + fiat_secp384r1_uint1 x254; + uint32_t x255; + fiat_secp384r1_uint1 x256; + uint32_t x257; + fiat_secp384r1_uint1 x258; + uint32_t x259; + fiat_secp384r1_uint1 x260; + uint32_t x261; + fiat_secp384r1_uint1 x262; + uint32_t x263; + fiat_secp384r1_uint1 x264; + uint32_t x265; + fiat_secp384r1_uint1 x266; + uint32_t x267; + fiat_secp384r1_uint1 x268; + uint32_t x269; + fiat_secp384r1_uint1 x270; + uint32_t x271; + fiat_secp384r1_uint1 x272; + uint32_t x273; + fiat_secp384r1_uint1 x274; + uint32_t x275; + uint32_t x276; + uint32_t x277; + 
uint32_t x278; + uint32_t x279; + uint32_t x280; + uint32_t x281; + uint32_t x282; + uint32_t x283; + fiat_secp384r1_uint1 x284; + uint32_t x285; + fiat_secp384r1_uint1 x286; + uint32_t x287; + fiat_secp384r1_uint1 x288; + uint32_t x289; + fiat_secp384r1_uint1 x290; + uint32_t x291; + fiat_secp384r1_uint1 x292; + uint32_t x293; + fiat_secp384r1_uint1 x294; + uint32_t x295; + fiat_secp384r1_uint1 x296; + uint32_t x297; + fiat_secp384r1_uint1 x298; + uint32_t x299; + fiat_secp384r1_uint1 x300; + uint32_t x301; + fiat_secp384r1_uint1 x302; + uint32_t x303; + fiat_secp384r1_uint1 x304; + uint32_t x305; + fiat_secp384r1_uint1 x306; + uint32_t x307; + fiat_secp384r1_uint1 x308; + uint32_t x309; + uint32_t x310; + uint32_t x311; + uint32_t x312; + uint32_t x313; + uint32_t x314; + uint32_t x315; + uint32_t x316; + uint32_t x317; + uint32_t x318; + uint32_t x319; + uint32_t x320; + uint32_t x321; + uint32_t x322; + uint32_t x323; + uint32_t x324; + uint32_t x325; + uint32_t x326; + uint32_t x327; + uint32_t x328; + uint32_t x329; + fiat_secp384r1_uint1 x330; + uint32_t x331; + fiat_secp384r1_uint1 x332; + uint32_t x333; + fiat_secp384r1_uint1 x334; + uint32_t x335; + fiat_secp384r1_uint1 x336; + uint32_t x337; + fiat_secp384r1_uint1 x338; + uint32_t x339; + fiat_secp384r1_uint1 x340; + uint32_t x341; + fiat_secp384r1_uint1 x342; + uint32_t x343; + fiat_secp384r1_uint1 x344; + uint32_t x345; + fiat_secp384r1_uint1 x346; + uint32_t x347; + fiat_secp384r1_uint1 x348; + uint32_t x349; + fiat_secp384r1_uint1 x350; + uint32_t x351; + fiat_secp384r1_uint1 x352; + uint32_t x353; + fiat_secp384r1_uint1 x354; + uint32_t x355; + fiat_secp384r1_uint1 x356; + uint32_t x357; + fiat_secp384r1_uint1 x358; + uint32_t x359; + fiat_secp384r1_uint1 x360; + uint32_t x361; + fiat_secp384r1_uint1 x362; + uint32_t x363; + fiat_secp384r1_uint1 x364; + uint32_t x365; + fiat_secp384r1_uint1 x366; + uint32_t x367; + fiat_secp384r1_uint1 x368; + uint32_t x369; + fiat_secp384r1_uint1 x370; + uint32_t 
x371; + uint32_t x372; + uint32_t x373; + uint32_t x374; + uint32_t x375; + uint32_t x376; + uint32_t x377; + uint32_t x378; + uint32_t x379; + fiat_secp384r1_uint1 x380; + uint32_t x381; + fiat_secp384r1_uint1 x382; + uint32_t x383; + fiat_secp384r1_uint1 x384; + uint32_t x385; + fiat_secp384r1_uint1 x386; + uint32_t x387; + fiat_secp384r1_uint1 x388; + uint32_t x389; + fiat_secp384r1_uint1 x390; + uint32_t x391; + fiat_secp384r1_uint1 x392; + uint32_t x393; + fiat_secp384r1_uint1 x394; + uint32_t x395; + fiat_secp384r1_uint1 x396; + uint32_t x397; + fiat_secp384r1_uint1 x398; + uint32_t x399; + fiat_secp384r1_uint1 x400; + uint32_t x401; + fiat_secp384r1_uint1 x402; + uint32_t x403; + fiat_secp384r1_uint1 x404; + uint32_t x405; + uint32_t x406; + uint32_t x407; + uint32_t x408; + uint32_t x409; + uint32_t x410; + uint32_t x411; + uint32_t x412; + uint32_t x413; + uint32_t x414; + uint32_t x415; + uint32_t x416; + uint32_t x417; + uint32_t x418; + uint32_t x419; + uint32_t x420; + uint32_t x421; + uint32_t x422; + uint32_t x423; + uint32_t x424; + uint32_t x425; + fiat_secp384r1_uint1 x426; + uint32_t x427; + fiat_secp384r1_uint1 x428; + uint32_t x429; + fiat_secp384r1_uint1 x430; + uint32_t x431; + fiat_secp384r1_uint1 x432; + uint32_t x433; + fiat_secp384r1_uint1 x434; + uint32_t x435; + fiat_secp384r1_uint1 x436; + uint32_t x437; + fiat_secp384r1_uint1 x438; + uint32_t x439; + fiat_secp384r1_uint1 x440; + uint32_t x441; + fiat_secp384r1_uint1 x442; + uint32_t x443; + fiat_secp384r1_uint1 x444; + uint32_t x445; + fiat_secp384r1_uint1 x446; + uint32_t x447; + fiat_secp384r1_uint1 x448; + uint32_t x449; + fiat_secp384r1_uint1 x450; + uint32_t x451; + fiat_secp384r1_uint1 x452; + uint32_t x453; + fiat_secp384r1_uint1 x454; + uint32_t x455; + fiat_secp384r1_uint1 x456; + uint32_t x457; + fiat_secp384r1_uint1 x458; + uint32_t x459; + fiat_secp384r1_uint1 x460; + uint32_t x461; + fiat_secp384r1_uint1 x462; + uint32_t x463; + fiat_secp384r1_uint1 x464; + uint32_t x465; 
+ fiat_secp384r1_uint1 x466; + uint32_t x467; + uint32_t x468; + uint32_t x469; + uint32_t x470; + uint32_t x471; + uint32_t x472; + uint32_t x473; + uint32_t x474; + uint32_t x475; + fiat_secp384r1_uint1 x476; + uint32_t x477; + fiat_secp384r1_uint1 x478; + uint32_t x479; + fiat_secp384r1_uint1 x480; + uint32_t x481; + fiat_secp384r1_uint1 x482; + uint32_t x483; + fiat_secp384r1_uint1 x484; + uint32_t x485; + fiat_secp384r1_uint1 x486; + uint32_t x487; + fiat_secp384r1_uint1 x488; + uint32_t x489; + fiat_secp384r1_uint1 x490; + uint32_t x491; + fiat_secp384r1_uint1 x492; + uint32_t x493; + fiat_secp384r1_uint1 x494; + uint32_t x495; + fiat_secp384r1_uint1 x496; + uint32_t x497; + fiat_secp384r1_uint1 x498; + uint32_t x499; + fiat_secp384r1_uint1 x500; + uint32_t x501; + uint32_t x502; + uint32_t x503; + uint32_t x504; + uint32_t x505; + uint32_t x506; + uint32_t x507; + uint32_t x508; + uint32_t x509; + uint32_t x510; + uint32_t x511; + uint32_t x512; + uint32_t x513; + uint32_t x514; + uint32_t x515; + uint32_t x516; + uint32_t x517; + uint32_t x518; + uint32_t x519; + uint32_t x520; + uint32_t x521; + fiat_secp384r1_uint1 x522; + uint32_t x523; + fiat_secp384r1_uint1 x524; + uint32_t x525; + fiat_secp384r1_uint1 x526; + uint32_t x527; + fiat_secp384r1_uint1 x528; + uint32_t x529; + fiat_secp384r1_uint1 x530; + uint32_t x531; + fiat_secp384r1_uint1 x532; + uint32_t x533; + fiat_secp384r1_uint1 x534; + uint32_t x535; + fiat_secp384r1_uint1 x536; + uint32_t x537; + fiat_secp384r1_uint1 x538; + uint32_t x539; + fiat_secp384r1_uint1 x540; + uint32_t x541; + fiat_secp384r1_uint1 x542; + uint32_t x543; + fiat_secp384r1_uint1 x544; + uint32_t x545; + fiat_secp384r1_uint1 x546; + uint32_t x547; + fiat_secp384r1_uint1 x548; + uint32_t x549; + fiat_secp384r1_uint1 x550; + uint32_t x551; + fiat_secp384r1_uint1 x552; + uint32_t x553; + fiat_secp384r1_uint1 x554; + uint32_t x555; + fiat_secp384r1_uint1 x556; + uint32_t x557; + fiat_secp384r1_uint1 x558; + uint32_t x559; + 
fiat_secp384r1_uint1 x560; + uint32_t x561; + fiat_secp384r1_uint1 x562; + uint32_t x563; + uint32_t x564; + uint32_t x565; + uint32_t x566; + uint32_t x567; + uint32_t x568; + uint32_t x569; + uint32_t x570; + uint32_t x571; + fiat_secp384r1_uint1 x572; + uint32_t x573; + fiat_secp384r1_uint1 x574; + uint32_t x575; + fiat_secp384r1_uint1 x576; + uint32_t x577; + fiat_secp384r1_uint1 x578; + uint32_t x579; + fiat_secp384r1_uint1 x580; + uint32_t x581; + fiat_secp384r1_uint1 x582; + uint32_t x583; + fiat_secp384r1_uint1 x584; + uint32_t x585; + fiat_secp384r1_uint1 x586; + uint32_t x587; + fiat_secp384r1_uint1 x588; + uint32_t x589; + fiat_secp384r1_uint1 x590; + uint32_t x591; + fiat_secp384r1_uint1 x592; + uint32_t x593; + fiat_secp384r1_uint1 x594; + uint32_t x595; + fiat_secp384r1_uint1 x596; + uint32_t x597; + uint32_t x598; + uint32_t x599; + uint32_t x600; + uint32_t x601; + uint32_t x602; + uint32_t x603; + uint32_t x604; + uint32_t x605; + uint32_t x606; + uint32_t x607; + uint32_t x608; + uint32_t x609; + uint32_t x610; + uint32_t x611; + uint32_t x612; + uint32_t x613; + uint32_t x614; + uint32_t x615; + uint32_t x616; + uint32_t x617; + fiat_secp384r1_uint1 x618; + uint32_t x619; + fiat_secp384r1_uint1 x620; + uint32_t x621; + fiat_secp384r1_uint1 x622; + uint32_t x623; + fiat_secp384r1_uint1 x624; + uint32_t x625; + fiat_secp384r1_uint1 x626; + uint32_t x627; + fiat_secp384r1_uint1 x628; + uint32_t x629; + fiat_secp384r1_uint1 x630; + uint32_t x631; + fiat_secp384r1_uint1 x632; + uint32_t x633; + fiat_secp384r1_uint1 x634; + uint32_t x635; + fiat_secp384r1_uint1 x636; + uint32_t x637; + fiat_secp384r1_uint1 x638; + uint32_t x639; + fiat_secp384r1_uint1 x640; + uint32_t x641; + fiat_secp384r1_uint1 x642; + uint32_t x643; + fiat_secp384r1_uint1 x644; + uint32_t x645; + fiat_secp384r1_uint1 x646; + uint32_t x647; + fiat_secp384r1_uint1 x648; + uint32_t x649; + fiat_secp384r1_uint1 x650; + uint32_t x651; + fiat_secp384r1_uint1 x652; + uint32_t x653; + 
fiat_secp384r1_uint1 x654; + uint32_t x655; + fiat_secp384r1_uint1 x656; + uint32_t x657; + fiat_secp384r1_uint1 x658; + uint32_t x659; + uint32_t x660; + uint32_t x661; + uint32_t x662; + uint32_t x663; + uint32_t x664; + uint32_t x665; + uint32_t x666; + uint32_t x667; + fiat_secp384r1_uint1 x668; + uint32_t x669; + fiat_secp384r1_uint1 x670; + uint32_t x671; + fiat_secp384r1_uint1 x672; + uint32_t x673; + fiat_secp384r1_uint1 x674; + uint32_t x675; + fiat_secp384r1_uint1 x676; + uint32_t x677; + fiat_secp384r1_uint1 x678; + uint32_t x679; + fiat_secp384r1_uint1 x680; + uint32_t x681; + fiat_secp384r1_uint1 x682; + uint32_t x683; + fiat_secp384r1_uint1 x684; + uint32_t x685; + fiat_secp384r1_uint1 x686; + uint32_t x687; + fiat_secp384r1_uint1 x688; + uint32_t x689; + fiat_secp384r1_uint1 x690; + uint32_t x691; + fiat_secp384r1_uint1 x692; + uint32_t x693; + uint32_t x694; + uint32_t x695; + uint32_t x696; + uint32_t x697; + uint32_t x698; + uint32_t x699; + uint32_t x700; + uint32_t x701; + uint32_t x702; + uint32_t x703; + uint32_t x704; + uint32_t x705; + uint32_t x706; + uint32_t x707; + uint32_t x708; + uint32_t x709; + uint32_t x710; + uint32_t x711; + uint32_t x712; + uint32_t x713; + fiat_secp384r1_uint1 x714; + uint32_t x715; + fiat_secp384r1_uint1 x716; + uint32_t x717; + fiat_secp384r1_uint1 x718; + uint32_t x719; + fiat_secp384r1_uint1 x720; + uint32_t x721; + fiat_secp384r1_uint1 x722; + uint32_t x723; + fiat_secp384r1_uint1 x724; + uint32_t x725; + fiat_secp384r1_uint1 x726; + uint32_t x727; + fiat_secp384r1_uint1 x728; + uint32_t x729; + fiat_secp384r1_uint1 x730; + uint32_t x731; + fiat_secp384r1_uint1 x732; + uint32_t x733; + fiat_secp384r1_uint1 x734; + uint32_t x735; + fiat_secp384r1_uint1 x736; + uint32_t x737; + fiat_secp384r1_uint1 x738; + uint32_t x739; + fiat_secp384r1_uint1 x740; + uint32_t x741; + fiat_secp384r1_uint1 x742; + uint32_t x743; + fiat_secp384r1_uint1 x744; + uint32_t x745; + fiat_secp384r1_uint1 x746; + uint32_t x747; + 
fiat_secp384r1_uint1 x748; + uint32_t x749; + fiat_secp384r1_uint1 x750; + uint32_t x751; + fiat_secp384r1_uint1 x752; + uint32_t x753; + fiat_secp384r1_uint1 x754; + uint32_t x755; + uint32_t x756; + uint32_t x757; + uint32_t x758; + uint32_t x759; + uint32_t x760; + uint32_t x761; + uint32_t x762; + uint32_t x763; + fiat_secp384r1_uint1 x764; + uint32_t x765; + fiat_secp384r1_uint1 x766; + uint32_t x767; + fiat_secp384r1_uint1 x768; + uint32_t x769; + fiat_secp384r1_uint1 x770; + uint32_t x771; + fiat_secp384r1_uint1 x772; + uint32_t x773; + fiat_secp384r1_uint1 x774; + uint32_t x775; + fiat_secp384r1_uint1 x776; + uint32_t x777; + fiat_secp384r1_uint1 x778; + uint32_t x779; + fiat_secp384r1_uint1 x780; + uint32_t x781; + fiat_secp384r1_uint1 x782; + uint32_t x783; + fiat_secp384r1_uint1 x784; + uint32_t x785; + fiat_secp384r1_uint1 x786; + uint32_t x787; + fiat_secp384r1_uint1 x788; + uint32_t x789; + uint32_t x790; + uint32_t x791; + uint32_t x792; + uint32_t x793; + uint32_t x794; + uint32_t x795; + uint32_t x796; + uint32_t x797; + uint32_t x798; + uint32_t x799; + uint32_t x800; + uint32_t x801; + uint32_t x802; + uint32_t x803; + uint32_t x804; + uint32_t x805; + uint32_t x806; + uint32_t x807; + uint32_t x808; + uint32_t x809; + fiat_secp384r1_uint1 x810; + uint32_t x811; + fiat_secp384r1_uint1 x812; + uint32_t x813; + fiat_secp384r1_uint1 x814; + uint32_t x815; + fiat_secp384r1_uint1 x816; + uint32_t x817; + fiat_secp384r1_uint1 x818; + uint32_t x819; + fiat_secp384r1_uint1 x820; + uint32_t x821; + fiat_secp384r1_uint1 x822; + uint32_t x823; + fiat_secp384r1_uint1 x824; + uint32_t x825; + fiat_secp384r1_uint1 x826; + uint32_t x827; + fiat_secp384r1_uint1 x828; + uint32_t x829; + fiat_secp384r1_uint1 x830; + uint32_t x831; + fiat_secp384r1_uint1 x832; + uint32_t x833; + fiat_secp384r1_uint1 x834; + uint32_t x835; + fiat_secp384r1_uint1 x836; + uint32_t x837; + fiat_secp384r1_uint1 x838; + uint32_t x839; + fiat_secp384r1_uint1 x840; + uint32_t x841; + 
fiat_secp384r1_uint1 x842; + uint32_t x843; + fiat_secp384r1_uint1 x844; + uint32_t x845; + fiat_secp384r1_uint1 x846; + uint32_t x847; + fiat_secp384r1_uint1 x848; + uint32_t x849; + fiat_secp384r1_uint1 x850; + uint32_t x851; + uint32_t x852; + uint32_t x853; + uint32_t x854; + uint32_t x855; + uint32_t x856; + uint32_t x857; + uint32_t x858; + uint32_t x859; + fiat_secp384r1_uint1 x860; + uint32_t x861; + fiat_secp384r1_uint1 x862; + uint32_t x863; + fiat_secp384r1_uint1 x864; + uint32_t x865; + fiat_secp384r1_uint1 x866; + uint32_t x867; + fiat_secp384r1_uint1 x868; + uint32_t x869; + fiat_secp384r1_uint1 x870; + uint32_t x871; + fiat_secp384r1_uint1 x872; + uint32_t x873; + fiat_secp384r1_uint1 x874; + uint32_t x875; + fiat_secp384r1_uint1 x876; + uint32_t x877; + fiat_secp384r1_uint1 x878; + uint32_t x879; + fiat_secp384r1_uint1 x880; + uint32_t x881; + fiat_secp384r1_uint1 x882; + uint32_t x883; + fiat_secp384r1_uint1 x884; + uint32_t x885; + uint32_t x886; + uint32_t x887; + uint32_t x888; + uint32_t x889; + uint32_t x890; + uint32_t x891; + uint32_t x892; + uint32_t x893; + uint32_t x894; + uint32_t x895; + uint32_t x896; + uint32_t x897; + uint32_t x898; + uint32_t x899; + uint32_t x900; + uint32_t x901; + uint32_t x902; + uint32_t x903; + uint32_t x904; + uint32_t x905; + fiat_secp384r1_uint1 x906; + uint32_t x907; + fiat_secp384r1_uint1 x908; + uint32_t x909; + fiat_secp384r1_uint1 x910; + uint32_t x911; + fiat_secp384r1_uint1 x912; + uint32_t x913; + fiat_secp384r1_uint1 x914; + uint32_t x915; + fiat_secp384r1_uint1 x916; + uint32_t x917; + fiat_secp384r1_uint1 x918; + uint32_t x919; + fiat_secp384r1_uint1 x920; + uint32_t x921; + fiat_secp384r1_uint1 x922; + uint32_t x923; + fiat_secp384r1_uint1 x924; + uint32_t x925; + fiat_secp384r1_uint1 x926; + uint32_t x927; + fiat_secp384r1_uint1 x928; + uint32_t x929; + fiat_secp384r1_uint1 x930; + uint32_t x931; + fiat_secp384r1_uint1 x932; + uint32_t x933; + fiat_secp384r1_uint1 x934; + uint32_t x935; + 
fiat_secp384r1_uint1 x936; + uint32_t x937; + fiat_secp384r1_uint1 x938; + uint32_t x939; + fiat_secp384r1_uint1 x940; + uint32_t x941; + fiat_secp384r1_uint1 x942; + uint32_t x943; + fiat_secp384r1_uint1 x944; + uint32_t x945; + fiat_secp384r1_uint1 x946; + uint32_t x947; + uint32_t x948; + uint32_t x949; + uint32_t x950; + uint32_t x951; + uint32_t x952; + uint32_t x953; + uint32_t x954; + uint32_t x955; + fiat_secp384r1_uint1 x956; + uint32_t x957; + fiat_secp384r1_uint1 x958; + uint32_t x959; + fiat_secp384r1_uint1 x960; + uint32_t x961; + fiat_secp384r1_uint1 x962; + uint32_t x963; + fiat_secp384r1_uint1 x964; + uint32_t x965; + fiat_secp384r1_uint1 x966; + uint32_t x967; + fiat_secp384r1_uint1 x968; + uint32_t x969; + fiat_secp384r1_uint1 x970; + uint32_t x971; + fiat_secp384r1_uint1 x972; + uint32_t x973; + fiat_secp384r1_uint1 x974; + uint32_t x975; + fiat_secp384r1_uint1 x976; + uint32_t x977; + fiat_secp384r1_uint1 x978; + uint32_t x979; + fiat_secp384r1_uint1 x980; + uint32_t x981; + uint32_t x982; + uint32_t x983; + uint32_t x984; + uint32_t x985; + uint32_t x986; + uint32_t x987; + uint32_t x988; + uint32_t x989; + uint32_t x990; + uint32_t x991; + uint32_t x992; + uint32_t x993; + uint32_t x994; + uint32_t x995; + uint32_t x996; + uint32_t x997; + uint32_t x998; + uint32_t x999; + uint32_t x1000; + uint32_t x1001; + fiat_secp384r1_uint1 x1002; + uint32_t x1003; + fiat_secp384r1_uint1 x1004; + uint32_t x1005; + fiat_secp384r1_uint1 x1006; + uint32_t x1007; + fiat_secp384r1_uint1 x1008; + uint32_t x1009; + fiat_secp384r1_uint1 x1010; + uint32_t x1011; + fiat_secp384r1_uint1 x1012; + uint32_t x1013; + fiat_secp384r1_uint1 x1014; + uint32_t x1015; + fiat_secp384r1_uint1 x1016; + uint32_t x1017; + fiat_secp384r1_uint1 x1018; + uint32_t x1019; + fiat_secp384r1_uint1 x1020; + uint32_t x1021; + fiat_secp384r1_uint1 x1022; + uint32_t x1023; + fiat_secp384r1_uint1 x1024; + uint32_t x1025; + fiat_secp384r1_uint1 x1026; + uint32_t x1027; + fiat_secp384r1_uint1 
x1028; + uint32_t x1029; + fiat_secp384r1_uint1 x1030; + uint32_t x1031; + fiat_secp384r1_uint1 x1032; + uint32_t x1033; + fiat_secp384r1_uint1 x1034; + uint32_t x1035; + fiat_secp384r1_uint1 x1036; + uint32_t x1037; + fiat_secp384r1_uint1 x1038; + uint32_t x1039; + fiat_secp384r1_uint1 x1040; + uint32_t x1041; + fiat_secp384r1_uint1 x1042; + uint32_t x1043; + uint32_t x1044; + uint32_t x1045; + uint32_t x1046; + uint32_t x1047; + uint32_t x1048; + uint32_t x1049; + uint32_t x1050; + uint32_t x1051; + fiat_secp384r1_uint1 x1052; + uint32_t x1053; + fiat_secp384r1_uint1 x1054; + uint32_t x1055; + fiat_secp384r1_uint1 x1056; + uint32_t x1057; + fiat_secp384r1_uint1 x1058; + uint32_t x1059; + fiat_secp384r1_uint1 x1060; + uint32_t x1061; + fiat_secp384r1_uint1 x1062; + uint32_t x1063; + fiat_secp384r1_uint1 x1064; + uint32_t x1065; + fiat_secp384r1_uint1 x1066; + uint32_t x1067; + fiat_secp384r1_uint1 x1068; + uint32_t x1069; + fiat_secp384r1_uint1 x1070; + uint32_t x1071; + fiat_secp384r1_uint1 x1072; + uint32_t x1073; + fiat_secp384r1_uint1 x1074; + uint32_t x1075; + fiat_secp384r1_uint1 x1076; + uint32_t x1077; + uint32_t x1078; + uint32_t x1079; + uint32_t x1080; + uint32_t x1081; + uint32_t x1082; + uint32_t x1083; + uint32_t x1084; + uint32_t x1085; + uint32_t x1086; + uint32_t x1087; + uint32_t x1088; + uint32_t x1089; + uint32_t x1090; + uint32_t x1091; + uint32_t x1092; + uint32_t x1093; + uint32_t x1094; + uint32_t x1095; + uint32_t x1096; + uint32_t x1097; + fiat_secp384r1_uint1 x1098; + uint32_t x1099; + fiat_secp384r1_uint1 x1100; + uint32_t x1101; + fiat_secp384r1_uint1 x1102; + uint32_t x1103; + fiat_secp384r1_uint1 x1104; + uint32_t x1105; + fiat_secp384r1_uint1 x1106; + uint32_t x1107; + fiat_secp384r1_uint1 x1108; + uint32_t x1109; + fiat_secp384r1_uint1 x1110; + uint32_t x1111; + fiat_secp384r1_uint1 x1112; + uint32_t x1113; + fiat_secp384r1_uint1 x1114; + uint32_t x1115; + fiat_secp384r1_uint1 x1116; + uint32_t x1117; + fiat_secp384r1_uint1 x1118; 
+ uint32_t x1119; + fiat_secp384r1_uint1 x1120; + uint32_t x1121; + fiat_secp384r1_uint1 x1122; + uint32_t x1123; + fiat_secp384r1_uint1 x1124; + uint32_t x1125; + fiat_secp384r1_uint1 x1126; + uint32_t x1127; + fiat_secp384r1_uint1 x1128; + uint32_t x1129; + fiat_secp384r1_uint1 x1130; + uint32_t x1131; + fiat_secp384r1_uint1 x1132; + uint32_t x1133; + fiat_secp384r1_uint1 x1134; + uint32_t x1135; + fiat_secp384r1_uint1 x1136; + uint32_t x1137; + fiat_secp384r1_uint1 x1138; + uint32_t x1139; + fiat_secp384r1_uint1 x1140; + uint32_t x1141; + fiat_secp384r1_uint1 x1142; + uint32_t x1143; + fiat_secp384r1_uint1 x1144; + uint32_t x1145; + fiat_secp384r1_uint1 x1146; + uint32_t x1147; + fiat_secp384r1_uint1 x1148; + uint32_t x1149; + fiat_secp384r1_uint1 x1150; + uint32_t x1151; + fiat_secp384r1_uint1 x1152; + uint32_t x1153; + fiat_secp384r1_uint1 x1154; + uint32_t x1155; + fiat_secp384r1_uint1 x1156; + uint32_t x1157; + fiat_secp384r1_uint1 x1158; + uint32_t x1159; + fiat_secp384r1_uint1 x1160; + uint32_t x1161; + fiat_secp384r1_uint1 x1162; + uint32_t x1163; + fiat_secp384r1_uint1 x1164; + uint32_t x1165; + uint32_t x1166; + uint32_t x1167; + uint32_t x1168; + uint32_t x1169; + uint32_t x1170; + uint32_t x1171; + uint32_t x1172; + uint32_t x1173; + uint32_t x1174; + uint32_t x1175; + uint32_t x1176; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[4]); + x5 = (arg1[5]); + x6 = (arg1[6]); + x7 = (arg1[7]); + x8 = (arg1[8]); + x9 = (arg1[9]); + x10 = (arg1[10]); + x11 = (arg1[11]); + x12 = (arg1[0]); + fiat_secp384r1_mulx_u32(&x13, &x14, x12, 0x2); + fiat_secp384r1_mulx_u32(&x15, &x16, x12, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x17, &x18, x12, 0x2); + fiat_secp384r1_mulx_u32(&x19, &x20, x12, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x21, &x22, 0x0, (fiat_secp384r1_uint1)x14, + x12); + fiat_secp384r1_mulx_u32(&x23, &x24, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x25, &x26, x12, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x27, &x28, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x29, &x30, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x31, &x32, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x33, &x34, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x35, &x36, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x37, &x38, x12, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x39, &x40, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x41, &x42, x12, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x43, &x44, 0x0, x40, x37); + fiat_secp384r1_addcarryx_u32(&x45, &x46, x44, x38, x35); + fiat_secp384r1_addcarryx_u32(&x47, &x48, x46, x36, x33); + fiat_secp384r1_addcarryx_u32(&x49, &x50, x48, x34, x31); + fiat_secp384r1_addcarryx_u32(&x51, &x52, x50, x32, x29); + fiat_secp384r1_addcarryx_u32(&x53, &x54, x52, x30, x27); + fiat_secp384r1_addcarryx_u32(&x55, &x56, x54, x28, x25); + fiat_secp384r1_addcarryx_u32(&x57, &x58, x56, x26, x23); + fiat_secp384r1_addcarryx_u32(&x59, &x60, 0x0, x12, x41); + fiat_secp384r1_addcarryx_u32(&x61, &x62, x60, x19, x42); + fiat_secp384r1_addcarryx_u32(&x63, &x64, 0x0, x17, x39); + fiat_secp384r1_addcarryx_u32(&x65, &x66, x64, (fiat_secp384r1_uint1)x18, + x43); + fiat_secp384r1_addcarryx_u32(&x67, &x68, x66, x15, x45); + fiat_secp384r1_addcarryx_u32(&x69, &x70, x68, x16, x47); + fiat_secp384r1_addcarryx_u32(&x71, &x72, x70, x13, x49); + fiat_secp384r1_addcarryx_u32(&x73, &x74, x72, x21, x51); + fiat_secp384r1_addcarryx_u32(&x75, &x76, x74, x22, x53); + fiat_secp384r1_addcarryx_u32(&x77, &x78, x76, 0x0, x55); + fiat_secp384r1_addcarryx_u32(&x79, &x80, x78, 0x0, x57); + fiat_secp384r1_addcarryx_u32(&x81, &x82, x80, 0x0, (x58 + x24)); + fiat_secp384r1_mulx_u32(&x83, &x84, x1, 0x2); + fiat_secp384r1_mulx_u32(&x85, &x86, x1, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x87, &x88, x1, 0x2); + fiat_secp384r1_mulx_u32(&x89, &x90, x1, UINT32_C(0xfffffffe)); + 
fiat_secp384r1_addcarryx_u32(&x91, &x92, 0x0, (fiat_secp384r1_uint1)x84, + x1); + fiat_secp384r1_addcarryx_u32(&x93, &x94, 0x0, x61, x1); + fiat_secp384r1_addcarryx_u32(&x95, &x96, x94, (x62 + x20), x89); + fiat_secp384r1_addcarryx_u32(&x97, &x98, x96, x63, x90); + fiat_secp384r1_addcarryx_u32(&x99, &x100, x98, x65, x87); + fiat_secp384r1_addcarryx_u32(&x101, &x102, x100, x67, + (fiat_secp384r1_uint1)x88); + fiat_secp384r1_addcarryx_u32(&x103, &x104, x102, x69, x85); + fiat_secp384r1_addcarryx_u32(&x105, &x106, x104, x71, x86); + fiat_secp384r1_addcarryx_u32(&x107, &x108, x106, x73, x83); + fiat_secp384r1_addcarryx_u32(&x109, &x110, x108, x75, x91); + fiat_secp384r1_addcarryx_u32(&x111, &x112, x110, x77, x92); + fiat_secp384r1_addcarryx_u32(&x113, &x114, x112, x79, 0x0); + fiat_secp384r1_addcarryx_u32(&x115, &x116, x114, x81, 0x0); + fiat_secp384r1_mulx_u32(&x117, &x118, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x119, &x120, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x121, &x122, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x123, &x124, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x125, &x126, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x127, &x128, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x129, &x130, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x131, &x132, x93, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x133, &x134, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x135, &x136, x93, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x137, &x138, 0x0, x134, x131); + fiat_secp384r1_addcarryx_u32(&x139, &x140, x138, x132, x129); + fiat_secp384r1_addcarryx_u32(&x141, &x142, x140, x130, x127); + fiat_secp384r1_addcarryx_u32(&x143, &x144, x142, x128, x125); + fiat_secp384r1_addcarryx_u32(&x145, &x146, x144, x126, x123); + fiat_secp384r1_addcarryx_u32(&x147, &x148, x146, x124, x121); + fiat_secp384r1_addcarryx_u32(&x149, &x150, x148, x122, x119); + 
fiat_secp384r1_addcarryx_u32(&x151, &x152, x150, x120, x117); + fiat_secp384r1_addcarryx_u32(&x153, &x154, 0x0, x93, x135); + fiat_secp384r1_addcarryx_u32(&x155, &x156, x154, x95, x136); + fiat_secp384r1_addcarryx_u32(&x157, &x158, x156, x97, 0x0); + fiat_secp384r1_addcarryx_u32(&x159, &x160, x158, x99, x133); + fiat_secp384r1_addcarryx_u32(&x161, &x162, x160, x101, x137); + fiat_secp384r1_addcarryx_u32(&x163, &x164, x162, x103, x139); + fiat_secp384r1_addcarryx_u32(&x165, &x166, x164, x105, x141); + fiat_secp384r1_addcarryx_u32(&x167, &x168, x166, x107, x143); + fiat_secp384r1_addcarryx_u32(&x169, &x170, x168, x109, x145); + fiat_secp384r1_addcarryx_u32(&x171, &x172, x170, x111, x147); + fiat_secp384r1_addcarryx_u32(&x173, &x174, x172, x113, x149); + fiat_secp384r1_addcarryx_u32(&x175, &x176, x174, x115, x151); + fiat_secp384r1_addcarryx_u32(&x177, &x178, x176, ((uint32_t)x116 + x82), + (x152 + x118)); + fiat_secp384r1_mulx_u32(&x179, &x180, x2, 0x2); + fiat_secp384r1_mulx_u32(&x181, &x182, x2, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x183, &x184, x2, 0x2); + fiat_secp384r1_mulx_u32(&x185, &x186, x2, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x187, &x188, 0x0, (fiat_secp384r1_uint1)x180, + x2); + fiat_secp384r1_addcarryx_u32(&x189, &x190, 0x0, x155, x2); + fiat_secp384r1_addcarryx_u32(&x191, &x192, x190, x157, x185); + fiat_secp384r1_addcarryx_u32(&x193, &x194, x192, x159, x186); + fiat_secp384r1_addcarryx_u32(&x195, &x196, x194, x161, x183); + fiat_secp384r1_addcarryx_u32(&x197, &x198, x196, x163, + (fiat_secp384r1_uint1)x184); + fiat_secp384r1_addcarryx_u32(&x199, &x200, x198, x165, x181); + fiat_secp384r1_addcarryx_u32(&x201, &x202, x200, x167, x182); + fiat_secp384r1_addcarryx_u32(&x203, &x204, x202, x169, x179); + fiat_secp384r1_addcarryx_u32(&x205, &x206, x204, x171, x187); + fiat_secp384r1_addcarryx_u32(&x207, &x208, x206, x173, x188); + fiat_secp384r1_addcarryx_u32(&x209, &x210, x208, x175, 0x0); + fiat_secp384r1_addcarryx_u32(&x211, 
&x212, x210, x177, 0x0); + fiat_secp384r1_mulx_u32(&x213, &x214, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x215, &x216, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x217, &x218, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x219, &x220, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x221, &x222, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x223, &x224, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x225, &x226, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x227, &x228, x189, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x229, &x230, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x231, &x232, x189, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x233, &x234, 0x0, x230, x227); + fiat_secp384r1_addcarryx_u32(&x235, &x236, x234, x228, x225); + fiat_secp384r1_addcarryx_u32(&x237, &x238, x236, x226, x223); + fiat_secp384r1_addcarryx_u32(&x239, &x240, x238, x224, x221); + fiat_secp384r1_addcarryx_u32(&x241, &x242, x240, x222, x219); + fiat_secp384r1_addcarryx_u32(&x243, &x244, x242, x220, x217); + fiat_secp384r1_addcarryx_u32(&x245, &x246, x244, x218, x215); + fiat_secp384r1_addcarryx_u32(&x247, &x248, x246, x216, x213); + fiat_secp384r1_addcarryx_u32(&x249, &x250, 0x0, x189, x231); + fiat_secp384r1_addcarryx_u32(&x251, &x252, x250, x191, x232); + fiat_secp384r1_addcarryx_u32(&x253, &x254, x252, x193, 0x0); + fiat_secp384r1_addcarryx_u32(&x255, &x256, x254, x195, x229); + fiat_secp384r1_addcarryx_u32(&x257, &x258, x256, x197, x233); + fiat_secp384r1_addcarryx_u32(&x259, &x260, x258, x199, x235); + fiat_secp384r1_addcarryx_u32(&x261, &x262, x260, x201, x237); + fiat_secp384r1_addcarryx_u32(&x263, &x264, x262, x203, x239); + fiat_secp384r1_addcarryx_u32(&x265, &x266, x264, x205, x241); + fiat_secp384r1_addcarryx_u32(&x267, &x268, x266, x207, x243); + fiat_secp384r1_addcarryx_u32(&x269, &x270, x268, x209, x245); + fiat_secp384r1_addcarryx_u32(&x271, &x272, x270, x211, x247); + 
fiat_secp384r1_addcarryx_u32(&x273, &x274, x272, ((uint32_t)x212 + x178), + (x248 + x214)); + fiat_secp384r1_mulx_u32(&x275, &x276, x3, 0x2); + fiat_secp384r1_mulx_u32(&x277, &x278, x3, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x279, &x280, x3, 0x2); + fiat_secp384r1_mulx_u32(&x281, &x282, x3, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x283, &x284, 0x0, (fiat_secp384r1_uint1)x276, + x3); + fiat_secp384r1_addcarryx_u32(&x285, &x286, 0x0, x251, x3); + fiat_secp384r1_addcarryx_u32(&x287, &x288, x286, x253, x281); + fiat_secp384r1_addcarryx_u32(&x289, &x290, x288, x255, x282); + fiat_secp384r1_addcarryx_u32(&x291, &x292, x290, x257, x279); + fiat_secp384r1_addcarryx_u32(&x293, &x294, x292, x259, + (fiat_secp384r1_uint1)x280); + fiat_secp384r1_addcarryx_u32(&x295, &x296, x294, x261, x277); + fiat_secp384r1_addcarryx_u32(&x297, &x298, x296, x263, x278); + fiat_secp384r1_addcarryx_u32(&x299, &x300, x298, x265, x275); + fiat_secp384r1_addcarryx_u32(&x301, &x302, x300, x267, x283); + fiat_secp384r1_addcarryx_u32(&x303, &x304, x302, x269, x284); + fiat_secp384r1_addcarryx_u32(&x305, &x306, x304, x271, 0x0); + fiat_secp384r1_addcarryx_u32(&x307, &x308, x306, x273, 0x0); + fiat_secp384r1_mulx_u32(&x309, &x310, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x311, &x312, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x313, &x314, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x315, &x316, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x317, &x318, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x319, &x320, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x321, &x322, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x323, &x324, x285, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x325, &x326, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x327, &x328, x285, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x329, &x330, 0x0, x326, x323); + fiat_secp384r1_addcarryx_u32(&x331, &x332, 
x330, x324, x321); + fiat_secp384r1_addcarryx_u32(&x333, &x334, x332, x322, x319); + fiat_secp384r1_addcarryx_u32(&x335, &x336, x334, x320, x317); + fiat_secp384r1_addcarryx_u32(&x337, &x338, x336, x318, x315); + fiat_secp384r1_addcarryx_u32(&x339, &x340, x338, x316, x313); + fiat_secp384r1_addcarryx_u32(&x341, &x342, x340, x314, x311); + fiat_secp384r1_addcarryx_u32(&x343, &x344, x342, x312, x309); + fiat_secp384r1_addcarryx_u32(&x345, &x346, 0x0, x285, x327); + fiat_secp384r1_addcarryx_u32(&x347, &x348, x346, x287, x328); + fiat_secp384r1_addcarryx_u32(&x349, &x350, x348, x289, 0x0); + fiat_secp384r1_addcarryx_u32(&x351, &x352, x350, x291, x325); + fiat_secp384r1_addcarryx_u32(&x353, &x354, x352, x293, x329); + fiat_secp384r1_addcarryx_u32(&x355, &x356, x354, x295, x331); + fiat_secp384r1_addcarryx_u32(&x357, &x358, x356, x297, x333); + fiat_secp384r1_addcarryx_u32(&x359, &x360, x358, x299, x335); + fiat_secp384r1_addcarryx_u32(&x361, &x362, x360, x301, x337); + fiat_secp384r1_addcarryx_u32(&x363, &x364, x362, x303, x339); + fiat_secp384r1_addcarryx_u32(&x365, &x366, x364, x305, x341); + fiat_secp384r1_addcarryx_u32(&x367, &x368, x366, x307, x343); + fiat_secp384r1_addcarryx_u32(&x369, &x370, x368, ((uint32_t)x308 + x274), + (x344 + x310)); + fiat_secp384r1_mulx_u32(&x371, &x372, x4, 0x2); + fiat_secp384r1_mulx_u32(&x373, &x374, x4, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x375, &x376, x4, 0x2); + fiat_secp384r1_mulx_u32(&x377, &x378, x4, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x379, &x380, 0x0, (fiat_secp384r1_uint1)x372, + x4); + fiat_secp384r1_addcarryx_u32(&x381, &x382, 0x0, x347, x4); + fiat_secp384r1_addcarryx_u32(&x383, &x384, x382, x349, x377); + fiat_secp384r1_addcarryx_u32(&x385, &x386, x384, x351, x378); + fiat_secp384r1_addcarryx_u32(&x387, &x388, x386, x353, x375); + fiat_secp384r1_addcarryx_u32(&x389, &x390, x388, x355, + (fiat_secp384r1_uint1)x376); + fiat_secp384r1_addcarryx_u32(&x391, &x392, x390, x357, x373); + 
fiat_secp384r1_addcarryx_u32(&x393, &x394, x392, x359, x374); + fiat_secp384r1_addcarryx_u32(&x395, &x396, x394, x361, x371); + fiat_secp384r1_addcarryx_u32(&x397, &x398, x396, x363, x379); + fiat_secp384r1_addcarryx_u32(&x399, &x400, x398, x365, x380); + fiat_secp384r1_addcarryx_u32(&x401, &x402, x400, x367, 0x0); + fiat_secp384r1_addcarryx_u32(&x403, &x404, x402, x369, 0x0); + fiat_secp384r1_mulx_u32(&x405, &x406, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x407, &x408, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x409, &x410, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x411, &x412, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x413, &x414, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x415, &x416, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x417, &x418, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x419, &x420, x381, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x421, &x422, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x423, &x424, x381, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x425, &x426, 0x0, x422, x419); + fiat_secp384r1_addcarryx_u32(&x427, &x428, x426, x420, x417); + fiat_secp384r1_addcarryx_u32(&x429, &x430, x428, x418, x415); + fiat_secp384r1_addcarryx_u32(&x431, &x432, x430, x416, x413); + fiat_secp384r1_addcarryx_u32(&x433, &x434, x432, x414, x411); + fiat_secp384r1_addcarryx_u32(&x435, &x436, x434, x412, x409); + fiat_secp384r1_addcarryx_u32(&x437, &x438, x436, x410, x407); + fiat_secp384r1_addcarryx_u32(&x439, &x440, x438, x408, x405); + fiat_secp384r1_addcarryx_u32(&x441, &x442, 0x0, x381, x423); + fiat_secp384r1_addcarryx_u32(&x443, &x444, x442, x383, x424); + fiat_secp384r1_addcarryx_u32(&x445, &x446, x444, x385, 0x0); + fiat_secp384r1_addcarryx_u32(&x447, &x448, x446, x387, x421); + fiat_secp384r1_addcarryx_u32(&x449, &x450, x448, x389, x425); + fiat_secp384r1_addcarryx_u32(&x451, &x452, x450, x391, x427); + 
fiat_secp384r1_addcarryx_u32(&x453, &x454, x452, x393, x429); + fiat_secp384r1_addcarryx_u32(&x455, &x456, x454, x395, x431); + fiat_secp384r1_addcarryx_u32(&x457, &x458, x456, x397, x433); + fiat_secp384r1_addcarryx_u32(&x459, &x460, x458, x399, x435); + fiat_secp384r1_addcarryx_u32(&x461, &x462, x460, x401, x437); + fiat_secp384r1_addcarryx_u32(&x463, &x464, x462, x403, x439); + fiat_secp384r1_addcarryx_u32(&x465, &x466, x464, ((uint32_t)x404 + x370), + (x440 + x406)); + fiat_secp384r1_mulx_u32(&x467, &x468, x5, 0x2); + fiat_secp384r1_mulx_u32(&x469, &x470, x5, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x471, &x472, x5, 0x2); + fiat_secp384r1_mulx_u32(&x473, &x474, x5, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x475, &x476, 0x0, (fiat_secp384r1_uint1)x468, + x5); + fiat_secp384r1_addcarryx_u32(&x477, &x478, 0x0, x443, x5); + fiat_secp384r1_addcarryx_u32(&x479, &x480, x478, x445, x473); + fiat_secp384r1_addcarryx_u32(&x481, &x482, x480, x447, x474); + fiat_secp384r1_addcarryx_u32(&x483, &x484, x482, x449, x471); + fiat_secp384r1_addcarryx_u32(&x485, &x486, x484, x451, + (fiat_secp384r1_uint1)x472); + fiat_secp384r1_addcarryx_u32(&x487, &x488, x486, x453, x469); + fiat_secp384r1_addcarryx_u32(&x489, &x490, x488, x455, x470); + fiat_secp384r1_addcarryx_u32(&x491, &x492, x490, x457, x467); + fiat_secp384r1_addcarryx_u32(&x493, &x494, x492, x459, x475); + fiat_secp384r1_addcarryx_u32(&x495, &x496, x494, x461, x476); + fiat_secp384r1_addcarryx_u32(&x497, &x498, x496, x463, 0x0); + fiat_secp384r1_addcarryx_u32(&x499, &x500, x498, x465, 0x0); + fiat_secp384r1_mulx_u32(&x501, &x502, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x503, &x504, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x505, &x506, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x507, &x508, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x509, &x510, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x511, &x512, x477, UINT32_C(0xffffffff)); + 
fiat_secp384r1_mulx_u32(&x513, &x514, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x515, &x516, x477, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x517, &x518, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x519, &x520, x477, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x521, &x522, 0x0, x518, x515); + fiat_secp384r1_addcarryx_u32(&x523, &x524, x522, x516, x513); + fiat_secp384r1_addcarryx_u32(&x525, &x526, x524, x514, x511); + fiat_secp384r1_addcarryx_u32(&x527, &x528, x526, x512, x509); + fiat_secp384r1_addcarryx_u32(&x529, &x530, x528, x510, x507); + fiat_secp384r1_addcarryx_u32(&x531, &x532, x530, x508, x505); + fiat_secp384r1_addcarryx_u32(&x533, &x534, x532, x506, x503); + fiat_secp384r1_addcarryx_u32(&x535, &x536, x534, x504, x501); + fiat_secp384r1_addcarryx_u32(&x537, &x538, 0x0, x477, x519); + fiat_secp384r1_addcarryx_u32(&x539, &x540, x538, x479, x520); + fiat_secp384r1_addcarryx_u32(&x541, &x542, x540, x481, 0x0); + fiat_secp384r1_addcarryx_u32(&x543, &x544, x542, x483, x517); + fiat_secp384r1_addcarryx_u32(&x545, &x546, x544, x485, x521); + fiat_secp384r1_addcarryx_u32(&x547, &x548, x546, x487, x523); + fiat_secp384r1_addcarryx_u32(&x549, &x550, x548, x489, x525); + fiat_secp384r1_addcarryx_u32(&x551, &x552, x550, x491, x527); + fiat_secp384r1_addcarryx_u32(&x553, &x554, x552, x493, x529); + fiat_secp384r1_addcarryx_u32(&x555, &x556, x554, x495, x531); + fiat_secp384r1_addcarryx_u32(&x557, &x558, x556, x497, x533); + fiat_secp384r1_addcarryx_u32(&x559, &x560, x558, x499, x535); + fiat_secp384r1_addcarryx_u32(&x561, &x562, x560, ((uint32_t)x500 + x466), + (x536 + x502)); + fiat_secp384r1_mulx_u32(&x563, &x564, x6, 0x2); + fiat_secp384r1_mulx_u32(&x565, &x566, x6, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x567, &x568, x6, 0x2); + fiat_secp384r1_mulx_u32(&x569, &x570, x6, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x571, &x572, 0x0, (fiat_secp384r1_uint1)x564, + x6); + 
fiat_secp384r1_addcarryx_u32(&x573, &x574, 0x0, x539, x6); + fiat_secp384r1_addcarryx_u32(&x575, &x576, x574, x541, x569); + fiat_secp384r1_addcarryx_u32(&x577, &x578, x576, x543, x570); + fiat_secp384r1_addcarryx_u32(&x579, &x580, x578, x545, x567); + fiat_secp384r1_addcarryx_u32(&x581, &x582, x580, x547, + (fiat_secp384r1_uint1)x568); + fiat_secp384r1_addcarryx_u32(&x583, &x584, x582, x549, x565); + fiat_secp384r1_addcarryx_u32(&x585, &x586, x584, x551, x566); + fiat_secp384r1_addcarryx_u32(&x587, &x588, x586, x553, x563); + fiat_secp384r1_addcarryx_u32(&x589, &x590, x588, x555, x571); + fiat_secp384r1_addcarryx_u32(&x591, &x592, x590, x557, x572); + fiat_secp384r1_addcarryx_u32(&x593, &x594, x592, x559, 0x0); + fiat_secp384r1_addcarryx_u32(&x595, &x596, x594, x561, 0x0); + fiat_secp384r1_mulx_u32(&x597, &x598, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x599, &x600, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x601, &x602, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x603, &x604, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x605, &x606, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x607, &x608, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x609, &x610, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x611, &x612, x573, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x613, &x614, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x615, &x616, x573, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x617, &x618, 0x0, x614, x611); + fiat_secp384r1_addcarryx_u32(&x619, &x620, x618, x612, x609); + fiat_secp384r1_addcarryx_u32(&x621, &x622, x620, x610, x607); + fiat_secp384r1_addcarryx_u32(&x623, &x624, x622, x608, x605); + fiat_secp384r1_addcarryx_u32(&x625, &x626, x624, x606, x603); + fiat_secp384r1_addcarryx_u32(&x627, &x628, x626, x604, x601); + fiat_secp384r1_addcarryx_u32(&x629, &x630, x628, x602, x599); + fiat_secp384r1_addcarryx_u32(&x631, &x632, x630, x600, x597); + 
fiat_secp384r1_addcarryx_u32(&x633, &x634, 0x0, x573, x615); + fiat_secp384r1_addcarryx_u32(&x635, &x636, x634, x575, x616); + fiat_secp384r1_addcarryx_u32(&x637, &x638, x636, x577, 0x0); + fiat_secp384r1_addcarryx_u32(&x639, &x640, x638, x579, x613); + fiat_secp384r1_addcarryx_u32(&x641, &x642, x640, x581, x617); + fiat_secp384r1_addcarryx_u32(&x643, &x644, x642, x583, x619); + fiat_secp384r1_addcarryx_u32(&x645, &x646, x644, x585, x621); + fiat_secp384r1_addcarryx_u32(&x647, &x648, x646, x587, x623); + fiat_secp384r1_addcarryx_u32(&x649, &x650, x648, x589, x625); + fiat_secp384r1_addcarryx_u32(&x651, &x652, x650, x591, x627); + fiat_secp384r1_addcarryx_u32(&x653, &x654, x652, x593, x629); + fiat_secp384r1_addcarryx_u32(&x655, &x656, x654, x595, x631); + fiat_secp384r1_addcarryx_u32(&x657, &x658, x656, ((uint32_t)x596 + x562), + (x632 + x598)); + fiat_secp384r1_mulx_u32(&x659, &x660, x7, 0x2); + fiat_secp384r1_mulx_u32(&x661, &x662, x7, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x663, &x664, x7, 0x2); + fiat_secp384r1_mulx_u32(&x665, &x666, x7, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x667, &x668, 0x0, (fiat_secp384r1_uint1)x660, + x7); + fiat_secp384r1_addcarryx_u32(&x669, &x670, 0x0, x635, x7); + fiat_secp384r1_addcarryx_u32(&x671, &x672, x670, x637, x665); + fiat_secp384r1_addcarryx_u32(&x673, &x674, x672, x639, x666); + fiat_secp384r1_addcarryx_u32(&x675, &x676, x674, x641, x663); + fiat_secp384r1_addcarryx_u32(&x677, &x678, x676, x643, + (fiat_secp384r1_uint1)x664); + fiat_secp384r1_addcarryx_u32(&x679, &x680, x678, x645, x661); + fiat_secp384r1_addcarryx_u32(&x681, &x682, x680, x647, x662); + fiat_secp384r1_addcarryx_u32(&x683, &x684, x682, x649, x659); + fiat_secp384r1_addcarryx_u32(&x685, &x686, x684, x651, x667); + fiat_secp384r1_addcarryx_u32(&x687, &x688, x686, x653, x668); + fiat_secp384r1_addcarryx_u32(&x689, &x690, x688, x655, 0x0); + fiat_secp384r1_addcarryx_u32(&x691, &x692, x690, x657, 0x0); + fiat_secp384r1_mulx_u32(&x693, 
&x694, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x695, &x696, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x697, &x698, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x699, &x700, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x701, &x702, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x703, &x704, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x705, &x706, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x707, &x708, x669, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x709, &x710, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x711, &x712, x669, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x713, &x714, 0x0, x710, x707); + fiat_secp384r1_addcarryx_u32(&x715, &x716, x714, x708, x705); + fiat_secp384r1_addcarryx_u32(&x717, &x718, x716, x706, x703); + fiat_secp384r1_addcarryx_u32(&x719, &x720, x718, x704, x701); + fiat_secp384r1_addcarryx_u32(&x721, &x722, x720, x702, x699); + fiat_secp384r1_addcarryx_u32(&x723, &x724, x722, x700, x697); + fiat_secp384r1_addcarryx_u32(&x725, &x726, x724, x698, x695); + fiat_secp384r1_addcarryx_u32(&x727, &x728, x726, x696, x693); + fiat_secp384r1_addcarryx_u32(&x729, &x730, 0x0, x669, x711); + fiat_secp384r1_addcarryx_u32(&x731, &x732, x730, x671, x712); + fiat_secp384r1_addcarryx_u32(&x733, &x734, x732, x673, 0x0); + fiat_secp384r1_addcarryx_u32(&x735, &x736, x734, x675, x709); + fiat_secp384r1_addcarryx_u32(&x737, &x738, x736, x677, x713); + fiat_secp384r1_addcarryx_u32(&x739, &x740, x738, x679, x715); + fiat_secp384r1_addcarryx_u32(&x741, &x742, x740, x681, x717); + fiat_secp384r1_addcarryx_u32(&x743, &x744, x742, x683, x719); + fiat_secp384r1_addcarryx_u32(&x745, &x746, x744, x685, x721); + fiat_secp384r1_addcarryx_u32(&x747, &x748, x746, x687, x723); + fiat_secp384r1_addcarryx_u32(&x749, &x750, x748, x689, x725); + fiat_secp384r1_addcarryx_u32(&x751, &x752, x750, x691, x727); + fiat_secp384r1_addcarryx_u32(&x753, &x754, x752, 
((uint32_t)x692 + x658), + (x728 + x694)); + fiat_secp384r1_mulx_u32(&x755, &x756, x8, 0x2); + fiat_secp384r1_mulx_u32(&x757, &x758, x8, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x759, &x760, x8, 0x2); + fiat_secp384r1_mulx_u32(&x761, &x762, x8, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x763, &x764, 0x0, (fiat_secp384r1_uint1)x756, + x8); + fiat_secp384r1_addcarryx_u32(&x765, &x766, 0x0, x731, x8); + fiat_secp384r1_addcarryx_u32(&x767, &x768, x766, x733, x761); + fiat_secp384r1_addcarryx_u32(&x769, &x770, x768, x735, x762); + fiat_secp384r1_addcarryx_u32(&x771, &x772, x770, x737, x759); + fiat_secp384r1_addcarryx_u32(&x773, &x774, x772, x739, + (fiat_secp384r1_uint1)x760); + fiat_secp384r1_addcarryx_u32(&x775, &x776, x774, x741, x757); + fiat_secp384r1_addcarryx_u32(&x777, &x778, x776, x743, x758); + fiat_secp384r1_addcarryx_u32(&x779, &x780, x778, x745, x755); + fiat_secp384r1_addcarryx_u32(&x781, &x782, x780, x747, x763); + fiat_secp384r1_addcarryx_u32(&x783, &x784, x782, x749, x764); + fiat_secp384r1_addcarryx_u32(&x785, &x786, x784, x751, 0x0); + fiat_secp384r1_addcarryx_u32(&x787, &x788, x786, x753, 0x0); + fiat_secp384r1_mulx_u32(&x789, &x790, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x791, &x792, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x793, &x794, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x795, &x796, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x797, &x798, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x799, &x800, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x801, &x802, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x803, &x804, x765, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x805, &x806, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x807, &x808, x765, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x809, &x810, 0x0, x806, x803); + fiat_secp384r1_addcarryx_u32(&x811, &x812, x810, x804, x801); + 
fiat_secp384r1_addcarryx_u32(&x813, &x814, x812, x802, x799); + fiat_secp384r1_addcarryx_u32(&x815, &x816, x814, x800, x797); + fiat_secp384r1_addcarryx_u32(&x817, &x818, x816, x798, x795); + fiat_secp384r1_addcarryx_u32(&x819, &x820, x818, x796, x793); + fiat_secp384r1_addcarryx_u32(&x821, &x822, x820, x794, x791); + fiat_secp384r1_addcarryx_u32(&x823, &x824, x822, x792, x789); + fiat_secp384r1_addcarryx_u32(&x825, &x826, 0x0, x765, x807); + fiat_secp384r1_addcarryx_u32(&x827, &x828, x826, x767, x808); + fiat_secp384r1_addcarryx_u32(&x829, &x830, x828, x769, 0x0); + fiat_secp384r1_addcarryx_u32(&x831, &x832, x830, x771, x805); + fiat_secp384r1_addcarryx_u32(&x833, &x834, x832, x773, x809); + fiat_secp384r1_addcarryx_u32(&x835, &x836, x834, x775, x811); + fiat_secp384r1_addcarryx_u32(&x837, &x838, x836, x777, x813); + fiat_secp384r1_addcarryx_u32(&x839, &x840, x838, x779, x815); + fiat_secp384r1_addcarryx_u32(&x841, &x842, x840, x781, x817); + fiat_secp384r1_addcarryx_u32(&x843, &x844, x842, x783, x819); + fiat_secp384r1_addcarryx_u32(&x845, &x846, x844, x785, x821); + fiat_secp384r1_addcarryx_u32(&x847, &x848, x846, x787, x823); + fiat_secp384r1_addcarryx_u32(&x849, &x850, x848, ((uint32_t)x788 + x754), + (x824 + x790)); + fiat_secp384r1_mulx_u32(&x851, &x852, x9, 0x2); + fiat_secp384r1_mulx_u32(&x853, &x854, x9, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x855, &x856, x9, 0x2); + fiat_secp384r1_mulx_u32(&x857, &x858, x9, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x859, &x860, 0x0, (fiat_secp384r1_uint1)x852, + x9); + fiat_secp384r1_addcarryx_u32(&x861, &x862, 0x0, x827, x9); + fiat_secp384r1_addcarryx_u32(&x863, &x864, x862, x829, x857); + fiat_secp384r1_addcarryx_u32(&x865, &x866, x864, x831, x858); + fiat_secp384r1_addcarryx_u32(&x867, &x868, x866, x833, x855); + fiat_secp384r1_addcarryx_u32(&x869, &x870, x868, x835, + (fiat_secp384r1_uint1)x856); + fiat_secp384r1_addcarryx_u32(&x871, &x872, x870, x837, x853); + 
fiat_secp384r1_addcarryx_u32(&x873, &x874, x872, x839, x854); + fiat_secp384r1_addcarryx_u32(&x875, &x876, x874, x841, x851); + fiat_secp384r1_addcarryx_u32(&x877, &x878, x876, x843, x859); + fiat_secp384r1_addcarryx_u32(&x879, &x880, x878, x845, x860); + fiat_secp384r1_addcarryx_u32(&x881, &x882, x880, x847, 0x0); + fiat_secp384r1_addcarryx_u32(&x883, &x884, x882, x849, 0x0); + fiat_secp384r1_mulx_u32(&x885, &x886, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x887, &x888, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x889, &x890, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x891, &x892, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x893, &x894, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x895, &x896, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x897, &x898, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x899, &x900, x861, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x901, &x902, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x903, &x904, x861, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x905, &x906, 0x0, x902, x899); + fiat_secp384r1_addcarryx_u32(&x907, &x908, x906, x900, x897); + fiat_secp384r1_addcarryx_u32(&x909, &x910, x908, x898, x895); + fiat_secp384r1_addcarryx_u32(&x911, &x912, x910, x896, x893); + fiat_secp384r1_addcarryx_u32(&x913, &x914, x912, x894, x891); + fiat_secp384r1_addcarryx_u32(&x915, &x916, x914, x892, x889); + fiat_secp384r1_addcarryx_u32(&x917, &x918, x916, x890, x887); + fiat_secp384r1_addcarryx_u32(&x919, &x920, x918, x888, x885); + fiat_secp384r1_addcarryx_u32(&x921, &x922, 0x0, x861, x903); + fiat_secp384r1_addcarryx_u32(&x923, &x924, x922, x863, x904); + fiat_secp384r1_addcarryx_u32(&x925, &x926, x924, x865, 0x0); + fiat_secp384r1_addcarryx_u32(&x927, &x928, x926, x867, x901); + fiat_secp384r1_addcarryx_u32(&x929, &x930, x928, x869, x905); + fiat_secp384r1_addcarryx_u32(&x931, &x932, x930, x871, x907); + 
fiat_secp384r1_addcarryx_u32(&x933, &x934, x932, x873, x909); + fiat_secp384r1_addcarryx_u32(&x935, &x936, x934, x875, x911); + fiat_secp384r1_addcarryx_u32(&x937, &x938, x936, x877, x913); + fiat_secp384r1_addcarryx_u32(&x939, &x940, x938, x879, x915); + fiat_secp384r1_addcarryx_u32(&x941, &x942, x940, x881, x917); + fiat_secp384r1_addcarryx_u32(&x943, &x944, x942, x883, x919); + fiat_secp384r1_addcarryx_u32(&x945, &x946, x944, ((uint32_t)x884 + x850), + (x920 + x886)); + fiat_secp384r1_mulx_u32(&x947, &x948, x10, 0x2); + fiat_secp384r1_mulx_u32(&x949, &x950, x10, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x951, &x952, x10, 0x2); + fiat_secp384r1_mulx_u32(&x953, &x954, x10, UINT32_C(0xfffffffe)); + fiat_secp384r1_addcarryx_u32(&x955, &x956, 0x0, (fiat_secp384r1_uint1)x948, + x10); + fiat_secp384r1_addcarryx_u32(&x957, &x958, 0x0, x923, x10); + fiat_secp384r1_addcarryx_u32(&x959, &x960, x958, x925, x953); + fiat_secp384r1_addcarryx_u32(&x961, &x962, x960, x927, x954); + fiat_secp384r1_addcarryx_u32(&x963, &x964, x962, x929, x951); + fiat_secp384r1_addcarryx_u32(&x965, &x966, x964, x931, + (fiat_secp384r1_uint1)x952); + fiat_secp384r1_addcarryx_u32(&x967, &x968, x966, x933, x949); + fiat_secp384r1_addcarryx_u32(&x969, &x970, x968, x935, x950); + fiat_secp384r1_addcarryx_u32(&x971, &x972, x970, x937, x947); + fiat_secp384r1_addcarryx_u32(&x973, &x974, x972, x939, x955); + fiat_secp384r1_addcarryx_u32(&x975, &x976, x974, x941, x956); + fiat_secp384r1_addcarryx_u32(&x977, &x978, x976, x943, 0x0); + fiat_secp384r1_addcarryx_u32(&x979, &x980, x978, x945, 0x0); + fiat_secp384r1_mulx_u32(&x981, &x982, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x983, &x984, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x985, &x986, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x987, &x988, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x989, &x990, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x991, &x992, x957, 
UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x993, &x994, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x995, &x996, x957, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x997, &x998, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x999, &x1000, x957, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1001, &x1002, 0x0, x998, x995); + fiat_secp384r1_addcarryx_u32(&x1003, &x1004, x1002, x996, x993); + fiat_secp384r1_addcarryx_u32(&x1005, &x1006, x1004, x994, x991); + fiat_secp384r1_addcarryx_u32(&x1007, &x1008, x1006, x992, x989); + fiat_secp384r1_addcarryx_u32(&x1009, &x1010, x1008, x990, x987); + fiat_secp384r1_addcarryx_u32(&x1011, &x1012, x1010, x988, x985); + fiat_secp384r1_addcarryx_u32(&x1013, &x1014, x1012, x986, x983); + fiat_secp384r1_addcarryx_u32(&x1015, &x1016, x1014, x984, x981); + fiat_secp384r1_addcarryx_u32(&x1017, &x1018, 0x0, x957, x999); + fiat_secp384r1_addcarryx_u32(&x1019, &x1020, x1018, x959, x1000); + fiat_secp384r1_addcarryx_u32(&x1021, &x1022, x1020, x961, 0x0); + fiat_secp384r1_addcarryx_u32(&x1023, &x1024, x1022, x963, x997); + fiat_secp384r1_addcarryx_u32(&x1025, &x1026, x1024, x965, x1001); + fiat_secp384r1_addcarryx_u32(&x1027, &x1028, x1026, x967, x1003); + fiat_secp384r1_addcarryx_u32(&x1029, &x1030, x1028, x969, x1005); + fiat_secp384r1_addcarryx_u32(&x1031, &x1032, x1030, x971, x1007); + fiat_secp384r1_addcarryx_u32(&x1033, &x1034, x1032, x973, x1009); + fiat_secp384r1_addcarryx_u32(&x1035, &x1036, x1034, x975, x1011); + fiat_secp384r1_addcarryx_u32(&x1037, &x1038, x1036, x977, x1013); + fiat_secp384r1_addcarryx_u32(&x1039, &x1040, x1038, x979, x1015); + fiat_secp384r1_addcarryx_u32(&x1041, &x1042, x1040, ((uint32_t)x980 + x946), + (x1016 + x982)); + fiat_secp384r1_mulx_u32(&x1043, &x1044, x11, 0x2); + fiat_secp384r1_mulx_u32(&x1045, &x1046, x11, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1047, &x1048, x11, 0x2); + fiat_secp384r1_mulx_u32(&x1049, &x1050, x11, UINT32_C(0xfffffffe)); + 
fiat_secp384r1_addcarryx_u32(&x1051, &x1052, 0x0, + (fiat_secp384r1_uint1)x1044, x11); + fiat_secp384r1_addcarryx_u32(&x1053, &x1054, 0x0, x1019, x11); + fiat_secp384r1_addcarryx_u32(&x1055, &x1056, x1054, x1021, x1049); + fiat_secp384r1_addcarryx_u32(&x1057, &x1058, x1056, x1023, x1050); + fiat_secp384r1_addcarryx_u32(&x1059, &x1060, x1058, x1025, x1047); + fiat_secp384r1_addcarryx_u32(&x1061, &x1062, x1060, x1027, + (fiat_secp384r1_uint1)x1048); + fiat_secp384r1_addcarryx_u32(&x1063, &x1064, x1062, x1029, x1045); + fiat_secp384r1_addcarryx_u32(&x1065, &x1066, x1064, x1031, x1046); + fiat_secp384r1_addcarryx_u32(&x1067, &x1068, x1066, x1033, x1043); + fiat_secp384r1_addcarryx_u32(&x1069, &x1070, x1068, x1035, x1051); + fiat_secp384r1_addcarryx_u32(&x1071, &x1072, x1070, x1037, x1052); + fiat_secp384r1_addcarryx_u32(&x1073, &x1074, x1072, x1039, 0x0); + fiat_secp384r1_addcarryx_u32(&x1075, &x1076, x1074, x1041, 0x0); + fiat_secp384r1_mulx_u32(&x1077, &x1078, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1079, &x1080, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1081, &x1082, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1083, &x1084, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1085, &x1086, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1087, &x1088, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1089, &x1090, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1091, &x1092, x1053, UINT32_C(0xfffffffe)); + fiat_secp384r1_mulx_u32(&x1093, &x1094, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_mulx_u32(&x1095, &x1096, x1053, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x1097, &x1098, 0x0, x1094, x1091); + fiat_secp384r1_addcarryx_u32(&x1099, &x1100, x1098, x1092, x1089); + fiat_secp384r1_addcarryx_u32(&x1101, &x1102, x1100, x1090, x1087); + fiat_secp384r1_addcarryx_u32(&x1103, &x1104, x1102, x1088, x1085); + fiat_secp384r1_addcarryx_u32(&x1105, &x1106, x1104, x1086, x1083); + 
fiat_secp384r1_addcarryx_u32(&x1107, &x1108, x1106, x1084, x1081); + fiat_secp384r1_addcarryx_u32(&x1109, &x1110, x1108, x1082, x1079); + fiat_secp384r1_addcarryx_u32(&x1111, &x1112, x1110, x1080, x1077); + fiat_secp384r1_addcarryx_u32(&x1113, &x1114, 0x0, x1053, x1095); + fiat_secp384r1_addcarryx_u32(&x1115, &x1116, x1114, x1055, x1096); + fiat_secp384r1_addcarryx_u32(&x1117, &x1118, x1116, x1057, 0x0); + fiat_secp384r1_addcarryx_u32(&x1119, &x1120, x1118, x1059, x1093); + fiat_secp384r1_addcarryx_u32(&x1121, &x1122, x1120, x1061, x1097); + fiat_secp384r1_addcarryx_u32(&x1123, &x1124, x1122, x1063, x1099); + fiat_secp384r1_addcarryx_u32(&x1125, &x1126, x1124, x1065, x1101); + fiat_secp384r1_addcarryx_u32(&x1127, &x1128, x1126, x1067, x1103); + fiat_secp384r1_addcarryx_u32(&x1129, &x1130, x1128, x1069, x1105); + fiat_secp384r1_addcarryx_u32(&x1131, &x1132, x1130, x1071, x1107); + fiat_secp384r1_addcarryx_u32(&x1133, &x1134, x1132, x1073, x1109); + fiat_secp384r1_addcarryx_u32(&x1135, &x1136, x1134, x1075, x1111); + fiat_secp384r1_addcarryx_u32(&x1137, &x1138, x1136, + ((uint32_t)x1076 + x1042), (x1112 + x1078)); + fiat_secp384r1_subborrowx_u32(&x1139, &x1140, 0x0, x1115, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1141, &x1142, x1140, x1117, 0x0); + fiat_secp384r1_subborrowx_u32(&x1143, &x1144, x1142, x1119, 0x0); + fiat_secp384r1_subborrowx_u32(&x1145, &x1146, x1144, x1121, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1147, &x1148, x1146, x1123, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x1149, &x1150, x1148, x1125, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1151, &x1152, x1150, x1127, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1153, &x1154, x1152, x1129, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1155, &x1156, x1154, x1131, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1157, &x1158, x1156, x1133, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x1159, 
&x1160, x1158, x1135,
+                                  UINT32_C(0xffffffff));
+    fiat_secp384r1_subborrowx_u32(&x1161, &x1162, x1160, x1137,
+                                  UINT32_C(0xffffffff));
+    fiat_secp384r1_subborrowx_u32(&x1163, &x1164, x1162, x1138, 0x0); /* x1164: second output of the last subborrowx step; selector for the cmovznz calls below */
+    fiat_secp384r1_cmovznz_u32(&x1165, x1164, x1139, x1115); /* per limb: x1164 chooses between the subborrowx result (x1139..x1161) and its unsubtracted input (x1115..x1137) */
+    fiat_secp384r1_cmovznz_u32(&x1166, x1164, x1141, x1117);
+    fiat_secp384r1_cmovznz_u32(&x1167, x1164, x1143, x1119);
+    fiat_secp384r1_cmovznz_u32(&x1168, x1164, x1145, x1121);
+    fiat_secp384r1_cmovznz_u32(&x1169, x1164, x1147, x1123);
+    fiat_secp384r1_cmovznz_u32(&x1170, x1164, x1149, x1125);
+    fiat_secp384r1_cmovznz_u32(&x1171, x1164, x1151, x1127);
+    fiat_secp384r1_cmovznz_u32(&x1172, x1164, x1153, x1129);
+    fiat_secp384r1_cmovznz_u32(&x1173, x1164, x1155, x1131);
+    fiat_secp384r1_cmovznz_u32(&x1174, x1164, x1157, x1133);
+    fiat_secp384r1_cmovznz_u32(&x1175, x1164, x1159, x1135);
+    fiat_secp384r1_cmovznz_u32(&x1176, x1164, x1161, x1137);
+    out1[0] = x1165; /* write the twelve selected limbs to the output array */
+    out1[1] = x1166;
+    out1[2] = x1167;
+    out1[3] = x1168;
+    out1[4] = x1169;
+    out1[5] = x1170;
+    out1[6] = x1171;
+    out1[7] = x1172;
+    out1[8] = x1173;
+    out1[9] = x1174;
+    out1[10] = x1175;
+    out1[11] = x1176;
+}
+
+/*
+ * The function fiat_secp384r1_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+ *
+ * Implementation: the output word is the bitwise OR of the twelve 32-bit
+ * limbs arg1[0..11], so it is zero exactly when every limb is zero. The
+ * body is a single straight-line expression with no branches or loops.
+ *
+ * Parameters:
+ *   out1: receives the OR of all limbs (written once, at the end).
+ *   arg1: the twelve input limbs; only read.
+ *
+ * Preconditions:
+ *   0 ≤ eval arg1 < m
+ * Postconditions:
+ *   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
+ *
+ * Input Bounds:
+ *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ *   out1: [0x0 ~> 0xffffffff]
+ */
+static void
+fiat_secp384r1_nonzero(uint32_t *out1, const uint32_t arg1[12])
+{
+    uint32_t x1; /* OR of all twelve limbs of arg1 */
+    x1 = ((arg1[0]) |
+          ((arg1[1]) |
+           ((arg1[2]) |
+            ((arg1[3]) |
+             ((arg1[4]) |
+              ((arg1[5]) |
+               ((arg1[6]) |
+                ((arg1[7]) |
+                 ((arg1[8]) | ((arg1[9]) | ((arg1[10]) | (arg1[11]))))))))))));
+    *out1 = x1;
+}
+
+/*
+ * The function fiat_secp384r1_selectznz is a multi-limb conditional select.
+ *
+ * Implementation: each of the twelve limbs is selected independently by
+ * fiat_secp384r1_cmovznz_u32, always with the same condition arg1. All
+ * twelve results are computed into temporaries (x1..x12) before any
+ * element of out1 is written.
+ * NOTE(review): that ordering presumably allows out1 to alias arg2 or
+ * arg3 - confirm against callers before relying on it.
+ *
+ * Parameters:
+ *   out1: receives the twelve selected limbs.
+ *   arg1: the condition; see the input bounds below.
+ *   arg2: limbs used when arg1 = 0; only read.
+ *   arg3: limbs used otherwise; only read.
+ *
+ * Postconditions:
+ *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+ *
+ * Input Bounds:
+ *   arg1: [0x0 ~> 0x1]
+ *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static void
+fiat_secp384r1_selectznz(uint32_t out1[12],
+                         fiat_secp384r1_uint1 arg1,
+                         const uint32_t arg2[12],
+                         const uint32_t arg3[12])
+{
+    uint32_t x1; /* x1..x12: selected limbs 0..11, held until all selects are done */
+    uint32_t x2;
+    uint32_t x3;
+    uint32_t x4;
+    uint32_t x5;
+    uint32_t x6;
+    uint32_t x7;
+    uint32_t x8;
+    uint32_t x9;
+    uint32_t x10;
+    uint32_t x11;
+    uint32_t x12;
+    fiat_secp384r1_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0])); /* same condition arg1 for every limb */
+    fiat_secp384r1_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
+    fiat_secp384r1_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
+    fiat_secp384r1_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
+    fiat_secp384r1_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
+    fiat_secp384r1_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
+    fiat_secp384r1_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
+    fiat_secp384r1_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
+    fiat_secp384r1_cmovznz_u32(&x9, arg1, (arg2[8]), (arg3[8]));
+    fiat_secp384r1_cmovznz_u32(&x10, arg1, (arg2[9]), (arg3[9]));
+    fiat_secp384r1_cmovznz_u32(&x11, arg1, (arg2[10]), (arg3[10]));
+    fiat_secp384r1_cmovznz_u32(&x12, arg1, (arg2[11]), (arg3[11]));
+    out1[0] = x1; /* outputs are written only after all inputs have been read */
+    out1[1] = x2;
+    out1[2] = x3;
+    out1[3] = x4;
+    out1[4] = x5;
+    out1[5] = x6;
+    out1[6] = x7;
+    out1[7] = x8;
+    out1[8] = x9;
+    out1[9] = x10;
+    out1[10] = x11;
+    out1[11] = x12;
+}
+
+/*
+ * The function fiat_secp384r1_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+ * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..47] + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]] + */ +static void +fiat_secp384r1_to_bytes(uint8_t out1[48], const uint32_t arg1[12]) +{ + uint32_t x1; + uint32_t x2; + uint32_t x3; + uint32_t x4; + uint32_t x5; + uint32_t x6; + uint32_t x7; + uint32_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint8_t x13; + uint32_t x14; + uint8_t x15; + uint32_t x16; + uint8_t x17; + uint8_t x18; + uint8_t x19; + uint32_t x20; + uint8_t x21; + uint32_t x22; + uint8_t x23; + uint8_t x24; + uint8_t x25; + uint32_t x26; + uint8_t x27; + uint32_t x28; + uint8_t x29; + uint8_t x30; + uint8_t x31; + uint32_t x32; + uint8_t x33; + uint32_t x34; + uint8_t x35; + uint8_t x36; + uint8_t x37; + uint32_t x38; + uint8_t x39; + uint32_t x40; + uint8_t x41; + uint8_t x42; + uint8_t x43; + uint32_t x44; + uint8_t x45; + uint32_t x46; + uint8_t x47; + 
uint8_t x48; + uint8_t x49; + uint32_t x50; + uint8_t x51; + uint32_t x52; + uint8_t x53; + uint8_t x54; + uint8_t x55; + uint32_t x56; + uint8_t x57; + uint32_t x58; + uint8_t x59; + uint8_t x60; + uint8_t x61; + uint32_t x62; + uint8_t x63; + uint32_t x64; + uint8_t x65; + uint8_t x66; + uint8_t x67; + uint32_t x68; + uint8_t x69; + uint32_t x70; + uint8_t x71; + uint8_t x72; + uint8_t x73; + uint32_t x74; + uint8_t x75; + uint32_t x76; + uint8_t x77; + uint8_t x78; + uint8_t x79; + uint32_t x80; + uint8_t x81; + uint32_t x82; + uint8_t x83; + uint8_t x84; + x1 = (arg1[11]); + x2 = (arg1[10]); + x3 = (arg1[9]); + x4 = (arg1[8]); + x5 = (arg1[7]); + x6 = (arg1[6]); + x7 = (arg1[5]); + x8 = (arg1[4]); + x9 = (arg1[3]); + x10 = (arg1[2]); + x11 = (arg1[1]); + x12 = (arg1[0]); + x13 = (uint8_t)(x12 & UINT8_C(0xff)); + x14 = (x12 >> 8); + x15 = (uint8_t)(x14 & UINT8_C(0xff)); + x16 = (x14 >> 8); + x17 = (uint8_t)(x16 & UINT8_C(0xff)); + x18 = (uint8_t)(x16 >> 8); + x19 = (uint8_t)(x11 & UINT8_C(0xff)); + x20 = (x11 >> 8); + x21 = (uint8_t)(x20 & UINT8_C(0xff)); + x22 = (x20 >> 8); + x23 = (uint8_t)(x22 & UINT8_C(0xff)); + x24 = (uint8_t)(x22 >> 8); + x25 = (uint8_t)(x10 & UINT8_C(0xff)); + x26 = (x10 >> 8); + x27 = (uint8_t)(x26 & UINT8_C(0xff)); + x28 = (x26 >> 8); + x29 = (uint8_t)(x28 & UINT8_C(0xff)); + x30 = (uint8_t)(x28 >> 8); + x31 = (uint8_t)(x9 & UINT8_C(0xff)); + x32 = (x9 >> 8); + x33 = (uint8_t)(x32 & UINT8_C(0xff)); + x34 = (x32 >> 8); + x35 = (uint8_t)(x34 & UINT8_C(0xff)); + x36 = (uint8_t)(x34 >> 8); + x37 = (uint8_t)(x8 & UINT8_C(0xff)); + x38 = (x8 >> 8); + x39 = (uint8_t)(x38 & UINT8_C(0xff)); + x40 = (x38 >> 8); + x41 = (uint8_t)(x40 & UINT8_C(0xff)); + x42 = (uint8_t)(x40 >> 8); + x43 = (uint8_t)(x7 & UINT8_C(0xff)); + x44 = (x7 >> 8); + x45 = (uint8_t)(x44 & UINT8_C(0xff)); + x46 = (x44 >> 8); + x47 = (uint8_t)(x46 & UINT8_C(0xff)); + x48 = (uint8_t)(x46 >> 8); + x49 = (uint8_t)(x6 & UINT8_C(0xff)); + x50 = (x6 >> 8); + x51 = (uint8_t)(x50 & 
UINT8_C(0xff)); + x52 = (x50 >> 8); + x53 = (uint8_t)(x52 & UINT8_C(0xff)); + x54 = (uint8_t)(x52 >> 8); + x55 = (uint8_t)(x5 & UINT8_C(0xff)); + x56 = (x5 >> 8); + x57 = (uint8_t)(x56 & UINT8_C(0xff)); + x58 = (x56 >> 8); + x59 = (uint8_t)(x58 & UINT8_C(0xff)); + x60 = (uint8_t)(x58 >> 8); + x61 = (uint8_t)(x4 & UINT8_C(0xff)); + x62 = (x4 >> 8); + x63 = (uint8_t)(x62 & UINT8_C(0xff)); + x64 = (x62 >> 8); + x65 = (uint8_t)(x64 & UINT8_C(0xff)); + x66 = (uint8_t)(x64 >> 8); + x67 = (uint8_t)(x3 & UINT8_C(0xff)); + x68 = (x3 >> 8); + x69 = (uint8_t)(x68 & UINT8_C(0xff)); + x70 = (x68 >> 8); + x71 = (uint8_t)(x70 & UINT8_C(0xff)); + x72 = (uint8_t)(x70 >> 8); + x73 = (uint8_t)(x2 & UINT8_C(0xff)); + x74 = (x2 >> 8); + x75 = (uint8_t)(x74 & UINT8_C(0xff)); + x76 = (x74 >> 8); + x77 = (uint8_t)(x76 & UINT8_C(0xff)); + x78 = (uint8_t)(x76 >> 8); + x79 = (uint8_t)(x1 & UINT8_C(0xff)); + x80 = (x1 >> 8); + x81 = (uint8_t)(x80 & UINT8_C(0xff)); + x82 = (x80 >> 8); + x83 = (uint8_t)(x82 & UINT8_C(0xff)); + x84 = (uint8_t)(x82 >> 8); + out1[0] = x13; + out1[1] = x15; + out1[2] = x17; + out1[3] = x18; + out1[4] = x19; + out1[5] = x21; + out1[6] = x23; + out1[7] = x24; + out1[8] = x25; + out1[9] = x27; + out1[10] = x29; + out1[11] = x30; + out1[12] = x31; + out1[13] = x33; + out1[14] = x35; + out1[15] = x36; + out1[16] = x37; + out1[17] = x39; + out1[18] = x41; + out1[19] = x42; + out1[20] = x43; + out1[21] = x45; + out1[22] = x47; + out1[23] = x48; + out1[24] = x49; + out1[25] = x51; + out1[26] = x53; + out1[27] = x54; + out1[28] = x55; + out1[29] = x57; + out1[30] = x59; + out1[31] = x60; + out1[32] = x61; + out1[33] = x63; + out1[34] = x65; + out1[35] = x66; + out1[36] = x67; + out1[37] = x69; + out1[38] = x71; + out1[39] = x72; + out1[40] = x73; + out1[41] = x75; + out1[42] = x77; + out1[43] = x78; + out1[44] = x79; + out1[45] = x81; + out1[46] = x83; + out1[47] = x84; +} + +/* + * The function fiat_secp384r1_from_bytes deserializes a field element NOT in the Montgomery 
domain from bytes in little-endian order. + * + * Preconditions: + * 0 ≤ bytes_eval arg1 < m + * Postconditions: + * eval out1 mod m = bytes_eval arg1 mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + */ +static void +fiat_secp384r1_from_bytes(uint32_t out1[12], + const uint8_t arg1[48]) +{ + uint32_t x1; + uint32_t x2; + uint32_t x3; + uint8_t x4; + uint32_t x5; + uint32_t x6; + uint32_t x7; + uint8_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint8_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint8_t x16; + uint32_t x17; + uint32_t x18; + uint32_t x19; + uint8_t x20; + uint32_t x21; + uint32_t x22; + uint32_t x23; + uint8_t x24; + uint32_t x25; + uint32_t x26; + uint32_t x27; + uint8_t x28; + uint32_t x29; + uint32_t x30; + uint32_t x31; + uint8_t x32; + uint32_t x33; + uint32_t x34; + uint32_t x35; + uint8_t x36; + uint32_t x37; + uint32_t x38; + uint32_t x39; + uint8_t x40; + uint32_t x41; + uint32_t x42; + uint32_t x43; + uint8_t x44; + 
uint32_t x45; + uint32_t x46; + uint32_t x47; + uint8_t x48; + uint32_t x49; + uint32_t x50; + uint32_t x51; + uint32_t x52; + uint32_t x53; + uint32_t x54; + uint32_t x55; + uint32_t x56; + uint32_t x57; + uint32_t x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + uint32_t x63; + uint32_t x64; + uint32_t x65; + uint32_t x66; + uint32_t x67; + uint32_t x68; + uint32_t x69; + uint32_t x70; + uint32_t x71; + uint32_t x72; + uint32_t x73; + uint32_t x74; + uint32_t x75; + uint32_t x76; + uint32_t x77; + uint32_t x78; + uint32_t x79; + uint32_t x80; + uint32_t x81; + uint32_t x82; + uint32_t x83; + uint32_t x84; + x1 = ((uint32_t)(arg1[47]) << 24); + x2 = ((uint32_t)(arg1[46]) << 16); + x3 = ((uint32_t)(arg1[45]) << 8); + x4 = (arg1[44]); + x5 = ((uint32_t)(arg1[43]) << 24); + x6 = ((uint32_t)(arg1[42]) << 16); + x7 = ((uint32_t)(arg1[41]) << 8); + x8 = (arg1[40]); + x9 = ((uint32_t)(arg1[39]) << 24); + x10 = ((uint32_t)(arg1[38]) << 16); + x11 = ((uint32_t)(arg1[37]) << 8); + x12 = (arg1[36]); + x13 = ((uint32_t)(arg1[35]) << 24); + x14 = ((uint32_t)(arg1[34]) << 16); + x15 = ((uint32_t)(arg1[33]) << 8); + x16 = (arg1[32]); + x17 = ((uint32_t)(arg1[31]) << 24); + x18 = ((uint32_t)(arg1[30]) << 16); + x19 = ((uint32_t)(arg1[29]) << 8); + x20 = (arg1[28]); + x21 = ((uint32_t)(arg1[27]) << 24); + x22 = ((uint32_t)(arg1[26]) << 16); + x23 = ((uint32_t)(arg1[25]) << 8); + x24 = (arg1[24]); + x25 = ((uint32_t)(arg1[23]) << 24); + x26 = ((uint32_t)(arg1[22]) << 16); + x27 = ((uint32_t)(arg1[21]) << 8); + x28 = (arg1[20]); + x29 = ((uint32_t)(arg1[19]) << 24); + x30 = ((uint32_t)(arg1[18]) << 16); + x31 = ((uint32_t)(arg1[17]) << 8); + x32 = (arg1[16]); + x33 = ((uint32_t)(arg1[15]) << 24); + x34 = ((uint32_t)(arg1[14]) << 16); + x35 = ((uint32_t)(arg1[13]) << 8); + x36 = (arg1[12]); + x37 = ((uint32_t)(arg1[11]) << 24); + x38 = ((uint32_t)(arg1[10]) << 16); + x39 = ((uint32_t)(arg1[9]) << 8); + x40 = (arg1[8]); + x41 = ((uint32_t)(arg1[7]) << 24); + x42 
= ((uint32_t)(arg1[6]) << 16); + x43 = ((uint32_t)(arg1[5]) << 8); + x44 = (arg1[4]); + x45 = ((uint32_t)(arg1[3]) << 24); + x46 = ((uint32_t)(arg1[2]) << 16); + x47 = ((uint32_t)(arg1[1]) << 8); + x48 = (arg1[0]); + x49 = (x47 + (uint32_t)x48); + x50 = (x46 + x49); + x51 = (x45 + x50); + x52 = (x43 + (uint32_t)x44); + x53 = (x42 + x52); + x54 = (x41 + x53); + x55 = (x39 + (uint32_t)x40); + x56 = (x38 + x55); + x57 = (x37 + x56); + x58 = (x35 + (uint32_t)x36); + x59 = (x34 + x58); + x60 = (x33 + x59); + x61 = (x31 + (uint32_t)x32); + x62 = (x30 + x61); + x63 = (x29 + x62); + x64 = (x27 + (uint32_t)x28); + x65 = (x26 + x64); + x66 = (x25 + x65); + x67 = (x23 + (uint32_t)x24); + x68 = (x22 + x67); + x69 = (x21 + x68); + x70 = (x19 + (uint32_t)x20); + x71 = (x18 + x70); + x72 = (x17 + x71); + x73 = (x15 + (uint32_t)x16); + x74 = (x14 + x73); + x75 = (x13 + x74); + x76 = (x11 + (uint32_t)x12); + x77 = (x10 + x76); + x78 = (x9 + x77); + x79 = (x7 + (uint32_t)x8); + x80 = (x6 + x79); + x81 = (x5 + x80); + x82 = (x3 + (uint32_t)x4); + x83 = (x2 + x82); + x84 = (x1 + x83); + out1[0] = x51; + out1[1] = x54; + out1[2] = x57; + out1[3] = x60; + out1[4] = x63; + out1[5] = x66; + out1[6] = x69; + out1[7] = x72; + out1[8] = x75; + out1[9] = x78; + out1[10] = x81; + out1[11] = x84; +} + +/* + * The function fiat_secp384r1_divstep computes a divstep. 
+ * + * Preconditions: + * 0 ≤ eval arg4 < m + * 0 ≤ eval arg5 < m + * Postconditions: + * out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1) + * twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2) + * twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋) + * eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m) + * eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m) + * 0 ≤ eval out4 < m + * 0 ≤ eval out5 < m + * 0 ≤ eval out2 < m + * 0 ≤ eval out3 < m + * + * Input Bounds: + * arg1: [0x0 ~> 0xffffffff] + * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * arg4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + *
arg5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * Output Bounds: + * out1: [0x0 ~> 0xffffffff] + * out2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * out3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * out4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * out5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + */ +static void +fiat_secp384r1_divstep( + uint32_t *out1, uint32_t out2[13], uint32_t out3[13], uint32_t out4[12], + uint32_t out5[12], uint32_t arg1, const uint32_t arg2[13], + const uint32_t arg3[13], const uint32_t arg4[12], const uint32_t arg5[12]) +{ + uint32_t x1; + fiat_secp384r1_uint1 x2; + fiat_secp384r1_uint1 x3; + uint32_t x4; + fiat_secp384r1_uint1 x5; + uint32_t x6; + uint32_t x7; + uint32_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint32_t x16; + uint32_t x17; + uint32_t x18; + uint32_t x19; + 
uint32_t x20; + fiat_secp384r1_uint1 x21; + uint32_t x22; + fiat_secp384r1_uint1 x23; + uint32_t x24; + fiat_secp384r1_uint1 x25; + uint32_t x26; + fiat_secp384r1_uint1 x27; + uint32_t x28; + fiat_secp384r1_uint1 x29; + uint32_t x30; + fiat_secp384r1_uint1 x31; + uint32_t x32; + fiat_secp384r1_uint1 x33; + uint32_t x34; + fiat_secp384r1_uint1 x35; + uint32_t x36; + fiat_secp384r1_uint1 x37; + uint32_t x38; + fiat_secp384r1_uint1 x39; + uint32_t x40; + fiat_secp384r1_uint1 x41; + uint32_t x42; + fiat_secp384r1_uint1 x43; + uint32_t x44; + fiat_secp384r1_uint1 x45; + uint32_t x46; + uint32_t x47; + uint32_t x48; + uint32_t x49; + uint32_t x50; + uint32_t x51; + uint32_t x52; + uint32_t x53; + uint32_t x54; + uint32_t x55; + uint32_t x56; + uint32_t x57; + uint32_t x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + uint32_t x63; + uint32_t x64; + uint32_t x65; + uint32_t x66; + uint32_t x67; + uint32_t x68; + uint32_t x69; + uint32_t x70; + uint32_t x71; + fiat_secp384r1_uint1 x72; + uint32_t x73; + fiat_secp384r1_uint1 x74; + uint32_t x75; + fiat_secp384r1_uint1 x76; + uint32_t x77; + fiat_secp384r1_uint1 x78; + uint32_t x79; + fiat_secp384r1_uint1 x80; + uint32_t x81; + fiat_secp384r1_uint1 x82; + uint32_t x83; + fiat_secp384r1_uint1 x84; + uint32_t x85; + fiat_secp384r1_uint1 x86; + uint32_t x87; + fiat_secp384r1_uint1 x88; + uint32_t x89; + fiat_secp384r1_uint1 x90; + uint32_t x91; + fiat_secp384r1_uint1 x92; + uint32_t x93; + fiat_secp384r1_uint1 x94; + uint32_t x95; + fiat_secp384r1_uint1 x96; + uint32_t x97; + fiat_secp384r1_uint1 x98; + uint32_t x99; + fiat_secp384r1_uint1 x100; + uint32_t x101; + fiat_secp384r1_uint1 x102; + uint32_t x103; + fiat_secp384r1_uint1 x104; + uint32_t x105; + fiat_secp384r1_uint1 x106; + uint32_t x107; + fiat_secp384r1_uint1 x108; + uint32_t x109; + fiat_secp384r1_uint1 x110; + uint32_t x111; + fiat_secp384r1_uint1 x112; + uint32_t x113; + fiat_secp384r1_uint1 x114; + uint32_t x115; + fiat_secp384r1_uint1 x116; 
+ uint32_t x117; + fiat_secp384r1_uint1 x118; + uint32_t x119; + fiat_secp384r1_uint1 x120; + uint32_t x121; + uint32_t x122; + uint32_t x123; + uint32_t x124; + uint32_t x125; + uint32_t x126; + uint32_t x127; + uint32_t x128; + uint32_t x129; + uint32_t x130; + uint32_t x131; + uint32_t x132; + uint32_t x133; + fiat_secp384r1_uint1 x134; + uint32_t x135; + fiat_secp384r1_uint1 x136; + uint32_t x137; + fiat_secp384r1_uint1 x138; + uint32_t x139; + fiat_secp384r1_uint1 x140; + uint32_t x141; + fiat_secp384r1_uint1 x142; + uint32_t x143; + fiat_secp384r1_uint1 x144; + uint32_t x145; + fiat_secp384r1_uint1 x146; + uint32_t x147; + fiat_secp384r1_uint1 x148; + uint32_t x149; + fiat_secp384r1_uint1 x150; + uint32_t x151; + fiat_secp384r1_uint1 x152; + uint32_t x153; + fiat_secp384r1_uint1 x154; + uint32_t x155; + fiat_secp384r1_uint1 x156; + uint32_t x157; + uint32_t x158; + fiat_secp384r1_uint1 x159; + uint32_t x160; + fiat_secp384r1_uint1 x161; + uint32_t x162; + fiat_secp384r1_uint1 x163; + uint32_t x164; + fiat_secp384r1_uint1 x165; + uint32_t x166; + fiat_secp384r1_uint1 x167; + uint32_t x168; + fiat_secp384r1_uint1 x169; + uint32_t x170; + fiat_secp384r1_uint1 x171; + uint32_t x172; + fiat_secp384r1_uint1 x173; + uint32_t x174; + fiat_secp384r1_uint1 x175; + uint32_t x176; + fiat_secp384r1_uint1 x177; + uint32_t x178; + fiat_secp384r1_uint1 x179; + uint32_t x180; + fiat_secp384r1_uint1 x181; + uint32_t x182; + uint32_t x183; + uint32_t x184; + uint32_t x185; + uint32_t x186; + uint32_t x187; + uint32_t x188; + uint32_t x189; + uint32_t x190; + uint32_t x191; + uint32_t x192; + uint32_t x193; + fiat_secp384r1_uint1 x194; + uint32_t x195; + uint32_t x196; + uint32_t x197; + uint32_t x198; + uint32_t x199; + uint32_t x200; + uint32_t x201; + uint32_t x202; + uint32_t x203; + uint32_t x204; + uint32_t x205; + uint32_t x206; + uint32_t x207; + uint32_t x208; + fiat_secp384r1_uint1 x209; + uint32_t x210; + fiat_secp384r1_uint1 x211; + uint32_t x212; + 
fiat_secp384r1_uint1 x213; + uint32_t x214; + fiat_secp384r1_uint1 x215; + uint32_t x216; + fiat_secp384r1_uint1 x217; + uint32_t x218; + fiat_secp384r1_uint1 x219; + uint32_t x220; + fiat_secp384r1_uint1 x221; + uint32_t x222; + fiat_secp384r1_uint1 x223; + uint32_t x224; + fiat_secp384r1_uint1 x225; + uint32_t x226; + fiat_secp384r1_uint1 x227; + uint32_t x228; + fiat_secp384r1_uint1 x229; + uint32_t x230; + fiat_secp384r1_uint1 x231; + uint32_t x232; + fiat_secp384r1_uint1 x233; + uint32_t x234; + uint32_t x235; + uint32_t x236; + uint32_t x237; + uint32_t x238; + uint32_t x239; + uint32_t x240; + uint32_t x241; + uint32_t x242; + uint32_t x243; + uint32_t x244; + uint32_t x245; + uint32_t x246; + fiat_secp384r1_uint1 x247; + uint32_t x248; + fiat_secp384r1_uint1 x249; + uint32_t x250; + fiat_secp384r1_uint1 x251; + uint32_t x252; + fiat_secp384r1_uint1 x253; + uint32_t x254; + fiat_secp384r1_uint1 x255; + uint32_t x256; + fiat_secp384r1_uint1 x257; + uint32_t x258; + fiat_secp384r1_uint1 x259; + uint32_t x260; + fiat_secp384r1_uint1 x261; + uint32_t x262; + fiat_secp384r1_uint1 x263; + uint32_t x264; + fiat_secp384r1_uint1 x265; + uint32_t x266; + fiat_secp384r1_uint1 x267; + uint32_t x268; + fiat_secp384r1_uint1 x269; + uint32_t x270; + fiat_secp384r1_uint1 x271; + uint32_t x272; + fiat_secp384r1_uint1 x273; + uint32_t x274; + fiat_secp384r1_uint1 x275; + uint32_t x276; + fiat_secp384r1_uint1 x277; + uint32_t x278; + fiat_secp384r1_uint1 x279; + uint32_t x280; + fiat_secp384r1_uint1 x281; + uint32_t x282; + fiat_secp384r1_uint1 x283; + uint32_t x284; + fiat_secp384r1_uint1 x285; + uint32_t x286; + fiat_secp384r1_uint1 x287; + uint32_t x288; + fiat_secp384r1_uint1 x289; + uint32_t x290; + fiat_secp384r1_uint1 x291; + uint32_t x292; + fiat_secp384r1_uint1 x293; + uint32_t x294; + fiat_secp384r1_uint1 x295; + uint32_t x296; + fiat_secp384r1_uint1 x297; + uint32_t x298; + uint32_t x299; + uint32_t x300; + uint32_t x301; + uint32_t x302; + uint32_t x303; + uint32_t 
x304; + uint32_t x305; + uint32_t x306; + uint32_t x307; + uint32_t x308; + uint32_t x309; + uint32_t x310; + uint32_t x311; + uint32_t x312; + uint32_t x313; + uint32_t x314; + uint32_t x315; + uint32_t x316; + uint32_t x317; + uint32_t x318; + uint32_t x319; + uint32_t x320; + uint32_t x321; + uint32_t x322; + uint32_t x323; + uint32_t x324; + uint32_t x325; + uint32_t x326; + uint32_t x327; + uint32_t x328; + uint32_t x329; + uint32_t x330; + uint32_t x331; + uint32_t x332; + uint32_t x333; + uint32_t x334; + fiat_secp384r1_addcarryx_u32(&x1, &x2, 0x0, (~arg1), 0x1); + x3 = (fiat_secp384r1_uint1)((fiat_secp384r1_uint1)(x1 >> 31) & + (fiat_secp384r1_uint1)((arg3[0]) & 0x1)); + fiat_secp384r1_addcarryx_u32(&x4, &x5, 0x0, (~arg1), 0x1); + fiat_secp384r1_cmovznz_u32(&x6, x3, arg1, x4); + fiat_secp384r1_cmovznz_u32(&x7, x3, (arg2[0]), (arg3[0])); + fiat_secp384r1_cmovznz_u32(&x8, x3, (arg2[1]), (arg3[1])); + fiat_secp384r1_cmovznz_u32(&x9, x3, (arg2[2]), (arg3[2])); + fiat_secp384r1_cmovznz_u32(&x10, x3, (arg2[3]), (arg3[3])); + fiat_secp384r1_cmovznz_u32(&x11, x3, (arg2[4]), (arg3[4])); + fiat_secp384r1_cmovznz_u32(&x12, x3, (arg2[5]), (arg3[5])); + fiat_secp384r1_cmovznz_u32(&x13, x3, (arg2[6]), (arg3[6])); + fiat_secp384r1_cmovznz_u32(&x14, x3, (arg2[7]), (arg3[7])); + fiat_secp384r1_cmovznz_u32(&x15, x3, (arg2[8]), (arg3[8])); + fiat_secp384r1_cmovznz_u32(&x16, x3, (arg2[9]), (arg3[9])); + fiat_secp384r1_cmovznz_u32(&x17, x3, (arg2[10]), (arg3[10])); + fiat_secp384r1_cmovznz_u32(&x18, x3, (arg2[11]), (arg3[11])); + fiat_secp384r1_cmovznz_u32(&x19, x3, (arg2[12]), (arg3[12])); + fiat_secp384r1_addcarryx_u32(&x20, &x21, 0x0, 0x1, (~(arg2[0]))); + fiat_secp384r1_addcarryx_u32(&x22, &x23, x21, 0x0, (~(arg2[1]))); + fiat_secp384r1_addcarryx_u32(&x24, &x25, x23, 0x0, (~(arg2[2]))); + fiat_secp384r1_addcarryx_u32(&x26, &x27, x25, 0x0, (~(arg2[3]))); + fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, 0x0, (~(arg2[4]))); + fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, 0x0, 
(~(arg2[5]))); + fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, 0x0, (~(arg2[6]))); + fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, 0x0, (~(arg2[7]))); + fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, 0x0, (~(arg2[8]))); + fiat_secp384r1_addcarryx_u32(&x38, &x39, x37, 0x0, (~(arg2[9]))); + fiat_secp384r1_addcarryx_u32(&x40, &x41, x39, 0x0, (~(arg2[10]))); + fiat_secp384r1_addcarryx_u32(&x42, &x43, x41, 0x0, (~(arg2[11]))); + fiat_secp384r1_addcarryx_u32(&x44, &x45, x43, 0x0, (~(arg2[12]))); + fiat_secp384r1_cmovznz_u32(&x46, x3, (arg3[0]), x20); + fiat_secp384r1_cmovznz_u32(&x47, x3, (arg3[1]), x22); + fiat_secp384r1_cmovznz_u32(&x48, x3, (arg3[2]), x24); + fiat_secp384r1_cmovznz_u32(&x49, x3, (arg3[3]), x26); + fiat_secp384r1_cmovznz_u32(&x50, x3, (arg3[4]), x28); + fiat_secp384r1_cmovznz_u32(&x51, x3, (arg3[5]), x30); + fiat_secp384r1_cmovznz_u32(&x52, x3, (arg3[6]), x32); + fiat_secp384r1_cmovznz_u32(&x53, x3, (arg3[7]), x34); + fiat_secp384r1_cmovznz_u32(&x54, x3, (arg3[8]), x36); + fiat_secp384r1_cmovznz_u32(&x55, x3, (arg3[9]), x38); + fiat_secp384r1_cmovznz_u32(&x56, x3, (arg3[10]), x40); + fiat_secp384r1_cmovznz_u32(&x57, x3, (arg3[11]), x42); + fiat_secp384r1_cmovznz_u32(&x58, x3, (arg3[12]), x44); + fiat_secp384r1_cmovznz_u32(&x59, x3, (arg4[0]), (arg5[0])); + fiat_secp384r1_cmovznz_u32(&x60, x3, (arg4[1]), (arg5[1])); + fiat_secp384r1_cmovznz_u32(&x61, x3, (arg4[2]), (arg5[2])); + fiat_secp384r1_cmovznz_u32(&x62, x3, (arg4[3]), (arg5[3])); + fiat_secp384r1_cmovznz_u32(&x63, x3, (arg4[4]), (arg5[4])); + fiat_secp384r1_cmovznz_u32(&x64, x3, (arg4[5]), (arg5[5])); + fiat_secp384r1_cmovznz_u32(&x65, x3, (arg4[6]), (arg5[6])); + fiat_secp384r1_cmovznz_u32(&x66, x3, (arg4[7]), (arg5[7])); + fiat_secp384r1_cmovznz_u32(&x67, x3, (arg4[8]), (arg5[8])); + fiat_secp384r1_cmovznz_u32(&x68, x3, (arg4[9]), (arg5[9])); + fiat_secp384r1_cmovznz_u32(&x69, x3, (arg4[10]), (arg5[10])); + fiat_secp384r1_cmovznz_u32(&x70, x3, (arg4[11]), (arg5[11])); + 
fiat_secp384r1_addcarryx_u32(&x71, &x72, 0x0, x59, x59); + fiat_secp384r1_addcarryx_u32(&x73, &x74, x72, x60, x60); + fiat_secp384r1_addcarryx_u32(&x75, &x76, x74, x61, x61); + fiat_secp384r1_addcarryx_u32(&x77, &x78, x76, x62, x62); + fiat_secp384r1_addcarryx_u32(&x79, &x80, x78, x63, x63); + fiat_secp384r1_addcarryx_u32(&x81, &x82, x80, x64, x64); + fiat_secp384r1_addcarryx_u32(&x83, &x84, x82, x65, x65); + fiat_secp384r1_addcarryx_u32(&x85, &x86, x84, x66, x66); + fiat_secp384r1_addcarryx_u32(&x87, &x88, x86, x67, x67); + fiat_secp384r1_addcarryx_u32(&x89, &x90, x88, x68, x68); + fiat_secp384r1_addcarryx_u32(&x91, &x92, x90, x69, x69); + fiat_secp384r1_addcarryx_u32(&x93, &x94, x92, x70, x70); + fiat_secp384r1_subborrowx_u32(&x95, &x96, 0x0, x71, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x97, &x98, x96, x73, 0x0); + fiat_secp384r1_subborrowx_u32(&x99, &x100, x98, x75, 0x0); + fiat_secp384r1_subborrowx_u32(&x101, &x102, x100, x77, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x103, &x104, x102, x79, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x105, &x106, x104, x81, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x107, &x108, x106, x83, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x109, &x110, x108, x85, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x111, &x112, x110, x87, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x113, &x114, x112, x89, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x115, &x116, x114, x91, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x117, &x118, x116, x93, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x119, &x120, x118, x94, 0x0); + x121 = (arg4[11]); + x122 = (arg4[10]); + x123 = (arg4[9]); + x124 = (arg4[8]); + x125 = (arg4[7]); + x126 = (arg4[6]); + x127 = (arg4[5]); + x128 = (arg4[4]); + x129 = (arg4[3]); + x130 = (arg4[2]); + x131 = (arg4[1]); + x132 = (arg4[0]); + fiat_secp384r1_subborrowx_u32(&x133, &x134, 0x0, 
0x0, x132); + fiat_secp384r1_subborrowx_u32(&x135, &x136, x134, 0x0, x131); + fiat_secp384r1_subborrowx_u32(&x137, &x138, x136, 0x0, x130); + fiat_secp384r1_subborrowx_u32(&x139, &x140, x138, 0x0, x129); + fiat_secp384r1_subborrowx_u32(&x141, &x142, x140, 0x0, x128); + fiat_secp384r1_subborrowx_u32(&x143, &x144, x142, 0x0, x127); + fiat_secp384r1_subborrowx_u32(&x145, &x146, x144, 0x0, x126); + fiat_secp384r1_subborrowx_u32(&x147, &x148, x146, 0x0, x125); + fiat_secp384r1_subborrowx_u32(&x149, &x150, x148, 0x0, x124); + fiat_secp384r1_subborrowx_u32(&x151, &x152, x150, 0x0, x123); + fiat_secp384r1_subborrowx_u32(&x153, &x154, x152, 0x0, x122); + fiat_secp384r1_subborrowx_u32(&x155, &x156, x154, 0x0, x121); + fiat_secp384r1_cmovznz_u32(&x157, x156, 0x0, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x158, &x159, 0x0, x133, x157); + fiat_secp384r1_addcarryx_u32(&x160, &x161, x159, x135, 0x0); + fiat_secp384r1_addcarryx_u32(&x162, &x163, x161, x137, 0x0); + fiat_secp384r1_addcarryx_u32(&x164, &x165, x163, x139, x157); + fiat_secp384r1_addcarryx_u32(&x166, &x167, x165, x141, + (x157 & UINT32_C(0xfffffffe))); + fiat_secp384r1_addcarryx_u32(&x168, &x169, x167, x143, x157); + fiat_secp384r1_addcarryx_u32(&x170, &x171, x169, x145, x157); + fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x147, x157); + fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x149, x157); + fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x151, x157); + fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x153, x157); + fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x155, x157); + fiat_secp384r1_cmovznz_u32(&x182, x3, (arg5[0]), x158); + fiat_secp384r1_cmovznz_u32(&x183, x3, (arg5[1]), x160); + fiat_secp384r1_cmovznz_u32(&x184, x3, (arg5[2]), x162); + fiat_secp384r1_cmovznz_u32(&x185, x3, (arg5[3]), x164); + fiat_secp384r1_cmovznz_u32(&x186, x3, (arg5[4]), x166); + fiat_secp384r1_cmovznz_u32(&x187, x3, (arg5[5]), x168); + fiat_secp384r1_cmovznz_u32(&x188, x3, (arg5[6]), x170); + 
fiat_secp384r1_cmovznz_u32(&x189, x3, (arg5[7]), x172); + fiat_secp384r1_cmovznz_u32(&x190, x3, (arg5[8]), x174); + fiat_secp384r1_cmovznz_u32(&x191, x3, (arg5[9]), x176); + fiat_secp384r1_cmovznz_u32(&x192, x3, (arg5[10]), x178); + fiat_secp384r1_cmovznz_u32(&x193, x3, (arg5[11]), x180); + x194 = (fiat_secp384r1_uint1)(x46 & 0x1); + fiat_secp384r1_cmovznz_u32(&x195, x194, 0x0, x7); + fiat_secp384r1_cmovznz_u32(&x196, x194, 0x0, x8); + fiat_secp384r1_cmovznz_u32(&x197, x194, 0x0, x9); + fiat_secp384r1_cmovznz_u32(&x198, x194, 0x0, x10); + fiat_secp384r1_cmovznz_u32(&x199, x194, 0x0, x11); + fiat_secp384r1_cmovznz_u32(&x200, x194, 0x0, x12); + fiat_secp384r1_cmovznz_u32(&x201, x194, 0x0, x13); + fiat_secp384r1_cmovznz_u32(&x202, x194, 0x0, x14); + fiat_secp384r1_cmovznz_u32(&x203, x194, 0x0, x15); + fiat_secp384r1_cmovznz_u32(&x204, x194, 0x0, x16); + fiat_secp384r1_cmovznz_u32(&x205, x194, 0x0, x17); + fiat_secp384r1_cmovznz_u32(&x206, x194, 0x0, x18); + fiat_secp384r1_cmovznz_u32(&x207, x194, 0x0, x19); + fiat_secp384r1_addcarryx_u32(&x208, &x209, 0x0, x46, x195); + fiat_secp384r1_addcarryx_u32(&x210, &x211, x209, x47, x196); + fiat_secp384r1_addcarryx_u32(&x212, &x213, x211, x48, x197); + fiat_secp384r1_addcarryx_u32(&x214, &x215, x213, x49, x198); + fiat_secp384r1_addcarryx_u32(&x216, &x217, x215, x50, x199); + fiat_secp384r1_addcarryx_u32(&x218, &x219, x217, x51, x200); + fiat_secp384r1_addcarryx_u32(&x220, &x221, x219, x52, x201); + fiat_secp384r1_addcarryx_u32(&x222, &x223, x221, x53, x202); + fiat_secp384r1_addcarryx_u32(&x224, &x225, x223, x54, x203); + fiat_secp384r1_addcarryx_u32(&x226, &x227, x225, x55, x204); + fiat_secp384r1_addcarryx_u32(&x228, &x229, x227, x56, x205); + fiat_secp384r1_addcarryx_u32(&x230, &x231, x229, x57, x206); + fiat_secp384r1_addcarryx_u32(&x232, &x233, x231, x58, x207); + fiat_secp384r1_cmovznz_u32(&x234, x194, 0x0, x59); + fiat_secp384r1_cmovznz_u32(&x235, x194, 0x0, x60); + fiat_secp384r1_cmovznz_u32(&x236, x194, 0x0, x61); + 
fiat_secp384r1_cmovznz_u32(&x237, x194, 0x0, x62); + fiat_secp384r1_cmovznz_u32(&x238, x194, 0x0, x63); + fiat_secp384r1_cmovznz_u32(&x239, x194, 0x0, x64); + fiat_secp384r1_cmovznz_u32(&x240, x194, 0x0, x65); + fiat_secp384r1_cmovznz_u32(&x241, x194, 0x0, x66); + fiat_secp384r1_cmovznz_u32(&x242, x194, 0x0, x67); + fiat_secp384r1_cmovznz_u32(&x243, x194, 0x0, x68); + fiat_secp384r1_cmovznz_u32(&x244, x194, 0x0, x69); + fiat_secp384r1_cmovznz_u32(&x245, x194, 0x0, x70); + fiat_secp384r1_addcarryx_u32(&x246, &x247, 0x0, x182, x234); + fiat_secp384r1_addcarryx_u32(&x248, &x249, x247, x183, x235); + fiat_secp384r1_addcarryx_u32(&x250, &x251, x249, x184, x236); + fiat_secp384r1_addcarryx_u32(&x252, &x253, x251, x185, x237); + fiat_secp384r1_addcarryx_u32(&x254, &x255, x253, x186, x238); + fiat_secp384r1_addcarryx_u32(&x256, &x257, x255, x187, x239); + fiat_secp384r1_addcarryx_u32(&x258, &x259, x257, x188, x240); + fiat_secp384r1_addcarryx_u32(&x260, &x261, x259, x189, x241); + fiat_secp384r1_addcarryx_u32(&x262, &x263, x261, x190, x242); + fiat_secp384r1_addcarryx_u32(&x264, &x265, x263, x191, x243); + fiat_secp384r1_addcarryx_u32(&x266, &x267, x265, x192, x244); + fiat_secp384r1_addcarryx_u32(&x268, &x269, x267, x193, x245); + fiat_secp384r1_subborrowx_u32(&x270, &x271, 0x0, x246, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x272, &x273, x271, x248, 0x0); + fiat_secp384r1_subborrowx_u32(&x274, &x275, x273, x250, 0x0); + fiat_secp384r1_subborrowx_u32(&x276, &x277, x275, x252, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x278, &x279, x277, x254, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x280, &x281, x279, x256, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x282, &x283, x281, x258, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x284, &x285, x283, x260, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x286, &x287, x285, x262, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x288, &x289, 
x287, x264, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x290, &x291, x289, x266, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x292, &x293, x291, x268, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x294, &x295, x293, x269, 0x0); + fiat_secp384r1_addcarryx_u32(&x296, &x297, 0x0, x6, 0x1); + x298 = ((x208 >> 1) | ((x210 << 31) & UINT32_C(0xffffffff))); + x299 = ((x210 >> 1) | ((x212 << 31) & UINT32_C(0xffffffff))); + x300 = ((x212 >> 1) | ((x214 << 31) & UINT32_C(0xffffffff))); + x301 = ((x214 >> 1) | ((x216 << 31) & UINT32_C(0xffffffff))); + x302 = ((x216 >> 1) | ((x218 << 31) & UINT32_C(0xffffffff))); + x303 = ((x218 >> 1) | ((x220 << 31) & UINT32_C(0xffffffff))); + x304 = ((x220 >> 1) | ((x222 << 31) & UINT32_C(0xffffffff))); + x305 = ((x222 >> 1) | ((x224 << 31) & UINT32_C(0xffffffff))); + x306 = ((x224 >> 1) | ((x226 << 31) & UINT32_C(0xffffffff))); + x307 = ((x226 >> 1) | ((x228 << 31) & UINT32_C(0xffffffff))); + x308 = ((x228 >> 1) | ((x230 << 31) & UINT32_C(0xffffffff))); + x309 = ((x230 >> 1) | ((x232 << 31) & UINT32_C(0xffffffff))); + x310 = ((x232 & UINT32_C(0x80000000)) | (x232 >> 1)); + fiat_secp384r1_cmovznz_u32(&x311, x120, x95, x71); + fiat_secp384r1_cmovznz_u32(&x312, x120, x97, x73); + fiat_secp384r1_cmovznz_u32(&x313, x120, x99, x75); + fiat_secp384r1_cmovznz_u32(&x314, x120, x101, x77); + fiat_secp384r1_cmovznz_u32(&x315, x120, x103, x79); + fiat_secp384r1_cmovznz_u32(&x316, x120, x105, x81); + fiat_secp384r1_cmovznz_u32(&x317, x120, x107, x83); + fiat_secp384r1_cmovznz_u32(&x318, x120, x109, x85); + fiat_secp384r1_cmovznz_u32(&x319, x120, x111, x87); + fiat_secp384r1_cmovznz_u32(&x320, x120, x113, x89); + fiat_secp384r1_cmovznz_u32(&x321, x120, x115, x91); + fiat_secp384r1_cmovznz_u32(&x322, x120, x117, x93); + fiat_secp384r1_cmovznz_u32(&x323, x295, x270, x246); + fiat_secp384r1_cmovznz_u32(&x324, x295, x272, x248); + fiat_secp384r1_cmovznz_u32(&x325, x295, x274, x250); + fiat_secp384r1_cmovznz_u32(&x326, 
x295, x276, x252); + fiat_secp384r1_cmovznz_u32(&x327, x295, x278, x254); + fiat_secp384r1_cmovznz_u32(&x328, x295, x280, x256); + fiat_secp384r1_cmovznz_u32(&x329, x295, x282, x258); + fiat_secp384r1_cmovznz_u32(&x330, x295, x284, x260); + fiat_secp384r1_cmovznz_u32(&x331, x295, x286, x262); + fiat_secp384r1_cmovznz_u32(&x332, x295, x288, x264); + fiat_secp384r1_cmovznz_u32(&x333, x295, x290, x266); + fiat_secp384r1_cmovznz_u32(&x334, x295, x292, x268); + *out1 = x296; + out2[0] = x7; + out2[1] = x8; + out2[2] = x9; + out2[3] = x10; + out2[4] = x11; + out2[5] = x12; + out2[6] = x13; + out2[7] = x14; + out2[8] = x15; + out2[9] = x16; + out2[10] = x17; + out2[11] = x18; + out2[12] = x19; + out3[0] = x298; + out3[1] = x299; + out3[2] = x300; + out3[3] = x301; + out3[4] = x302; + out3[5] = x303; + out3[6] = x304; + out3[7] = x305; + out3[8] = x306; + out3[9] = x307; + out3[10] = x308; + out3[11] = x309; + out3[12] = x310; + out4[0] = x311; + out4[1] = x312; + out4[2] = x313; + out4[3] = x314; + out4[4] = x315; + out4[5] = x316; + out4[6] = x317; + out4[7] = x318; + out4[8] = x319; + out4[9] = x320; + out4[10] = x321; + out4[11] = x322; + out5[0] = x323; + out5[1] = x324; + out5[2] = x325; + out5[3] = x326; + out5[4] = x327; + out5[5] = x328; + out5[6] = x329; + out5[7] = x330; + out5[8] = x331; + out5[9] = x332; + out5[10] = x333; + out5[11] = x334; +} + +/* END verbatim fiat code */ + +/* curve-related constants */ + +static const limb_t const_one[12] = { + UINT32_C(0x00000001), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), + UINT32_C(0x00000000), UINT32_C(0x00000001), UINT32_C(0x00000000), + UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000) +}; + +static const limb_t const_b[12] = { + UINT32_C(0x9D412DCC), UINT32_C(0x08118871), UINT32_C(0x7A4C32EC), + UINT32_C(0xF729ADD8), UINT32_C(0x1920022E), UINT32_C(0x77F2209B), + UINT32_C(0x94938AE2), UINT32_C(0xE3374BEE), UINT32_C(0x1F022094), + 
UINT32_C(0xB62B21F4), UINT32_C(0x604FBFF9), UINT32_C(0xCD08114B) +}; + +static const limb_t const_divstep[12] = { + UINT32_C(0x00005000), UINT32_C(0xFFFFC800), UINT32_C(0xFFFF83FF), + UINT32_C(0xFFFFB3FF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFF7FF), + UINT32_C(0xFFFFEFFF), UINT32_C(0xFFFFEBFF), UINT32_C(0xFFFFF3FF), + UINT32_C(0x00000BFF), UINT32_C(0x00003000), UINT32_C(0x00005000) +}; + +static const limb_t const_psat[12] = { + UINT32_C(0xFFFFFFFF), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFE), UINT32_C(0xFFFFFFFF), + UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), + UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF) +}; + +/* LUT for scalar multiplication by comb interleaving */ +static const pt_aff_t lut_cmb[21][16] = { + { + { { UINT32_C(0x49C0B528), UINT32_C(0x3DD07566), UINT32_C(0xA0D6CE38), + UINT32_C(0x20E378E2), UINT32_C(0x541B4D6E), UINT32_C(0x879C3AFC), + UINT32_C(0x59A30EFF), UINT32_C(0x64548684), UINT32_C(0x614EDE2B), + UINT32_C(0x812FF723), UINT32_C(0x299E1513), UINT32_C(0x4D3AADC2) }, + { UINT32_C(0x4B03A4FE), UINT32_C(0x23043DAD), UINT32_C(0x7BB4A9AC), + UINT32_C(0xA1BFA8BF), UINT32_C(0x2E83B050), UINT32_C(0x8BADE756), + UINT32_C(0x68F4FFD9), UINT32_C(0xC6C35219), UINT32_C(0x3969A840), + UINT32_C(0xDD800226), UINT32_C(0x5A15C5E9), UINT32_C(0x2B78ABC2) } }, + { { UINT32_C(0xC1DC4073), UINT32_C(0x05E4DBE6), UINT32_C(0xF04F779C), + UINT32_C(0xC54EA9FF), UINT32_C(0xA170CCF0), UINT32_C(0x6B2034E9), + UINT32_C(0xD51C6C3E), UINT32_C(0x3A48D732), UINT32_C(0x263AA470), + UINT32_C(0xE36F7E2D), UINT32_C(0xE7C1C3AC), UINT32_C(0xD283FE68) }, + { UINT32_C(0xC04EE157), UINT32_C(0x7E284821), UINT32_C(0x7AE0E36D), + UINT32_C(0x92D789A7), UINT32_C(0x4EF67446), UINT32_C(0x132663C0), + UINT32_C(0xD2E1D0B4), UINT32_C(0x68012D5A), UINT32_C(0x5102B339), + UINT32_C(0xF6DB68B1), UINT32_C(0x983292AF), UINT32_C(0x465465FC) } }, + { { UINT32_C(0x68F1F0DF), UINT32_C(0xBB595EBA), UINT32_C(0xCC873466), + 
UINT32_C(0xC185C0CB), UINT32_C(0x293C703B), UINT32_C(0x7F1EB1B5), + UINT32_C(0xAACC05E6), UINT32_C(0x60DB2CF5), UINT32_C(0xE2E8E4C6), + UINT32_C(0xC676B987), UINT32_C(0x1D178FFB), UINT32_C(0xE1BB26B1) }, + { UINT32_C(0x7073FA21), UINT32_C(0x2B694BA0), UINT32_C(0x72F34566), + UINT32_C(0x22C16E2E), UINT32_C(0x01C35B99), UINT32_C(0x80B61B31), + UINT32_C(0x982C0411), UINT32_C(0x4B237FAF), UINT32_C(0x24DE236D), + UINT32_C(0xE6C59440), UINT32_C(0xE209E4A3), UINT32_C(0x4DB1C9D6) } }, + { { UINT32_C(0x7D69222B), UINT32_C(0xDF13B9D1), UINT32_C(0x874774B1), + UINT32_C(0x4CE6415F), UINT32_C(0x211FAA95), UINT32_C(0x731EDCF8), + UINT32_C(0x659753ED), UINT32_C(0x5F4215D1), UINT32_C(0x9DB2DF55), + UINT32_C(0xF893DB58), UINT32_C(0x1C89025B), UINT32_C(0x932C9F81) }, + { UINT32_C(0x7706A61E), UINT32_C(0x0996B220), UINT32_C(0xA8641C79), + UINT32_C(0x135349D5), UINT32_C(0x50130844), UINT32_C(0x65AAD76F), + UINT32_C(0x01FFF780), UINT32_C(0x0FF37C04), UINT32_C(0x693B0706), + UINT32_C(0xF57F238E), UINT32_C(0xAF6C9B3E), UINT32_C(0xD90A16B6) } }, + { { UINT32_C(0x2353B92F), UINT32_C(0x2F5D200E), UINT32_C(0x3FD7E4F9), + UINT32_C(0xE35D8729), UINT32_C(0xA96D745D), UINT32_C(0x26094833), + UINT32_C(0x3CBFFF3F), UINT32_C(0xDC351DC1), UINT32_C(0xDAD54D6A), + UINT32_C(0x26D464C6), UINT32_C(0x53636C6A), UINT32_C(0x5CAB1D1D) }, + { UINT32_C(0xB18EC0B0), UINT32_C(0xF2813072), UINT32_C(0xD742AA2F), + UINT32_C(0x3777E270), UINT32_C(0x033CA7C2), UINT32_C(0x27F061C7), + UINT32_C(0x68EAD0D8), UINT32_C(0xA6ECACCC), UINT32_C(0xEE69A754), + UINT32_C(0x7D9429F4), UINT32_C(0x31E8F5C6), UINT32_C(0xE7706334) } }, + { { UINT32_C(0xB68B8C7D), UINT32_C(0xC7708B19), UINT32_C(0x44377ABA), + UINT32_C(0x4532077C), UINT32_C(0x6CDAD64F), UINT32_C(0x0DCC6770), + UINT32_C(0x147B6602), UINT32_C(0x01B8BF56), UINT32_C(0xF0561D79), + UINT32_C(0xF8D89885), UINT32_C(0x7BA9C437), UINT32_C(0x9C19E9FC) }, + { UINT32_C(0xBDC4BA25), UINT32_C(0x764EB146), UINT32_C(0xAC144B83), + UINT32_C(0x604FE46B), UINT32_C(0x8A77E780), 
UINT32_C(0x3CE81329), + UINT32_C(0xFE9E682E), UINT32_C(0x2E070F36), UINT32_C(0x3A53287A), + UINT32_C(0x41821D0C), UINT32_C(0x3533F918), UINT32_C(0x9AA62F9F) } }, + { { UINT32_C(0x75CCBDFB), UINT32_C(0x9B7AEB7E), UINT32_C(0xF6749A95), + UINT32_C(0xB25E28C5), UINT32_C(0x33B7D4AE), UINT32_C(0x8A7A8E46), + UINT32_C(0xD9C1BD56), UINT32_C(0xDB5203A8), UINT32_C(0xED22DF97), + UINT32_C(0xD2657265), UINT32_C(0x8CF23C94), UINT32_C(0xB51C56E1) }, + { UINT32_C(0x6C3D812D), UINT32_C(0xF4D39459), UINT32_C(0x87CAE0C2), + UINT32_C(0xD8E88F1A), UINT32_C(0xCF4D0FE3), UINT32_C(0x789A2A48), + UINT32_C(0xFEC38D60), UINT32_C(0xB7FEAC2D), UINT32_C(0x3B490EC3), + UINT32_C(0x81FDBD1C), UINT32_C(0xCC6979E1), UINT32_C(0x4617ADB7) } }, + { { UINT32_C(0x4709F4A9), UINT32_C(0x446AD888), UINT32_C(0xEC3DABD8), + UINT32_C(0x2B7210E2), UINT32_C(0x50E07B34), UINT32_C(0x83CCF195), + UINT32_C(0x789B3075), UINT32_C(0x59500917), UINT32_C(0xEB085993), + UINT32_C(0x0FC01FD4), UINT32_C(0x4903026B), UINT32_C(0xFB62D26F) }, + { UINT32_C(0x6FE989BB), UINT32_C(0x2309CC9D), UINT32_C(0x144BD586), + UINT32_C(0x61609CBD), UINT32_C(0xDE06610C), UINT32_C(0x4B23D3A0), + UINT32_C(0xD898F470), UINT32_C(0xDDDC2866), UINT32_C(0x400C5797), + UINT32_C(0x8733FC41), UINT32_C(0xD0BC2716), UINT32_C(0x5A68C6FE) } }, + { { UINT32_C(0x4B4A3CD0), UINT32_C(0x8903E130), UINT32_C(0x8FF1F43E), + UINT32_C(0x3EA4EA4C), UINT32_C(0xF655A10D), UINT32_C(0xE6FC3F2A), + UINT32_C(0x524FFEFC), UINT32_C(0x7BE3737D), UINT32_C(0x5330455E), + UINT32_C(0x9F692855), UINT32_C(0xE475CE70), UINT32_C(0x524F166E) }, + { UINT32_C(0x6C12F055), UINT32_C(0x3FCC69CD), UINT32_C(0xD5B9C0DA), + UINT32_C(0x4E23B6FF), UINT32_C(0x336BF183), UINT32_C(0x49CE6993), + UINT32_C(0x4A54504A), UINT32_C(0xF87D6D85), UINT32_C(0xB3C2677A), + UINT32_C(0x25EB5DF1), UINT32_C(0x55B164C9), UINT32_C(0xAC37986F) } }, + { { UINT32_C(0xBAA84C08), UINT32_C(0x82A2ED4A), UINT32_C(0x41A8C912), + UINT32_C(0x22C4CC5F), UINT32_C(0x154AAD5E), UINT32_C(0xCA109C3B), + UINT32_C(0xFC38538E), 
UINT32_C(0x23891298), UINT32_C(0x539802AE), + UINT32_C(0xB3B6639C), UINT32_C(0x0390D706), UINT32_C(0xFA0F1F45) }, + { UINT32_C(0xB0DC21D0), UINT32_C(0x46B78E5D), UINT32_C(0xC3DA2EAC), + UINT32_C(0xA8C72D3C), UINT32_C(0x6FF2F643), UINT32_C(0x9170B378), + UINT32_C(0xB67F30C3), UINT32_C(0x3F5A799B), UINT32_C(0x8264B672), + UINT32_C(0x15D1DC77), UINT32_C(0xE9577764), UINT32_C(0xA1D47B23) } }, + { { UINT32_C(0x0422CE2F), UINT32_C(0x08265E51), UINT32_C(0xDD2F9E21), + UINT32_C(0x88E0D496), UINT32_C(0x6177F75D), UINT32_C(0x30128AA0), + UINT32_C(0xBD9EBE69), UINT32_C(0x2E59AB62), UINT32_C(0x5DF0E537), + UINT32_C(0x1B1A0F6C), UINT32_C(0xDAC012B5), UINT32_C(0xAB16C626) }, + { UINT32_C(0x008C5DE7), UINT32_C(0x8014214B), UINT32_C(0x38F17BEA), + UINT32_C(0xAA740A9E), UINT32_C(0x8A149098), UINT32_C(0x262EBB49), + UINT32_C(0x8527CD59), UINT32_C(0xB454111E), UINT32_C(0xACEA5817), + UINT32_C(0x266AD15A), UINT32_C(0x1353CCBA), UINT32_C(0x21824F41) } }, + { { UINT32_C(0x12E3683B), UINT32_C(0xD1B4E74D), UINT32_C(0x569B8EF6), + UINT32_C(0x990ED20B), UINT32_C(0x429C0A18), UINT32_C(0xB9D3DD25), + UINT32_C(0x2A351783), UINT32_C(0x1C75B8AB), UINT32_C(0x905432F0), + UINT32_C(0x61E4CA2B), UINT32_C(0xEEA8F224), UINT32_C(0x80826A69) }, + { UINT32_C(0xEC52ABAD), UINT32_C(0x7FC33A6B), UINT32_C(0xA65E4813), + UINT32_C(0x0BCCA3F0), UINT32_C(0xA527CEBE), UINT32_C(0x7AD8A132), + UINT32_C(0xEAF22C7E), UINT32_C(0xF0138950), UINT32_C(0x566718C1), + UINT32_C(0x282D2437), UINT32_C(0xE2212559), UINT32_C(0x9DFCCB0D) } }, + { { UINT32_C(0x58CE3B83), UINT32_C(0x1E937227), UINT32_C(0x3CB3FB36), + UINT32_C(0xBB280DFA), UINT32_C(0xE2BE174A), UINT32_C(0x57D0F3D2), + UINT32_C(0x208ABE1E), UINT32_C(0x9BD51B99), UINT32_C(0xDE248024), + UINT32_C(0x3809AB50), UINT32_C(0xA5BB7331), UINT32_C(0xC29C6E2C) }, + { UINT32_C(0x61124F05), UINT32_C(0x9944FD2E), UINT32_C(0x9009E391), + UINT32_C(0x83CCBC4E), UINT32_C(0x9424A3CC), UINT32_C(0x01628F05), + UINT32_C(0xEA8E4344), UINT32_C(0xD6A2F51D), UINT32_C(0x4CEBC96E), + 
UINT32_C(0xDA3E1A3D), UINT32_C(0xE97809DC), UINT32_C(0x1FE6FB42) } }, + { { UINT32_C(0x467D66E4), UINT32_C(0xA04482D2), UINT32_C(0x4D78291D), + UINT32_C(0xCF191293), UINT32_C(0x482396F9), UINT32_C(0x8E0D4168), + UINT32_C(0xD18F14D0), UINT32_C(0x7228E2D5), UINT32_C(0x9C6A58FE), + UINT32_C(0x2F7E8D50), UINT32_C(0x373E5AEC), UINT32_C(0xE8CA780E) }, + { UINT32_C(0x1B68E9F8), UINT32_C(0x42AAD1D6), UINT32_C(0x69E2F8F4), + UINT32_C(0x58A6D7F5), UINT32_C(0x31DA1BEA), UINT32_C(0xD779ADFE), + UINT32_C(0x38C85A85), UINT32_C(0x7D265406), UINT32_C(0xD44D3CDF), + UINT32_C(0x67E67195), UINT32_C(0xC5134ED7), UINT32_C(0x17820A0B) } }, + { { UINT32_C(0xD3021470), UINT32_C(0x019D6AC5), UINT32_C(0x780443D6), + UINT32_C(0x25846B66), UINT32_C(0x55C97647), UINT32_C(0xCE3C15ED), + UINT32_C(0x0E3FEB0F), UINT32_C(0x3DC22D49), UINT32_C(0xA7DF26E4), + UINT32_C(0x2065B7CB), UINT32_C(0x187CEA1F), UINT32_C(0xC8B00AE8) }, + { UINT32_C(0x865DDED3), UINT32_C(0x1A5284A0), UINT32_C(0x20C83DE2), + UINT32_C(0x293C1649), UINT32_C(0xCCE851B3), UINT32_C(0xAB178D26), + UINT32_C(0x404505FB), UINT32_C(0x8E6DB10B), UINT32_C(0x90C82033), + UINT32_C(0xF6F57E71), UINT32_C(0x5977F16C), UINT32_C(0x1D2A1C01) } }, + { { UINT32_C(0x7C8906A4), UINT32_C(0xA39C8931), UINT32_C(0x9E821EE6), + UINT32_C(0xB6E7ECDD), UINT32_C(0xF0DF4FE6), UINT32_C(0x2ECF8340), + UINT32_C(0x53C14965), UINT32_C(0xD42F7DC9), UINT32_C(0xE3BA8285), + UINT32_C(0x1AFB51A3), UINT32_C(0x0A3305D1), UINT32_C(0x6C07C404) }, + { UINT32_C(0x127FC1DA), UINT32_C(0xDAB83288), UINT32_C(0x374C4B08), + UINT32_C(0xBC0A699B), UINT32_C(0x42EB20DD), UINT32_C(0x402A9BAB), + UINT32_C(0x045A7A1C), UINT32_C(0xD7DD464F), UINT32_C(0x36BEECC4), + UINT32_C(0x5B3D0D6D), UINT32_C(0x6398A19D), UINT32_C(0x475A3E75) } }, + }, + { + { { UINT32_C(0x72876AE8), UINT32_C(0x31BDB483), UINT32_C(0x961ED1BF), + UINT32_C(0xE3325D98), UINT32_C(0x9B6FC64D), UINT32_C(0x18C04246), + UINT32_C(0x15786B8C), UINT32_C(0x0DCC15FA), UINT32_C(0x8E63DA4A), + UINT32_C(0x81ACDB06), 
UINT32_C(0xDADA70FB), UINT32_C(0xD3A4B643) }, + { UINT32_C(0xDEA424EB), UINT32_C(0x46361AFE), UINT32_C(0x89B92970), + UINT32_C(0xDC2D2CAE), UINT32_C(0x615694E6), UINT32_C(0xF389B61B), + UINT32_C(0x872951D2), UINT32_C(0x7036DEF1), UINT32_C(0xD93BADC7), + UINT32_C(0x40FD3BDA), UINT32_C(0x380A68D3), UINT32_C(0x45AB6321) } }, + { { UINT32_C(0x81A2703A), UINT32_C(0x23C1F744), UINT32_C(0xB9859136), + UINT32_C(0x1A5D075C), UINT32_C(0x5AFD1BFD), UINT32_C(0xA4F82C9D), + UINT32_C(0xF89D76FE), UINT32_C(0xA3D1E9A4), UINT32_C(0x75702F80), + UINT32_C(0x964F7050), UINT32_C(0xF56C089D), UINT32_C(0x182BF349) }, + { UINT32_C(0xBE0DA6E1), UINT32_C(0xE205FA8F), UINT32_C(0x0A40F8F3), + UINT32_C(0x32905EB9), UINT32_C(0x356D4395), UINT32_C(0x331A1004), + UINT32_C(0xFDBBDFDE), UINT32_C(0x58B78901), UINT32_C(0x9BA00E71), + UINT32_C(0xA52A1597), UINT32_C(0x55497A30), UINT32_C(0xE0092E1F) } }, + { { UINT32_C(0x70EE8F39), UINT32_C(0x5562A856), UINT32_C(0x64E52A9C), + UINT32_C(0x86B0C117), UINT32_C(0x09C75B8C), UINT32_C(0xC19F3174), + UINT32_C(0x24923F80), UINT32_C(0x21C7CC31), UINT32_C(0x8F5B291E), + UINT32_C(0xE63FE47F), UINT32_C(0x0DC08B05), UINT32_C(0x3D6D3C05) }, + { UINT32_C(0xEE0C39A1), UINT32_C(0x58AE455E), UINT32_C(0x0AD97942), + UINT32_C(0x78BEA431), UINT32_C(0x3EE3989C), UINT32_C(0x42C7C97F), + UINT32_C(0xF38759AE), UINT32_C(0xC1B03AF5), UINT32_C(0xBCF46899), + UINT32_C(0x1A673C75), UINT32_C(0x8D508C7D), UINT32_C(0x4831B7D3) } }, + { { UINT32_C(0xC552E354), UINT32_C(0x76512D1B), UINT32_C(0x273020FD), + UINT32_C(0x2B7EB6DF), UINT32_C(0x025A5F25), UINT32_C(0xD1C73AA8), + UINT32_C(0x5CBD2A40), UINT32_C(0x2ABA1929), UINT32_C(0xC88D61C6), + UINT32_C(0xB53CADC3), UINT32_C(0x098290F3), UINT32_C(0x7E66A95E) }, + { UINT32_C(0xAF4C5073), UINT32_C(0x72800ECB), UINT32_C(0x9DC63FAF), + UINT32_C(0x81F2725E), UINT32_C(0x282BA9D1), UINT32_C(0x14BF92A7), + UINT32_C(0xBD5F1BB2), UINT32_C(0x90629672), UINT32_C(0xA97C6C96), + UINT32_C(0x362F68EB), UINT32_C(0x7EA9D601), UINT32_C(0xB1D3BB8B) } }, + { { 
UINT32_C(0xA9C94429), UINT32_C(0x73878F7F), UINT32_C(0x456CA6D8), + UINT32_C(0xB35C3BC8), UINT32_C(0xF721923A), UINT32_C(0xD96F0B3C), + UINT32_C(0xE6D44FA1), UINT32_C(0x28D8F06C), UINT32_C(0xD5CD671A), + UINT32_C(0x94EFDCDC), UINT32_C(0x3F97D481), UINT32_C(0x0299AB93) }, + { UINT32_C(0x2FD1D324), UINT32_C(0xB7CED6EA), UINT32_C(0x7E932EC2), + UINT32_C(0xBD683208), UINT32_C(0xCB755A6E), UINT32_C(0x24ED31FB), + UINT32_C(0xE48781D2), UINT32_C(0xA636098E), UINT32_C(0xF0A4F297), + UINT32_C(0x8687C63C), UINT32_C(0x07478526), UINT32_C(0xBB523440) } }, + { { UINT32_C(0x34124B56), UINT32_C(0x2E5F7419), UINT32_C(0x4B3F02CA), + UINT32_C(0x1F223AE1), UINT32_C(0xE8336C7E), UINT32_C(0x6345B427), + UINT32_C(0xF5D0E3D0), UINT32_C(0x92123E16), UINT32_C(0x45E79F3A), + UINT32_C(0xDAF0D14D), UINT32_C(0x6F3BD0C6), UINT32_C(0x6ACA6765) }, + { UINT32_C(0x403813F4), UINT32_C(0xF6169FAB), UINT32_C(0x334A4C59), + UINT32_C(0x31DC39C0), UINT32_C(0xD589866D), UINT32_C(0x74C46753), + UINT32_C(0x984C6A5D), UINT32_C(0x5741511D), UINT32_C(0x97FED2D3), + UINT32_C(0xF2631287), UINT32_C(0x11614886), UINT32_C(0x5687CA1B) } }, + { { UINT32_C(0x33836D4B), UINT32_C(0x076D902A), UINT32_C(0x24AFB557), + UINT32_C(0xEC6C5C43), UINT32_C(0xA0516A0F), UINT32_C(0xA0FE2D1C), + UINT32_C(0x00D22ECC), UINT32_C(0x6FB8D737), UINT32_C(0xDAF1D7B3), + UINT32_C(0xF1DE9077), UINT32_C(0xD4C0C1EB), UINT32_C(0xE4695F77) }, + { UINT32_C(0xB4375573), UINT32_C(0x5F0FD8A8), UINT32_C(0x5E50944F), + UINT32_C(0x76238359), UINT32_C(0x635CD76F), UINT32_C(0x65EA2F28), + UINT32_C(0x25FDE7B0), UINT32_C(0x08547769), UINT32_C(0x51944304), + UINT32_C(0xB2345A2E), UINT32_C(0xA16C980D), UINT32_C(0x86EFA2F7) } }, + { { UINT32_C(0xBF4D1D63), UINT32_C(0x4CCBE2D0), UINT32_C(0x397366D5), + UINT32_C(0x32E33401), UINT32_C(0x71BDA2CE), UINT32_C(0xC83AFDDE), + UINT32_C(0x478ED9E6), UINT32_C(0x8DACE2AC), UINT32_C(0x763FDD9E), + UINT32_C(0x3AC6A559), UINT32_C(0xB398558F), UINT32_C(0x0FFDB04C) }, + { UINT32_C(0xAFB9D6B8), UINT32_C(0x6C1B99B2), 
UINT32_C(0x27F815DD), + UINT32_C(0x572BA39C), UINT32_C(0x0DBCF842), UINT32_C(0x9DE73EE7), + UINT32_C(0x29267B88), UINT32_C(0x2A3ED589), UINT32_C(0x15EBBBB3), + UINT32_C(0xD46A7FD3), UINT32_C(0xE29400C7), UINT32_C(0xD1D01863) } }, + { { UINT32_C(0xE1F89EC5), UINT32_C(0x8FB101D1), UINT32_C(0xF8508042), + UINT32_C(0xB87A1F53), UINT32_C(0x0ED7BEEF), UINT32_C(0x28C8DB24), + UINT32_C(0xACE8660A), UINT32_C(0x3940F845), UINT32_C(0xC6D453FD), + UINT32_C(0x4EACB619), UINT32_C(0x2BAD6160), UINT32_C(0x2E044C98) }, + { UINT32_C(0x80B16C02), UINT32_C(0x87928548), UINT32_C(0xC0A9EB64), + UINT32_C(0xF0D4BEB3), UINT32_C(0xC183C195), UINT32_C(0xD785B4AF), + UINT32_C(0x5E6C46EA), UINT32_C(0x23AAB0E6), UINT32_C(0xA930FECA), + UINT32_C(0x30F7E104), UINT32_C(0xD55C10FB), UINT32_C(0x6A1A7B8B) } }, + { { UINT32_C(0xDBFED1AA), UINT32_C(0xDA74EAEB), UINT32_C(0xDF0B025C), + UINT32_C(0xC8A59223), UINT32_C(0xD5B627F7), UINT32_C(0x7EF7DC85), + UINT32_C(0x197D7624), UINT32_C(0x02A13AE1), UINT32_C(0x2F785A9B), + UINT32_C(0x119E9BE1), UINT32_C(0x00D6B219), UINT32_C(0xC0B7572F) }, + { UINT32_C(0x6D4CAF30), UINT32_C(0x9B1E5126), UINT32_C(0x0A840BD1), + UINT32_C(0xA16A5117), UINT32_C(0x0E9CCF43), UINT32_C(0x5BE17B91), + UINT32_C(0x69CF2C9C), UINT32_C(0x5BDBEDDD), UINT32_C(0x4CF4F289), + UINT32_C(0x9FFBFBCF), UINT32_C(0x6C355CE9), UINT32_C(0xE1A62183) } }, + { { UINT32_C(0xA7B2FCCF), UINT32_C(0x056199D9), UINT32_C(0xCE1D784E), + UINT32_C(0x51F2E7B6), UINT32_C(0x339E2FF0), UINT32_C(0xA1D09C47), + UINT32_C(0xB836D0A9), UINT32_C(0xC8E64890), UINT32_C(0xC0D07EBE), + UINT32_C(0x2F781DCB), UINT32_C(0x3ACF934C), UINT32_C(0x5CF3C2AD) }, + { UINT32_C(0xA17E26AE), UINT32_C(0xE55DB190), UINT32_C(0x91245513), + UINT32_C(0xC9C61E1F), UINT32_C(0x61998C15), UINT32_C(0x83D7E6CF), + UINT32_C(0xE41D38E3), UINT32_C(0x4DB33C85), UINT32_C(0xC2FEE43D), + UINT32_C(0x74D5F91D), UINT32_C(0x36BBC826), UINT32_C(0x7EBBDB45) } }, + { { UINT32_C(0xCB655A9D), UINT32_C(0xE20EC7E9), UINT32_C(0x5C47D421), + UINT32_C(0x4977EB92), 
UINT32_C(0x3B9D72FA), UINT32_C(0xA237E12C), + UINT32_C(0xCBF7B145), UINT32_C(0xCAAEDBC1), UINT32_C(0x3B77AAA3), + UINT32_C(0x5200F5B2), UINT32_C(0xBDBE5380), UINT32_C(0x32EDED55) }, + { UINT32_C(0xE7C9B80A), UINT32_C(0x74E38A40), UINT32_C(0xAB6DE911), + UINT32_C(0x3A3F0CF8), UINT32_C(0xAD16AAF0), UINT32_C(0x56DCDD7A), + UINT32_C(0x8E861D5E), UINT32_C(0x3D292449), UINT32_C(0x985733E2), + UINT32_C(0xD6C61878), UINT32_C(0x6AA6CD5B), UINT32_C(0x2401FE7D) } }, + { { UINT32_C(0xB42E3686), UINT32_C(0xABB3DC75), UINT32_C(0xB4C57E61), + UINT32_C(0xAE712419), UINT32_C(0xB21B009B), UINT32_C(0x2C565F72), + UINT32_C(0x710C3699), UINT32_C(0xA5F1DA2E), UINT32_C(0xA5EBA59A), + UINT32_C(0x771099A0), UINT32_C(0xC10017A0), UINT32_C(0x4DA88F4A) }, + { UINT32_C(0x1927B56D), UINT32_C(0x987FFFD3), UINT32_C(0xC4E33478), + UINT32_C(0xB98CB8EC), UINT32_C(0xC2248166), UINT32_C(0xB224A971), + UINT32_C(0xDE1DC794), UINT32_C(0x5470F554), UINT32_C(0xE31FF983), + UINT32_C(0xD747CC24), UINT32_C(0xB5B22DAE), UINT32_C(0xB91745E9) } }, + { { UINT32_C(0x72F34420), UINT32_C(0x6CCBFED0), UINT32_C(0xA53039D2), + UINT32_C(0x95045E4D), UINT32_C(0x5A793944), UINT32_C(0x3B6C1154), + UINT32_C(0xDDB6B799), UINT32_C(0xAA114145), UINT32_C(0x252B7637), + UINT32_C(0xABC15CA4), UINT32_C(0xA5744634), UINT32_C(0x5745A35B) }, + { UINT32_C(0xDA596FC0), UINT32_C(0x05DC6BDE), UINT32_C(0xA8020881), + UINT32_C(0xCD52C18C), UINT32_C(0xD296BAD0), UINT32_C(0x03FA9F47), + UINT32_C(0x7268E139), UINT32_C(0xD8E2C129), UINT32_C(0x9EC450B0), + UINT32_C(0x58C1A98D), UINT32_C(0xDE48B20D), UINT32_C(0x909638DA) } }, + { { UINT32_C(0x9B7F8311), UINT32_C(0x7AFC30D4), UINT32_C(0x42368EA3), + UINT32_C(0x82A00422), UINT32_C(0x6F5F9865), UINT32_C(0xBFF95198), + UINT32_C(0xFC0A070F), UINT32_C(0x9B24F612), UINT32_C(0x620F489D), + UINT32_C(0x22C06CF2), UINT32_C(0x780F7DBB), UINT32_C(0x3C7ED052) }, + { UINT32_C(0x34DAFE9B), UINT32_C(0xDB87AB18), UINT32_C(0x9C4BBCA1), + UINT32_C(0x20C03B40), UINT32_C(0x59A42341), UINT32_C(0x5D718CF0), + 
UINT32_C(0x69E84538), UINT32_C(0x98631706), UINT32_C(0xD27D64E1), + UINT32_C(0x5557192B), UINT32_C(0xDA822766), UINT32_C(0x08B4EC52) } }, + { { UINT32_C(0xD66C1A59), UINT32_C(0xB2D986F6), UINT32_C(0x78E0E423), + UINT32_C(0x927DEB16), UINT32_C(0x49C3DEDC), UINT32_C(0x9E673CDE), + UINT32_C(0xF7ECB6CF), UINT32_C(0xFA362D84), UINT32_C(0x1BA17340), + UINT32_C(0x078E5F40), UINT32_C(0x1F4E489C), UINT32_C(0x934CA5D1) }, + { UINT32_C(0x64EEF493), UINT32_C(0xC03C0731), UINT32_C(0xD7931A7E), + UINT32_C(0x631A353B), UINT32_C(0x65DD74F1), UINT32_C(0x8E7CC3BB), + UINT32_C(0x702676A5), UINT32_C(0xD55864C5), UINT32_C(0x439F04BD), + UINT32_C(0x6D306AC4), UINT32_C(0x2BAFED57), UINT32_C(0x58544F67) } }, + }, + { + { { UINT32_C(0xEC074AEA), UINT32_C(0xB083BA6A), UINT32_C(0x7F0B505B), + UINT32_C(0x46FAC5EF), UINT32_C(0xFC82DC03), UINT32_C(0x95367A21), + UINT32_C(0x9D3679D8), UINT32_C(0x227BE26A), UINT32_C(0x7E9724C0), + UINT32_C(0xC70F6D6C), UINT32_C(0xF9EBEC0F), UINT32_C(0xCD68C757) }, + { UINT32_C(0x8FF321B2), UINT32_C(0x29DDE03E), UINT32_C(0x031939DC), + UINT32_C(0xF84AD7BB), UINT32_C(0x0F602F4B), UINT32_C(0xDAF590C9), + UINT32_C(0x49722BC4), UINT32_C(0x17C52888), UINT32_C(0x089B22B6), + UINT32_C(0xA8DF99F0), UINT32_C(0xE59B9B90), UINT32_C(0xC21BC5D4) } }, + { { UINT32_C(0x8A31973F), UINT32_C(0x4936C6A0), UINT32_C(0x83B8C205), + UINT32_C(0x54D442FA), UINT32_C(0x5714F2C6), UINT32_C(0x03AEE8B4), + UINT32_C(0x3F5AC25A), UINT32_C(0x139BD692), UINT32_C(0xB5B33794), + UINT32_C(0x6A2E42BA), UINT32_C(0x3FF7BBA9), UINT32_C(0x50FA1164) }, + { UINT32_C(0xF7E2C099), UINT32_C(0xB61D8643), UINT32_C(0xBD5C6637), + UINT32_C(0x2366C993), UINT32_C(0x72EB77FA), UINT32_C(0x62110E14), + UINT32_C(0x3B99C635), UINT32_C(0x3D5B96F1), UINT32_C(0xF674C9F2), + UINT32_C(0x956ECF64), UINT32_C(0xEF2BA250), UINT32_C(0xC56F7E51) } }, + { { UINT32_C(0xFF602C1B), UINT32_C(0x246FFCB6), UINT32_C(0x6E1258E0), + UINT32_C(0x1E1A1D74), UINT32_C(0x250E6676), UINT32_C(0xB4B43AE2), + UINT32_C(0x924CE5FA), 
UINT32_C(0x95C1B5F0), UINT32_C(0xEBD8C776), + UINT32_C(0x2555795B), UINT32_C(0xACD9D9D0), UINT32_C(0x4C1E03DC) }, + { UINT32_C(0x9CE90C61), UINT32_C(0xE1D74AA6), UINT32_C(0xA9C4B9F9), + UINT32_C(0xA88C0769), UINT32_C(0x95AF56DE), UINT32_C(0xDF74DF27), + UINT32_C(0xB331B6F4), UINT32_C(0x24B10C5F), UINT32_C(0x6559E137), + UINT32_C(0xB0A6DF9A), UINT32_C(0xC06637F2), UINT32_C(0x6ACC1B8F) } }, + { { UINT32_C(0x34B4E381), UINT32_C(0xBD8C0868), UINT32_C(0x30DFF271), + UINT32_C(0x278CACC7), UINT32_C(0x02459389), UINT32_C(0x87ED12DE), + UINT32_C(0xDEF840B6), UINT32_C(0x3F7D98FF), UINT32_C(0x5F0B56E1), + UINT32_C(0x71EEE0CB), UINT32_C(0xD8D9BE87), UINT32_C(0x462B5C9B) }, + { UINT32_C(0x98094C0F), UINT32_C(0xE6B50B5A), UINT32_C(0x508C67CE), + UINT32_C(0x26F3B274), UINT32_C(0x7CB1F992), UINT32_C(0x418B1BD1), + UINT32_C(0x4FF11827), UINT32_C(0x607818ED), UINT32_C(0x9B042C63), + UINT32_C(0xE630D93A), UINT32_C(0x8C779AE3), UINT32_C(0x38B9EFF3) } }, + { { UINT32_C(0x729C5431), UINT32_C(0xE8767D36), UINT32_C(0xBB94642C), + UINT32_C(0xA8BD07C0), UINT32_C(0x58F2E5B2), UINT32_C(0x0C11FC8E), + UINT32_C(0x547533FE), UINT32_C(0xD8912D48), UINT32_C(0x230D91FB), + UINT32_C(0xAAE14F5E), UINT32_C(0x676DFBA0), UINT32_C(0xC122051A) }, + { UINT32_C(0x5EA93078), UINT32_C(0x9ED4501F), UINT32_C(0xBD4BEE0A), + UINT32_C(0x2758515C), UINT32_C(0x94D21F52), UINT32_C(0x97733C6C), + UINT32_C(0x4AD306A2), UINT32_C(0x139BCD6D), UINT32_C(0x298123CC), + UINT32_C(0x0AAECBDC), UINT32_C(0x1CB7C7C9), UINT32_C(0x102B8A31) } }, + { { UINT32_C(0xFAF46675), UINT32_C(0x22A28E59), UINT32_C(0x10A31E7D), + UINT32_C(0x10757308), UINT32_C(0x2B4C2F4F), UINT32_C(0xC7EEAC84), + UINT32_C(0xB5EF5184), UINT32_C(0xBA370148), UINT32_C(0x8732E055), + UINT32_C(0x4A5A2866), UINT32_C(0xB887C36F), UINT32_C(0x14B8DCDC) }, + { UINT32_C(0x433F093D), UINT32_C(0xDBA8C85C), UINT32_C(0x1C9A201C), + UINT32_C(0x73DF549D), UINT32_C(0x70F927D8), UINT32_C(0x69AA0D7B), + UINT32_C(0xD7D2493A), UINT32_C(0xFA3A8685), UINT32_C(0x0A7F4013), + 
UINT32_C(0x6F48A255), UINT32_C(0xDD393067), UINT32_C(0xD20C8BF9) } }, + { { UINT32_C(0x81625E78), UINT32_C(0x4EC874EA), UINT32_C(0x3FBE9267), + UINT32_C(0x8B8D8B5A), UINT32_C(0x9421EC2F), UINT32_C(0xA3D9D164), + UINT32_C(0x880EA295), UINT32_C(0x490E92D9), UINT32_C(0xD8F3B6DA), + UINT32_C(0x745D1EDC), UINT32_C(0x8F18BA03), UINT32_C(0x0116628B) }, + { UINT32_C(0x834EADCE), UINT32_C(0x0FF6BCE0), UINT32_C(0x000827F7), + UINT32_C(0x464697F2), UINT32_C(0x498D724E), UINT32_C(0x08DCCF84), + UINT32_C(0x1E88304C), UINT32_C(0x7896D365), UINT32_C(0x135E3622), + UINT32_C(0xE63EBCCE), UINT32_C(0xDC007521), UINT32_C(0xFB942E8E) } }, + { { UINT32_C(0xA3688621), UINT32_C(0xBB155A66), UINT32_C(0xF91B52A3), + UINT32_C(0xED2FD7CD), UINT32_C(0xEA20CB88), UINT32_C(0x52798F5D), + UINT32_C(0x373F7DD8), UINT32_C(0x069CE105), UINT32_C(0x8CA78F6B), + UINT32_C(0xF9392EC7), UINT32_C(0x6B335169), UINT32_C(0xB3013E25) }, + { UINT32_C(0x6B11715C), UINT32_C(0x1D92F800), UINT32_C(0xFF9DC464), + UINT32_C(0xADD4050E), UINT32_C(0x8465B84A), UINT32_C(0x2AC22659), + UINT32_C(0x465B2BD6), UINT32_C(0x2729D646), UINT32_C(0xE4EFF9DD), + UINT32_C(0x6202344A), UINT32_C(0xCD9B90B9), UINT32_C(0x51F3198F) } }, + { { UINT32_C(0xE5F0AE1D), UINT32_C(0x17CE54EF), UINT32_C(0xB09852AF), + UINT32_C(0x984E8204), UINT32_C(0xC4B27A71), UINT32_C(0x3365B37A), + UINT32_C(0xA00E0A9C), UINT32_C(0x720E3152), UINT32_C(0x925BD606), + UINT32_C(0x3692F70D), UINT32_C(0x7BC7E9AB), UINT32_C(0xBE6E699D) }, + { UINT32_C(0x4C89A3C0), UINT32_C(0xD75C041F), UINT32_C(0x8DC100C0), + UINT32_C(0x8B9F592D), UINT32_C(0xAD228F71), UINT32_C(0x30750F3A), + UINT32_C(0xE8B17A11), UINT32_C(0x1B9ECF84), UINT32_C(0x0FBFA8A2), + UINT32_C(0xDF202562), UINT32_C(0xAA1B6D67), UINT32_C(0x45C811FC) } }, + { { UINT32_C(0x1A5151F8), UINT32_C(0xEC5B84B7), UINT32_C(0x550AB2D2), + UINT32_C(0x118E59E8), UINT32_C(0x049BD735), UINT32_C(0x2CCDEDA4), + UINT32_C(0x9CD62F0F), UINT32_C(0xC99CBA71), UINT32_C(0x62C9E4F8), + UINT32_C(0x69B8040A), UINT32_C(0x110B8283), 
UINT32_C(0x16F1A31A) }, + { UINT32_C(0x98E908A3), UINT32_C(0x53F63802), UINT32_C(0xD862F9DE), + UINT32_C(0x308CB6EF), UINT32_C(0xA521A95A), UINT32_C(0xE185DAD8), + UINT32_C(0x097F75CA), UINT32_C(0x4D8FE9A4), UINT32_C(0x1CA07D53), + UINT32_C(0xD1ECCEC7), UINT32_C(0x0DB07E83), UINT32_C(0x13DFA1DC) } }, + { { UINT32_C(0x0F591A76), UINT32_C(0xDDAF9DC6), UINT32_C(0x1685F412), + UINT32_C(0xE1A6D7CC), UINT32_C(0x002B6E8D), UINT32_C(0x153DE557), + UINT32_C(0xC6DA37D9), UINT32_C(0x730C38BC), UINT32_C(0x0914B597), + UINT32_C(0xAE180622), UINT32_C(0xDD8C3A0A), UINT32_C(0x84F98103) }, + { UINT32_C(0x8DA205B0), UINT32_C(0x369C5398), UINT32_C(0x3888A720), + UINT32_C(0xA3D95B81), UINT32_C(0xE10E2806), UINT32_C(0x1F3F8BBF), + UINT32_C(0x4530D1F3), UINT32_C(0x48663DF5), UINT32_C(0x3E377713), + UINT32_C(0x320523B4), UINT32_C(0xC7894814), UINT32_C(0xE8B1A575) } }, + { { UINT32_C(0x2EE8EA07), UINT32_C(0x33066871), UINT32_C(0x60DA199D), + UINT32_C(0xC6FB4EC5), UINT32_C(0xF4370A05), UINT32_C(0x33231860), + UINT32_C(0xC6DE4E26), UINT32_C(0x7ABECE72), UINT32_C(0xEBDECE7A), + UINT32_C(0xDE8D4BD8), UINT32_C(0x1CBE93C7), UINT32_C(0xC90EE657) }, + { UINT32_C(0x85AC2509), UINT32_C(0x0246751B), UINT32_C(0x30380245), + UINT32_C(0xD0EF142C), UINT32_C(0x7C76E39C), UINT32_C(0x086DF9C4), + UINT32_C(0xB789FB56), UINT32_C(0x68F1304F), UINT32_C(0xA5E4BD56), + UINT32_C(0x23E4CB98), UINT32_C(0x64663DCA), UINT32_C(0x69A4C63C) } }, + { { UINT32_C(0x7CB34E63), UINT32_C(0x6C72B6AF), UINT32_C(0x6DFC23FE), + UINT32_C(0x073C40CD), UINT32_C(0xC936693A), UINT32_C(0xBDEEE7A1), + UINT32_C(0x6EFAD378), UINT32_C(0xBC858E80), UINT32_C(0xF5BE55D4), + UINT32_C(0xEAD719FF), UINT32_C(0x04552F5F), UINT32_C(0xC8C3238F) }, + { UINT32_C(0x928D5784), UINT32_C(0x0952C068), UINT32_C(0x94C58F2B), + UINT32_C(0x89DFDF22), UINT32_C(0x67502C50), UINT32_C(0x332DEDF3), + UINT32_C(0xAC0BE258), UINT32_C(0x3ED2FA3A), UINT32_C(0x7C5C8244), + UINT32_C(0xAEDC9B8A), UINT32_C(0xDC0EA34F), UINT32_C(0x43A761B9) } }, + { { UINT32_C(0xCC5E21A5), 
UINT32_C(0x8FD683A2), UINT32_C(0xFBA2BB68), + UINT32_C(0x5F444C6E), UINT32_C(0xAF05586D), UINT32_C(0x709ACD0E), + UINT32_C(0xDE8FB348), UINT32_C(0x8EFA54D2), UINT32_C(0x34CFE29E), + UINT32_C(0x35276B71), UINT32_C(0x941EAC8C), UINT32_C(0x77A06FCD) }, + { UINT32_C(0x928322DD), UINT32_C(0x5815792D), UINT32_C(0x67F7CB59), + UINT32_C(0x82FF356B), UINT32_C(0x304980F4), UINT32_C(0x71E40A78), + UINT32_C(0x3667D021), UINT32_C(0xC8645C27), UINT32_C(0xAEBAE28F), + UINT32_C(0xE785741C), UINT32_C(0x53ECAC37), UINT32_C(0xB2C1BC75) } }, + { { UINT32_C(0x1D0A74DB), UINT32_C(0x633EB24F), UINT32_C(0xFA752512), + UINT32_C(0xF1F55E56), UINT32_C(0x8EFE11DE), UINT32_C(0x75FECA68), + UINT32_C(0xE6BF19EC), UINT32_C(0xC80FD91C), UINT32_C(0x2A14C908), + UINT32_C(0xAD0BAFEC), UINT32_C(0xADE4031F), UINT32_C(0x4E1C4ACA) }, + { UINT32_C(0x1EB1549A), UINT32_C(0x463A815B), UINT32_C(0x668F1298), + UINT32_C(0x5AD4253C), UINT32_C(0x38A37151), UINT32_C(0x5CB38662), + UINT32_C(0xAFF16B96), UINT32_C(0x34BB1CCF), UINT32_C(0xEE731AB0), + UINT32_C(0xDCA93B13), UINT32_C(0x9BE01A0B), UINT32_C(0x9F3CE5CC) } }, + { { UINT32_C(0xA110D331), UINT32_C(0x75DB5723), UINT32_C(0x7123D89F), + UINT32_C(0x67C66F6A), UINT32_C(0x4009D570), UINT32_C(0x27ABBD4B), + UINT32_C(0xC73451BC), UINT32_C(0xACDA6F84), UINT32_C(0x05575ACF), + UINT32_C(0xE4B9A239), UINT32_C(0xAB2D3D6C), UINT32_C(0x3C2DB7EF) }, + { UINT32_C(0x29115145), UINT32_C(0x01CCDD08), UINT32_C(0x57B5814A), + UINT32_C(0x9E0602FE), UINT32_C(0x87862838), UINT32_C(0x679B35C2), + UINT32_C(0x38AD598D), UINT32_C(0x0277DC4C), UINT32_C(0x6D896DD4), + UINT32_C(0xEF80A213), UINT32_C(0xE7B9047B), UINT32_C(0xC8812213) } }, + }, + { + { { UINT32_C(0xEDC9CE62), UINT32_C(0xAC6DBDF6), UINT32_C(0x0F9C006E), + UINT32_C(0xA58F5B44), UINT32_C(0xDC28E1B0), UINT32_C(0x16694DE3), + UINT32_C(0xA6647711), UINT32_C(0x2D039CF2), UINT32_C(0xC5B08B4B), + UINT32_C(0xA13BBE6F), UINT32_C(0x10EBD8CE), UINT32_C(0xE44DA930) }, + { UINT32_C(0x19649A16), UINT32_C(0xCD472087), UINT32_C(0x683E5DF1), + 
UINT32_C(0xE18F4E44), UINT32_C(0x929BFA28), UINT32_C(0xB3F66303), + UINT32_C(0x818249BF), UINT32_C(0x7C378E43), UINT32_C(0x847F7CD9), + UINT32_C(0x76068C80), UINT32_C(0x987EBA16), UINT32_C(0xEE3DB6D1) } }, + { { UINT32_C(0xC42A2F52), UINT32_C(0xCBBD8576), UINT32_C(0x9D2B06BB), + UINT32_C(0x9ACC6F70), UINT32_C(0x2E6B72A4), UINT32_C(0xE5CB5620), + UINT32_C(0x7C024443), UINT32_C(0x5738EA0E), UINT32_C(0xB55368F3), + UINT32_C(0x8ED06170), UINT32_C(0x1AEED44F), UINT32_C(0xE54C99BB) }, + { UINT32_C(0xE2E0D8B2), UINT32_C(0x3D90A6B2), UINT32_C(0xCF7B2856), + UINT32_C(0x21718977), UINT32_C(0xC5612AEC), UINT32_C(0x089093DC), + UINT32_C(0x99C1BACC), UINT32_C(0xC272EF6F), UINT32_C(0xDC43EAAD), + UINT32_C(0x47DB3B43), UINT32_C(0x0832D891), UINT32_C(0x730F30E4) } }, + { { UINT32_C(0x0C7FECDB), UINT32_C(0x9FFE5563), UINT32_C(0xF88101E5), + UINT32_C(0x55CC67B6), UINT32_C(0xCBEFA3C7), UINT32_C(0x3039F981), + UINT32_C(0x667BFD64), UINT32_C(0x2AB06883), UINT32_C(0x4340E3DF), + UINT32_C(0x9007A257), UINT32_C(0x5A3A49CA), UINT32_C(0x1AC3F3FA) }, + { UINT32_C(0xC97E20FD), UINT32_C(0x9C7BE629), UINT32_C(0xA3DAE003), + UINT32_C(0xF61823D3), UINT32_C(0xE7380DBA), UINT32_C(0xFFE7FF39), + UINT32_C(0x9FACC3B8), UINT32_C(0x620BB9B5), UINT32_C(0x31AE422C), + UINT32_C(0x2DDCB8CD), UINT32_C(0xD12C3C43), UINT32_C(0x1DE3BCFA) } }, + { { UINT32_C(0xD6E0F9A9), UINT32_C(0x8C074946), UINT32_C(0x51C3B05B), + UINT32_C(0x662FA995), UINT32_C(0x04BB2048), UINT32_C(0x6CDAE969), + UINT32_C(0xD6DC8B60), UINT32_C(0x6DEC9594), UINT32_C(0x54438BBC), + UINT32_C(0x8D265869), UINT32_C(0x1B0E95A5), UINT32_C(0x88E983E3) }, + { UINT32_C(0x60CBF838), UINT32_C(0x8189F114), UINT32_C(0x771DC46B), + UINT32_C(0x77190697), UINT32_C(0x27F8EC1A), UINT32_C(0x775775A2), + UINT32_C(0x607E3739), UINT32_C(0x7A125240), UINT32_C(0x4F793E4E), + UINT32_C(0xAFAE84E7), UINT32_C(0x5BF5BAF4), UINT32_C(0x44FA17F3) } }, + { { UINT32_C(0xD03AC439), UINT32_C(0xA21E69A5), UINT32_C(0x88AA8094), + UINT32_C(0x2069C5FC), UINT32_C(0x8C08F206), 
UINT32_C(0xB041EEA7), + UINT32_C(0x3D65B8ED), UINT32_C(0x55B9D461), UINT32_C(0xD392C7C4), + UINT32_C(0x951EA25C), UINT32_C(0x9D166232), UINT32_C(0x4B9A1CEC) }, + { UINT32_C(0xFCF931A4), UINT32_C(0xC184FCD8), UINT32_C(0x063AD374), + UINT32_C(0xBA59AD44), UINT32_C(0x1AA9796F), UINT32_C(0x1868AD2A), + UINT32_C(0xDFF29832), UINT32_C(0x38A34018), UINT32_C(0x03DF8070), + UINT32_C(0x01FC8801), UINT32_C(0x48DD334A), UINT32_C(0x1282CCE0) } }, + { { UINT32_C(0x26D8503C), UINT32_C(0x76AA9557), UINT32_C(0x6BC3E3D0), + UINT32_C(0xBE962B63), UINT32_C(0x97DE8841), UINT32_C(0xF5CA93E5), + UINT32_C(0xAF3F2C16), UINT32_C(0x1561B05E), UINT32_C(0xD34BFF98), + UINT32_C(0x34BE00AA), UINT32_C(0xD23D2925), UINT32_C(0xEA21E6E9) }, + { UINT32_C(0x394C3AFB), UINT32_C(0x55713230), UINT32_C(0xD6C8BECA), + UINT32_C(0xEAF0529B), UINT32_C(0x202B9A11), UINT32_C(0xFF38A743), + UINT32_C(0x6D3A398B), UINT32_C(0xA13E39FC), UINT32_C(0x86E2615A), + UINT32_C(0x8CBD644B), UINT32_C(0x191057EC), UINT32_C(0x92063988) } }, + { { UINT32_C(0x13F89146), UINT32_C(0x787835CE), UINT32_C(0x69446C3F), + UINT32_C(0x7FCD42CC), UINT32_C(0x840E679D), UINT32_C(0x0DA2AA98), + UINT32_C(0x18779A1B), UINT32_C(0x44F20523), UINT32_C(0xEFBF5935), + UINT32_C(0xE3A3B34F), UINT32_C(0xB9947B70), UINT32_C(0xA5D2CFD0) }, + { UINT32_C(0x27F4E16F), UINT32_C(0xAE2AF4EF), UINT32_C(0xB9D21322), + UINT32_C(0xA7FA70D2), UINT32_C(0xB3FD566B), UINT32_C(0x68084919), + UINT32_C(0xD7AAD6AB), UINT32_C(0xF04D71C8), UINT32_C(0x10BC4260), + UINT32_C(0xDBEA21E4), UINT32_C(0x8D949B42), UINT32_C(0xAA7DC665) } }, + { { UINT32_C(0x6CCB8213), UINT32_C(0xD8E958A0), UINT32_C(0x91900B54), + UINT32_C(0x118D9DB9), UINT32_C(0x85E8CED6), UINT32_C(0x09BB9D49), + UINT32_C(0x24019281), UINT32_C(0x410E9FB5), UINT32_C(0x6D74C86E), + UINT32_C(0x3B31B4E1), UINT32_C(0x020BB77D), UINT32_C(0x52BC0252) }, + { UINT32_C(0x27092CE4), UINT32_C(0x5616A26F), UINT32_C(0xA08F65CD), + UINT32_C(0x67774DBC), UINT32_C(0xC08BD569), UINT32_C(0x560AD494), + UINT32_C(0xAD498783), 
UINT32_C(0xBE26DA36), UINT32_C(0x7F019C91), + UINT32_C(0x0276C8AB), UINT32_C(0x5248266E), UINT32_C(0x09843ADA) } }, + { { UINT32_C(0x7D963CF2), UINT32_C(0xA0AE88A7), UINT32_C(0xD0E84920), + UINT32_C(0x91EF8986), UINT32_C(0xF8C58104), UINT32_C(0xC7EFE344), + UINT32_C(0xECA20773), UINT32_C(0x0A25D9FD), UINT32_C(0x00D8F1D5), + UINT32_C(0x9D989FAA), UINT32_C(0xC8B06264), UINT32_C(0x4204C8CE) }, + { UINT32_C(0xBE1A2796), UINT32_C(0x717C12E0), UINT32_C(0xC190C728), + UINT32_C(0x1FA4BA8C), UINT32_C(0x8C8A59BA), UINT32_C(0xA245CA8D), + UINT32_C(0x7672B935), UINT32_C(0xE3C37475), UINT32_C(0x2E4D6375), + UINT32_C(0x083D5E40), UINT32_C(0x5455E16E), UINT32_C(0x0B8D5AB3) } }, + { { UINT32_C(0xEED765D4), UINT32_C(0x1DB17DBF), UINT32_C(0xA5DDB965), + UINT32_C(0xBBC9B1BE), UINT32_C(0xDFC12ABC), UINT32_C(0x1948F76D), + UINT32_C(0x134EF489), UINT32_C(0x2C2714E5), UINT32_C(0x741C600F), + UINT32_C(0x60CE2EE8), UINT32_C(0xF80E6E63), UINT32_C(0x32396F22) }, + { UINT32_C(0x22537F59), UINT32_C(0x421DAC75), UINT32_C(0x49475DF5), + UINT32_C(0x58FB73C6), UINT32_C(0x6F18F1C7), UINT32_C(0x0ABF2885), + UINT32_C(0x9A398D16), UINT32_C(0x36474468), UINT32_C(0xBF673B87), + UINT32_C(0x87A661A7), UINT32_C(0x73819E17), UINT32_C(0x3E80698F) } }, + { { UINT32_C(0x53784CC4), UINT32_C(0xDFE49793), UINT32_C(0x486D508F), + UINT32_C(0x4280EAB0), UINT32_C(0xE534F5A4), UINT32_C(0x119593FF), + UINT32_C(0x9F63242F), UINT32_C(0x98AEFADD), UINT32_C(0xC4829CAE), + UINT32_C(0x9AE6A24A), UINT32_C(0x58E8BA80), UINT32_C(0xF2373CA5) }, + { UINT32_C(0x51765FB3), UINT32_C(0x4017AF7E), UINT32_C(0xAF4AEC4B), + UINT32_C(0xD1E40F7C), UINT32_C(0x0898E3BC), UINT32_C(0x87372C7A), + UINT32_C(0x85452CA9), UINT32_C(0x688982B2), UINT32_C(0xB1E50BCA), + UINT32_C(0x71E0B4BF), UINT32_C(0xF70E714A), UINT32_C(0x21FD2DBF) } }, + { { UINT32_C(0xFB78DDAC), UINT32_C(0xEE6E8820), UINT32_C(0x063892CD), + UINT32_C(0x0BAED29C), UINT32_C(0x28C0588D), UINT32_C(0x5F33049C), + UINT32_C(0x18DBC432), UINT32_C(0x90C2515E), UINT32_C(0x3B4CB0BD), + 
UINT32_C(0xB8A1B143), UINT32_C(0x68103043), UINT32_C(0x0AB5C0C9) }, + { UINT32_C(0x4005EC40), UINT32_C(0xF3788FA0), UINT32_C(0x039EE115), + UINT32_C(0x82571C99), UINT32_C(0x93260BED), UINT32_C(0xEE8FCED5), + UINT32_C(0x10836D18), UINT32_C(0x5A9BAF79), UINT32_C(0xC46AA4F6), + UINT32_C(0x7C258B09), UINT32_C(0x37F53D31), UINT32_C(0x46ECC5E8) } }, + { { UINT32_C(0xBFE0DD98), UINT32_C(0xFA32C0DC), UINT32_C(0x962B1066), + UINT32_C(0x66EFAFC4), UINT32_C(0x64BDF5EB), UINT32_C(0xBA81D33E), + UINT32_C(0xFC7FC512), UINT32_C(0x36C28536), UINT32_C(0xE0B4FA97), + UINT32_C(0x0C95176B), UINT32_C(0x3B9BC64A), UINT32_C(0x47DDE29B) }, + { UINT32_C(0x5C173B36), UINT32_C(0x08D986FD), UINT32_C(0x6CF3F28C), + UINT32_C(0x46D84B52), UINT32_C(0xF026BDB9), UINT32_C(0x6F6ED6C3), + UINT32_C(0x68206DC5), UINT32_C(0xAC90668B), UINT32_C(0xECBE4E70), + UINT32_C(0xE8ED5D98), UINT32_C(0xDC1A6974), UINT32_C(0xCFFF61DD) } }, + { { UINT32_C(0x77B1A5C1), UINT32_C(0xFF5C3A29), UINT32_C(0x0DDF995D), + UINT32_C(0x10C27E4A), UINT32_C(0xE23363E3), UINT32_C(0xCB745F77), + UINT32_C(0x32F399A3), UINT32_C(0xD765DF6F), UINT32_C(0x8A99E109), + UINT32_C(0xF0CA0C2F), UINT32_C(0x1E025CA0), UINT32_C(0xC3A6BFB7) }, + { UINT32_C(0x4F9D9FA5), UINT32_C(0x830B2C0A), UINT32_C(0xBD1A84E5), + UINT32_C(0xAE914CAC), UINT32_C(0xA4FEBCC1), UINT32_C(0x30B35ED8), + UINT32_C(0x84CFBF2E), UINT32_C(0xCB902B46), UINT32_C(0x25FC6375), + UINT32_C(0x0BD47628), UINT32_C(0x85509D04), UINT32_C(0xA858A53C) } }, + { { UINT32_C(0x552E0A3F), UINT32_C(0x8B995D0C), UINT32_C(0x17BE9FF7), + UINT32_C(0xEDBD4E94), UINT32_C(0x95085178), UINT32_C(0x3432E839), + UINT32_C(0x80C256F5), UINT32_C(0x0FE5C181), UINT32_C(0xEBF9597C), + UINT32_C(0x05A64EA8), UINT32_C(0x3F80371F), UINT32_C(0x6ED44BB1) }, + { UINT32_C(0xFE4C12EE), UINT32_C(0x6A29A05E), UINT32_C(0xE0BB83B3), + UINT32_C(0x3E436A43), UINT32_C(0x74D72921), UINT32_C(0x38365D9A), + UINT32_C(0xC38E1ED7), UINT32_C(0x3F5EE823), UINT32_C(0xE8FA063F), + UINT32_C(0x09A53213), UINT32_C(0xB435E713), 
UINT32_C(0x1E7FE47A) } }, + { { UINT32_C(0xFDDD17F3), UINT32_C(0xE4D9BC94), UINT32_C(0xC1016C20), + UINT32_C(0xC74B8FED), UINT32_C(0xB49C060E), UINT32_C(0x095DE39B), + UINT32_C(0x8AC0DF00), UINT32_C(0xDBCC6795), UINT32_C(0x1C34F4DF), + UINT32_C(0x4CF6BAEB), UINT32_C(0xE8390170), UINT32_C(0x72C55C21) }, + { UINT32_C(0xF6C48E79), UINT32_C(0x4F17BFD2), UINT32_C(0x017A80BA), + UINT32_C(0x18BF4DA0), UINT32_C(0xBCF4B138), UINT32_C(0xCF51D829), + UINT32_C(0xF48F8B0D), UINT32_C(0x598AEE5F), UINT32_C(0x20F10809), + UINT32_C(0x83FAEE56), UINT32_C(0x779F0850), UINT32_C(0x4615D4DC) } }, + }, + { + { { UINT32_C(0x5852B59B), UINT32_C(0x22313DEE), UINT32_C(0xB6A0B37F), + UINT32_C(0x6F56C8E8), UINT32_C(0xA76EC380), UINT32_C(0x43D6EEAE), + UINT32_C(0x0275AD36), UINT32_C(0xA1655136), UINT32_C(0xDF095BDA), + UINT32_C(0xE5C1B65A), UINT32_C(0x367C44B0), UINT32_C(0xBD1FFA8D) }, + { UINT32_C(0x6B48AF2B), UINT32_C(0xE2B419C2), UINT32_C(0x3DA194C8), + UINT32_C(0x57BBBD97), UINT32_C(0xA2BAFF05), UINT32_C(0xB5FBE51F), + UINT32_C(0x6269B5D0), UINT32_C(0xA0594D70), UINT32_C(0x23E8D667), + UINT32_C(0x0B07B705), UINT32_C(0x63E016E7), UINT32_C(0xAE1976B5) } }, + { { UINT32_C(0xFBECAAAE), UINT32_C(0x2FDE4893), UINT32_C(0x30332229), + UINT32_C(0x444346DE), UINT32_C(0x09456ED5), UINT32_C(0x157B8A5B), + UINT32_C(0x25797C6C), UINT32_C(0x73606A79), UINT32_C(0x33C14C06), + UINT32_C(0xA9D0F47C), UINT32_C(0xFAF971CA), UINT32_C(0x7BC8962C) }, + { UINT32_C(0x65909DFD), UINT32_C(0x6E763C51), UINT32_C(0x14A9BF42), + UINT32_C(0x1BBBE41B), UINT32_C(0xC49E9EFC), UINT32_C(0xD95B7ECB), + UINT32_C(0xB38F2B59), UINT32_C(0x0C317927), UINT32_C(0xB3C397DB), + UINT32_C(0x97912B53), UINT32_C(0x45C7ABC7), UINT32_C(0xCB3879AA) } }, + { { UINT32_C(0x24359B81), UINT32_C(0xCD81BDCF), UINT32_C(0xDB4C321C), + UINT32_C(0x6FD326E2), UINT32_C(0xF8EBE39C), UINT32_C(0x4CB0228B), + UINT32_C(0xB2CDD852), UINT32_C(0x496A9DCE), UINT32_C(0xD0E9B3AF), + UINT32_C(0x0F115A1A), UINT32_C(0xD8EEEF8A), UINT32_C(0xAA08BF36) }, + { 
UINT32_C(0x06E5E739), UINT32_C(0x5232A515), UINT32_C(0x8407A551), + UINT32_C(0x21FAE9D5), UINT32_C(0x8994B4E8), UINT32_C(0x289D18B0), + UINT32_C(0x09097A52), UINT32_C(0xB4E346A8), UINT32_C(0x324621D0), + UINT32_C(0xC641510F), UINT32_C(0x95A41AB8), UINT32_C(0xC567FD4A) } }, + { { UINT32_C(0xD57C8DE9), UINT32_C(0x261578C7), UINT32_C(0x3836C5C8), + UINT32_C(0xB9BC491F), UINT32_C(0x14C8038F), UINT32_C(0x993266B4), + UINT32_C(0xFAA7CC39), UINT32_C(0xBACAD755), UINT32_C(0xD69B7E27), + UINT32_C(0x418C4DEF), UINT32_C(0xAE751533), UINT32_C(0x53FDC5CD) }, + { UINT32_C(0xC3EEA63A), UINT32_C(0x6F3BD329), UINT32_C(0xE53DD29E), + UINT32_C(0xA7A22091), UINT32_C(0xDC4C54EC), UINT32_C(0xB7164F73), + UINT32_C(0x44D3D74E), UINT32_C(0xCA66290D), UINT32_C(0x4C9EA511), + UINT32_C(0xF77C6242), UINT32_C(0x1F714C49), UINT32_C(0x34337F55) } }, + { { UINT32_C(0xA64B6C4B), UINT32_C(0x5ED2B216), UINT32_C(0x3AAE640D), + UINT32_C(0x1C38794F), UINT32_C(0x8905794F), UINT32_C(0x30BBAEE0), + UINT32_C(0xC8699CFB), UINT32_C(0x0D9EE41E), UINT32_C(0xCF7B7C29), + UINT32_C(0xAF38DAF2), UINT32_C(0x43E53513), UINT32_C(0x0D6A05CA) }, + { UINT32_C(0x2606AB56), UINT32_C(0xBE96C644), UINT32_C(0xE9EB9734), + UINT32_C(0x13E7A072), UINT32_C(0x5FF50CD7), UINT32_C(0xF9669445), + UINT32_C(0x47DA6F1D), UINT32_C(0x68EF26B5), UINT32_C(0x23687CB7), + UINT32_C(0xF0028738), UINT32_C(0x6217C1CE), UINT32_C(0x5ED9C876) } }, + { { UINT32_C(0x0A3A9691), UINT32_C(0x423BA513), UINT32_C(0xB3179296), + UINT32_C(0xF421B1E7), UINT32_C(0x1A871E1B), UINT32_C(0x6B51BCDB), + UINT32_C(0x464E4300), UINT32_C(0x6E3BB5B5), UINT32_C(0xFC6C54CC), + UINT32_C(0x24171E2E), UINT32_C(0xD3E58DC2), UINT32_C(0xA9DFA947) }, + { UINT32_C(0x9DE9CFA7), UINT32_C(0x175B3309), UINT32_C(0x2D1015DA), + UINT32_C(0x707B2529), UINT32_C(0x993EA65A), UINT32_C(0xCBB95F17), + UINT32_C(0x0447450D), UINT32_C(0x93515063), UINT32_C(0x1B2753C9), + UINT32_C(0x0F47B205), UINT32_C(0xE7D427CF), UINT32_C(0x4A0BAB14) } }, + { { UINT32_C(0xB5AA7CA1), UINT32_C(0xA39DEF39), 
UINT32_C(0xC47C33DF), + UINT32_C(0x591CB173), UINT32_C(0x6BBAB872), UINT32_C(0xA09DAC79), + UINT32_C(0x7208BA2F), UINT32_C(0x3EF9D7CF), UINT32_C(0x7A0A34FC), + UINT32_C(0x3CC18931), UINT32_C(0xBCC3380F), UINT32_C(0xAE31C62B) }, + { UINT32_C(0x0287C0B4), UINT32_C(0xD72A6794), UINT32_C(0x68E334F1), + UINT32_C(0x3373382C), UINT32_C(0xBD20C6A6), UINT32_C(0xD0310CA8), + UINT32_C(0x42C033FD), UINT32_C(0xA2734B87), UINT32_C(0x8DCE4509), + UINT32_C(0xA5D390F1), UINT32_C(0x3E1AFCB5), UINT32_C(0xFC84E74B) } }, + { { UINT32_C(0xF2CD8A9C), UINT32_C(0xB028334D), UINT32_C(0x570F76F6), + UINT32_C(0xB8719291), UINT32_C(0x01065A2D), UINT32_C(0x662A386E), + UINT32_C(0x53D940AE), UINT32_C(0xDF1634CB), UINT32_C(0x8F5B41F9), + UINT32_C(0x625A7B83), UINT32_C(0xEE6AA1B4), UINT32_C(0xA033E4FE) }, + { UINT32_C(0x1E42BABB), UINT32_C(0x51E9D463), UINT32_C(0x0D388468), + UINT32_C(0x660BC2E4), UINT32_C(0xFCBB114A), UINT32_C(0x3F702189), + UINT32_C(0xB414CA78), UINT32_C(0x6B46FE35), UINT32_C(0x4A57316B), + UINT32_C(0x328F6CF2), UINT32_C(0x381AD156), UINT32_C(0x917423B5) } }, + { { UINT32_C(0x5373A607), UINT32_C(0xAC19306E), UINT32_C(0x191D0969), + UINT32_C(0x471DF8E3), UINT32_C(0xB9720D83), UINT32_C(0x380ADE35), + UINT32_C(0x48F1FD5C), UINT32_C(0x7423FDF5), UINT32_C(0x49CABC95), + UINT32_C(0x8B090C9F), UINT32_C(0xC9842F2F), UINT32_C(0xB768E8CD) }, + { UINT32_C(0xE56162D6), UINT32_C(0x399F456D), UINT32_C(0x4F326791), + UINT32_C(0xBB6BA240), UINT32_C(0x342590BE), UINT32_C(0x8F4FBA3B), + UINT32_C(0x3DFB6B3E), UINT32_C(0x053986B9), UINT32_C(0x190C7425), + UINT32_C(0xBB6739F1), UINT32_C(0x32F7E95F), UINT32_C(0x32D4A553) } }, + { { UINT32_C(0x0DDBFB21), UINT32_C(0x0205A0EC), UINT32_C(0x33AC3407), + UINT32_C(0x3010327D), UINT32_C(0x3348999B), UINT32_C(0xCF2F4DB3), + UINT32_C(0x1551604A), UINT32_C(0x660DB9F4), UINT32_C(0x5D38D335), + UINT32_C(0xC346C69A), UINT32_C(0x38882479), UINT32_C(0x64AAB3D3) }, + { UINT32_C(0x6AE44403), UINT32_C(0xA096B5E7), UINT32_C(0x645F76CD), + UINT32_C(0x6B4C9571), 
UINT32_C(0x4711120F), UINT32_C(0x72E1CD5F), + UINT32_C(0xF27CC3E1), UINT32_C(0x93EC42AC), UINT32_C(0xA72ABB12), + UINT32_C(0x2D18D004), UINT32_C(0xC9841A04), UINT32_C(0x232E9568) } }, + { { UINT32_C(0x3CC7F908), UINT32_C(0xFF01DB22), UINT32_C(0xD13CDD3B), + UINT32_C(0x9F214F8F), UINT32_C(0xE0B014B5), UINT32_C(0x38DADBB7), + UINT32_C(0x94245C95), UINT32_C(0x2C548CCC), UINT32_C(0x809AFCE3), + UINT32_C(0x714BE331), UINT32_C(0x9BFE957E), UINT32_C(0xBCC64410) }, + { UINT32_C(0x5B957F80), UINT32_C(0xC21C2D21), UINT32_C(0xBB8A4C42), + UINT32_C(0xBA2D4FDC), UINT32_C(0x74817CEC), UINT32_C(0xFA6CD4AF), + UINT32_C(0xC528EAD6), UINT32_C(0x9E7FB523), UINT32_C(0x7714B10E), + UINT32_C(0xAED781FF), UINT32_C(0x94F04455), UINT32_C(0xB52BB592) } }, + { { UINT32_C(0x868CC68B), UINT32_C(0xA578BD69), UINT32_C(0x603F2C08), + UINT32_C(0xA40FDC8D), UINT32_C(0x2D81B042), UINT32_C(0x53D79BD1), + UINT32_C(0xA7587EAB), UINT32_C(0x1B136AF3), UINT32_C(0x868A16DB), + UINT32_C(0x1ED4F939), UINT32_C(0xD0B98273), UINT32_C(0x775A61FB) }, + { UINT32_C(0xE56BEF8C), UINT32_C(0xBA5C12A6), UINT32_C(0xDDDC8595), + UINT32_C(0xF926CE52), UINT32_C(0x586FE1F8), UINT32_C(0xA13F5C8F), + UINT32_C(0x060DBB54), UINT32_C(0xEAC9F7F2), UINT32_C(0x51AF4342), + UINT32_C(0x70C0AC3A), UINT32_C(0x79CDA450), UINT32_C(0xC16E303C) } }, + { { UINT32_C(0x8113F4EA), UINT32_C(0xD0DADD6C), UINT32_C(0x07BDF09F), + UINT32_C(0xF14E3922), UINT32_C(0xAA7D877C), UINT32_C(0x3FE5E9C2), + UINT32_C(0x48779264), UINT32_C(0x9EA95C19), UINT32_C(0x4FCB8344), + UINT32_C(0xE93F65A7), UINT32_C(0x76D925A4), UINT32_C(0x9F40837E) }, + { UINT32_C(0x8271FFC7), UINT32_C(0x0EA6DA3F), UINT32_C(0xCC8F9B19), + UINT32_C(0x557FA529), UINT32_C(0x78E6DDFD), UINT32_C(0x2613DBF1), + UINT32_C(0x36B1E954), UINT32_C(0x7A7523B8), UINT32_C(0x406A87FB), + UINT32_C(0x20EB3168), UINT32_C(0x03ABA56A), UINT32_C(0x64C21C14) } }, + { { UINT32_C(0xC032DD5F), UINT32_C(0xE86C9C2D), UINT32_C(0x86F16A21), + UINT32_C(0x158CEB8E), UINT32_C(0x68326AF1), UINT32_C(0x0279FF53), + 
UINT32_C(0x59F12BA5), UINT32_C(0x1FFE2E2B), UINT32_C(0x86826D45), + UINT32_C(0xD75A46DB), UINT32_C(0x1E33E6AC), UINT32_C(0xE19B4841) }, + { UINT32_C(0x0E52991C), UINT32_C(0x5F0CC524), UINT32_C(0x8B116286), + UINT32_C(0x645871F9), UINT32_C(0xFCAEC5D3), UINT32_C(0xAB3B4B1E), + UINT32_C(0x51D0F698), UINT32_C(0x994C8DF0), UINT32_C(0xE5D13040), + UINT32_C(0x06F890AF), UINT32_C(0x5F96C7C2), UINT32_C(0x72D9DC23) } }, + { { UINT32_C(0xE7886A80), UINT32_C(0x7C018DEE), UINT32_C(0x8786E4A3), + UINT32_C(0xFA209330), UINT32_C(0xA4415CA1), UINT32_C(0xCEC8E2A3), + UINT32_C(0xCC83CC60), UINT32_C(0x5C736FC1), UINT32_C(0xF00C259F), + UINT32_C(0xFEF9788C), UINT32_C(0xDD29A6AD), UINT32_C(0xED5C01CB) }, + { UINT32_C(0x3E20825B), UINT32_C(0x87834A03), UINT32_C(0x123F9358), + UINT32_C(0x13B1239D), UINT32_C(0xFBC286C1), UINT32_C(0x7E8869D0), + UINT32_C(0x24CE8609), UINT32_C(0xC4AB5AA3), UINT32_C(0xB6349208), + UINT32_C(0x38716BEE), UINT32_C(0xB322AE21), UINT32_C(0x0BDF4F99) } }, + { { UINT32_C(0x53E3494B), UINT32_C(0x6B97A2BF), UINT32_C(0x70F7A13E), + UINT32_C(0xA8AA05C5), UINT32_C(0xF1305B51), UINT32_C(0x209709C2), + UINT32_C(0xDAB76F2C), UINT32_C(0x57B31888), UINT32_C(0xAA2A406A), + UINT32_C(0x75B2ECD7), UINT32_C(0xA35374A4), UINT32_C(0x88801A00) }, + { UINT32_C(0x45C0471B), UINT32_C(0xE1458D1C), UINT32_C(0x322C1AB0), + UINT32_C(0x5760E306), UINT32_C(0xAD6AB0A6), UINT32_C(0x789A0AF1), + UINT32_C(0xF458B9CE), UINT32_C(0x74398DE1), UINT32_C(0x32E0C65F), + UINT32_C(0x1652FF9F), UINT32_C(0xFFFB3A52), UINT32_C(0xFAF1F9D5) } }, + }, + { + { { UINT32_C(0xD1D1B007), UINT32_C(0xA05C751C), UINT32_C(0x0213E478), + UINT32_C(0x016C213B), UINT32_C(0xF4C98FEE), UINT32_C(0x9C56E26C), + UINT32_C(0xE7B3A7C7), UINT32_C(0x6084F8B9), UINT32_C(0xDECC1646), + UINT32_C(0xA0B042F6), UINT32_C(0xFBF3A0BC), UINT32_C(0x4A6F3C1A) }, + { UINT32_C(0x51C9F909), UINT32_C(0x94524C2C), UINT32_C(0x3A6D3748), + UINT32_C(0xF3B3AD40), UINT32_C(0x7CE1F9F5), UINT32_C(0x18792D6E), + UINT32_C(0xFC0C34FA), UINT32_C(0x8EBC2FD7), 
UINT32_C(0x780A1693), + UINT32_C(0x032A9F41), UINT32_C(0x56A60019), UINT32_C(0x34F9801E) } }, + { { UINT32_C(0xF0DB3751), UINT32_C(0xB398290C), UINT32_C(0xBA42C976), + UINT32_C(0x01170580), UINT32_C(0x56560B89), UINT32_C(0x3E71AA29), + UINT32_C(0x50E6647B), UINT32_C(0x80817AAC), UINT32_C(0xA0BE42DA), + UINT32_C(0x35C833AD), UINT32_C(0xF1BABA4E), UINT32_C(0xFA3C6148) }, + { UINT32_C(0xCD8F6253), UINT32_C(0xC57BE645), UINT32_C(0xC657AD0D), + UINT32_C(0x77CEE46B), UINT32_C(0x0DEFD908), UINT32_C(0x83007731), + UINT32_C(0x899CBA56), UINT32_C(0x92FE9BCE), UINT32_C(0xBCEFFB5A), + UINT32_C(0x48450EC4), UINT32_C(0xF2F5F4BF), UINT32_C(0xE615148D) } }, + { { UINT32_C(0x90B86166), UINT32_C(0xF55EDABB), UINT32_C(0x075430A2), + UINT32_C(0x27F7D784), UINT32_C(0x9BF17161), UINT32_C(0xF53E822B), + UINT32_C(0xAFE808DC), UINT32_C(0x4A5B3B93), UINT32_C(0xD7272F55), + UINT32_C(0x590BBBDE), UINT32_C(0xEAEA79A1), UINT32_C(0x233D63FA) }, + { UINT32_C(0xFE1EBA07), UINT32_C(0xD7042BEA), UINT32_C(0x10750D7E), + UINT32_C(0xD2B9AEA0), UINT32_C(0x31078AA5), UINT32_C(0xD8D1E690), + UINT32_C(0x7E37BC8B), UINT32_C(0x9E837F18), UINT32_C(0x85008975), + UINT32_C(0x9558FF4F), UINT32_C(0x421FE867), UINT32_C(0x93EDB837) } }, + { { UINT32_C(0x83D55B5A), UINT32_C(0xAA6489DF), UINT32_C(0x86BF27F7), + UINT32_C(0xEA092E49), UINT32_C(0x5FA2EFEC), UINT32_C(0x4D8943A9), + UINT32_C(0x720E1A8C), UINT32_C(0xC9BAAE53), UINT32_C(0x95A4F8A3), + UINT32_C(0xC055444B), UINT32_C(0xA7C1206B), UINT32_C(0x93BD01E8) }, + { UINT32_C(0x714A27DF), UINT32_C(0xD97765B6), UINT32_C(0x193F1B16), + UINT32_C(0xD622D954), UINT32_C(0xF1503B15), UINT32_C(0x115CC35A), + UINT32_C(0xA9FA21F8), UINT32_C(0x1DD5359F), UINT32_C(0x6DFED1F1), + UINT32_C(0x197C3299), UINT32_C(0xF77F2679), UINT32_C(0xDEE8B7C9) } }, + { { UINT32_C(0x394FD855), UINT32_C(0x5405179F), UINT32_C(0x49FDFB33), + UINT32_C(0xC9D6E244), UINT32_C(0xBD903393), UINT32_C(0x70EBCAB4), + UINT32_C(0xA2C56780), UINT32_C(0x0D3A3899), UINT32_C(0x683D1A0A), + UINT32_C(0x012C7256), 
UINT32_C(0x80A48F3B), UINT32_C(0xC688FC88) }, + { UINT32_C(0x6F7DF527), UINT32_C(0x18095754), UINT32_C(0x71315D16), + UINT32_C(0x9E339B4B), UINT32_C(0xA956BB12), UINT32_C(0x90560C28), + UINT32_C(0xD42EEE8D), UINT32_C(0x2BECEA60), UINT32_C(0x50632653), + UINT32_C(0x82AEB9A7), UINT32_C(0xDFA5CD6A), UINT32_C(0xED34353E) } }, + { { UINT32_C(0x91AECCE4), UINT32_C(0x82154D2C), UINT32_C(0x5041887F), + UINT32_C(0x312C6070), UINT32_C(0xFB9FBD71), UINT32_C(0xECF589F3), + UINT32_C(0xB524BDE4), UINT32_C(0x67660A7D), UINT32_C(0x724ACF23), + UINT32_C(0xE99B029D), UINT32_C(0x6D1CD891), UINT32_C(0xDF06E4AF) }, + { UINT32_C(0x80EE304D), UINT32_C(0x07806CB5), UINT32_C(0x7443A8F8), + UINT32_C(0x0C70BB9F), UINT32_C(0x08B0830A), UINT32_C(0x01EC3414), + UINT32_C(0x5A81510B), UINT32_C(0xFD7B63C3), UINT32_C(0x453B5F93), + UINT32_C(0xE90A0A39), UINT32_C(0x9BC71725), UINT32_C(0xAB700F8F) } }, + { { UINT32_C(0xB9F00793), UINT32_C(0x9401AEC2), UINT32_C(0xB997F0BF), + UINT32_C(0x064EC4F4), UINT32_C(0x849240C8), UINT32_C(0xDC0CC1FD), + UINT32_C(0xB6E92D72), UINT32_C(0x39A75F37), UINT32_C(0x0224A4AB), + UINT32_C(0xAA43CA5D), UINT32_C(0x54614C47), UINT32_C(0x9C4D6325) }, + { UINT32_C(0xC6709DA3), UINT32_C(0x1767366F), UINT32_C(0x23479232), + UINT32_C(0xA6B482D1), UINT32_C(0x84D63E85), UINT32_C(0x54DC6DDC), + UINT32_C(0xC99D3B9E), UINT32_C(0x0ACCB5AD), UINT32_C(0xE8AA3ABF), + UINT32_C(0x211716BB), UINT32_C(0x69EC6406), UINT32_C(0xD0FE25AD) } }, + { { UINT32_C(0xDF85C705), UINT32_C(0x0D5C1769), UINT32_C(0xA409DCD1), + UINT32_C(0x7086C93D), UINT32_C(0x0E8D75D8), UINT32_C(0x9710839D), + UINT32_C(0xEBDD4177), UINT32_C(0x17B7DB75), UINT32_C(0xF649A809), + UINT32_C(0xAF69EB58), UINT32_C(0x8A84E220), UINT32_C(0x6EF19EA2) }, + { UINT32_C(0x65C278B2), UINT32_C(0x36EB5C66), UINT32_C(0x81EA9D65), + UINT32_C(0xD2A15128), UINT32_C(0x769300AD), UINT32_C(0x4FCBA840), + UINT32_C(0xC8E536E5), UINT32_C(0xC2052CCD), UINT32_C(0xAC263B8F), + UINT32_C(0x9CAEE014), UINT32_C(0xF9239663), UINT32_C(0x56F7ED7A) } }, + { { 
UINT32_C(0xAC9E09E1), UINT32_C(0xF6FA251F), UINT32_C(0x955A2853), + UINT32_C(0xA3775605), UINT32_C(0xF2A4BD78), UINT32_C(0x977B8D21), + UINT32_C(0x3E096410), UINT32_C(0xF68AA7FF), UINT32_C(0x65F88419), + UINT32_C(0x01AB0552), UINT32_C(0xBB93F64E), UINT32_C(0xC4C8D77E) }, + { UINT32_C(0x3451FE64), UINT32_C(0x71825111), UINT32_C(0x46F9BAF0), + UINT32_C(0xFA0F905B), UINT32_C(0xCA49EF1A), UINT32_C(0x79BE3BF3), + UINT32_C(0x6CB02071), UINT32_C(0x831109B2), UINT32_C(0xC4DDBFE5), + UINT32_C(0x765F935F), UINT32_C(0x80E5A3BA), UINT32_C(0x6F99CD14) } }, + { { UINT32_C(0x234F91FF), UINT32_C(0xD2E8DA04), UINT32_C(0x813867AA), + UINT32_C(0x4DED4D6D), UINT32_C(0xE0A0D945), UINT32_C(0x3B50175D), + UINT32_C(0x4EB78137), UINT32_C(0x55AC7406), UINT32_C(0xE1D47730), + UINT32_C(0xE9FA7F6E), UINT32_C(0x5CBF2176), UINT32_C(0x2C171531) }, + { UINT32_C(0x2BE7A47D), UINT32_C(0xA521788F), UINT32_C(0x3FCF1AB3), + UINT32_C(0x95B15A27), UINT32_C(0xF28A946A), UINT32_C(0xAADA6401), + UINT32_C(0x8B4E898B), UINT32_C(0x628B2EF4), UINT32_C(0x6D6592CC), + UINT32_C(0x0E6F4629), UINT32_C(0xA723CADD), UINT32_C(0x997C7094) } }, + { { UINT32_C(0x6AFE80C6), UINT32_C(0x878BCE11), UINT32_C(0x007BBA38), + UINT32_C(0xA89ABC9D), UINT32_C(0xA7CC267F), UINT32_C(0xB0C1F87B), + UINT32_C(0x5104FF04), UINT32_C(0x86D33B9D), UINT32_C(0x2EF1BA42), + UINT32_C(0xB0504B1B), UINT32_C(0xB2827E88), UINT32_C(0x21693048) }, + { UINT32_C(0x79CFCD14), UINT32_C(0x11F1CCD5), UINT32_C(0x94AD227E), + UINT32_C(0x59C09FFA), UINT32_C(0x3EA91ACF), UINT32_C(0x95A4ADCB), + UINT32_C(0xB4370BAA), UINT32_C(0x1346238B), UINT32_C(0x3E1367B0), + UINT32_C(0xB099D202), UINT32_C(0x90F23CEA), UINT32_C(0xCF5BBDE6) } }, + { { UINT32_C(0xBCB3BE5E), UINT32_C(0x453299BB), UINT32_C(0x38E9FF97), + UINT32_C(0x123C588E), UINT32_C(0xF6A2E521), UINT32_C(0x8C115DD9), + UINT32_C(0xFF7D4B98), UINT32_C(0x6E333C11), UINT32_C(0xDA73E736), + UINT32_C(0x9DD061E5), UINT32_C(0x5CA53056), UINT32_C(0xC6AB7B3A) }, + { UINT32_C(0x5B30A76B), UINT32_C(0xF1EF3EE3), 
UINT32_C(0x961BA11F), + UINT32_C(0xADD6B44A), UINT32_C(0x2CA6E030), UINT32_C(0x7BB00B75), + UINT32_C(0x2FE270AD), UINT32_C(0x270272E8), UINT32_C(0x241A9239), + UINT32_C(0x23BC6F4F), UINT32_C(0x0BB94A94), UINT32_C(0x88581E13) } }, + { { UINT32_C(0x24EEF67F), UINT32_C(0xBD225A69), UINT32_C(0x0412CEB7), + UINT32_C(0x7CFD9614), UINT32_C(0x99AC298E), UINT32_C(0xF6DE1679), + UINT32_C(0xED6C3571), UINT32_C(0xB20FD895), UINT32_C(0x61836C56), + UINT32_C(0x03C73B78), UINT32_C(0xABA6CB34), UINT32_C(0xEE3C3A16) }, + { UINT32_C(0x4138408A), UINT32_C(0x9E8C5667), UINT32_C(0x2DD6EBDF), + UINT32_C(0xEC25FCB1), UINT32_C(0xDBBDF6E3), UINT32_C(0xC54C33FD), + UINT32_C(0x4A3C9DD4), UINT32_C(0x93E0913B), UINT32_C(0x35EDEED4), + UINT32_C(0x66D7D135), UINT32_C(0x453FB66E), UINT32_C(0xD29A36C4) } }, + { { UINT32_C(0x9F1943AF), UINT32_C(0x7F192F03), UINT32_C(0x4E0B5FB0), + UINT32_C(0x6488163F), UINT32_C(0x53599226), UINT32_C(0x66A45C69), + UINT32_C(0x9AD15A73), UINT32_C(0x924E2E43), UINT32_C(0x42A99D76), + UINT32_C(0x8B553DB7), UINT32_C(0x0451F521), UINT32_C(0x4BC6B53B) }, + { UINT32_C(0x101F8AD6), UINT32_C(0xC029B5EF), UINT32_C(0xC507EED9), + UINT32_C(0x6A4DA71C), UINT32_C(0x30BB22F3), UINT32_C(0x3ADFAEC0), + UINT32_C(0xB514F85B), UINT32_C(0x81BCAF7A), UINT32_C(0x5A7E60D3), + UINT32_C(0x2E1E6EFF), UINT32_C(0xAE39D42F), UINT32_C(0x5270ABC0) } }, + { { UINT32_C(0x3901F0F8), UINT32_C(0x86D56DEB), UINT32_C(0xEED5F650), + UINT32_C(0x1D0BC792), UINT32_C(0xCA1114A3), UINT32_C(0x1A2DDFD8), + UINT32_C(0xF1DD316D), UINT32_C(0x94ABF4B1), UINT32_C(0x3D9F18EF), + UINT32_C(0xF72179E4), UINT32_C(0x9AA2CABF), UINT32_C(0x52A0921E) }, + { UINT32_C(0xA7452883), UINT32_C(0xECDA9E27), UINT32_C(0xAFD771B4), + UINT32_C(0x7E90850A), UINT32_C(0x9CC0465C), UINT32_C(0xD40F87EA), + UINT32_C(0x865CDA36), UINT32_C(0x8CFCB60A), UINT32_C(0x7C650942), + UINT32_C(0x3DBEC2CC), UINT32_C(0xE718CA9D), UINT32_C(0x071A4EE7) } }, + { { UINT32_C(0x276AC5F3), UINT32_C(0x73C0E4FF), UINT32_C(0xBDB97EA1), + UINT32_C(0xE7BA5A6A), 
UINT32_C(0xC5808398), UINT32_C(0x638CA54E), + UINT32_C(0x413855E5), UINT32_C(0x8258DC82), UINT32_C(0x57F07614), + UINT32_C(0x35DDD2E9), UINT32_C(0x1DC13BF9), UINT32_C(0xF98DD692) }, + { UINT32_C(0xF16DCD84), UINT32_C(0x3A4C0088), UINT32_C(0x833D83F9), + UINT32_C(0xF192EADD), UINT32_C(0xA6D61D29), UINT32_C(0x3C26C931), + UINT32_C(0xDE0AD7A1), UINT32_C(0x589FDD52), UINT32_C(0x0442D37F), + UINT32_C(0x7CD83DD2), UINT32_C(0x403ECBFC), UINT32_C(0x1E47E777) } }, + }, + { + { { UINT32_C(0x70D4D7BC), UINT32_C(0x2AF8ED81), UINT32_C(0xB632435C), + UINT32_C(0xABC3E15F), UINT32_C(0x78219356), UINT32_C(0x4C0E726F), + UINT32_C(0xB87254C4), UINT32_C(0x8C1962A1), UINT32_C(0xC9E7691A), + UINT32_C(0x30796A71), UINT32_C(0xA75A12EE), UINT32_C(0xD453EF19) }, + { UINT32_C(0x13AE4964), UINT32_C(0x535F42C2), UINT32_C(0x0DA9586A), + UINT32_C(0x86831C3C), UINT32_C(0xE39A7A58), UINT32_C(0xB7F1EF35), + UINT32_C(0xD459B91A), UINT32_C(0xA2789AE2), UINT32_C(0x02FD429D), + UINT32_C(0xEADBCA7F), UINT32_C(0x65290F57), UINT32_C(0x94F215D4) } }, + { { UINT32_C(0x1CFB79AC), UINT32_C(0x37ED2BE5), UINT32_C(0xE7AF84C3), + UINT32_C(0x801946F3), UINT32_C(0xE77C2F00), UINT32_C(0xB061AD8A), + UINT32_C(0x44DE16A8), UINT32_C(0xE87E1A9A), UINT32_C(0x7EE490FF), + UINT32_C(0xDF4F57C8), UINT32_C(0x005993ED), UINT32_C(0x4E793B49) }, + { UINT32_C(0xBCCB593F), UINT32_C(0xE1036387), UINT32_C(0x95E09B80), + UINT32_C(0xF1749411), UINT32_C(0x5AB42F91), UINT32_C(0x59CB20D1), + UINT32_C(0xAC0FF033), UINT32_C(0xA738A18D), UINT32_C(0x2AC1E7F4), + UINT32_C(0xDA501A2E), UINT32_C(0x84D8A6E0), UINT32_C(0x1B67EDA0) } }, + { { UINT32_C(0x1080E90B), UINT32_C(0x1D27EFCE), UINT32_C(0x3FD01DC6), + UINT32_C(0xA2815246), UINT32_C(0xCAA26D18), UINT32_C(0x99A3FB83), + UINT32_C(0xB82BABBE), UINT32_C(0xD27E6133), UINT32_C(0xD783DD60), + UINT32_C(0x61030DFD), UINT32_C(0x73C78CB8), UINT32_C(0x295A2913) }, + { UINT32_C(0x68BE6A92), UINT32_C(0x8707A2CF), UINT32_C(0xEEB3474A), + UINT32_C(0xC9C2FB98), UINT32_C(0xA2B176B8), UINT32_C(0x7C3FD412), + 
UINT32_C(0xC7202101), UINT32_C(0xD5B52E2F), UINT32_C(0xF0A6D536), + UINT32_C(0x24A63030), UINT32_C(0x04648EC0), UINT32_C(0x05842DE3) } }, + { { UINT32_C(0x30577AC9), UINT32_C(0x67477CDC), UINT32_C(0x244F92A8), + UINT32_C(0x51DD9775), UINT32_C(0x917EEC66), UINT32_C(0x31FD60B9), + UINT32_C(0xD66C5C1D), UINT32_C(0xACD95BD4), UINT32_C(0xBF9508BA), + UINT32_C(0x2E0551F3), UINT32_C(0x688CB243), UINT32_C(0x121168E1) }, + { UINT32_C(0x4540D230), UINT32_C(0x8C039740), UINT32_C(0x009ECDF9), + UINT32_C(0xC4ED3CF6), UINT32_C(0x44DB62AF), UINT32_C(0x191825E1), + UINT32_C(0xC4A030DA), UINT32_C(0x3EE8ACAB), UINT32_C(0x94081504), + UINT32_C(0x8AB154A8), UINT32_C(0x486C9CD0), UINT32_C(0x1FE09E4B) } }, + { { UINT32_C(0xD113450B), UINT32_C(0x512F82F9), UINT32_C(0x2DBC9197), + UINT32_C(0x5878C901), UINT32_C(0xE13F355B), UINT32_C(0xDB87412B), + UINT32_C(0x935B8A5E), UINT32_C(0x0A0A4A9B), UINT32_C(0xF25A5351), + UINT32_C(0x818587BD), UINT32_C(0x31E3D9C7), UINT32_C(0xE8079310) }, + { UINT32_C(0x611BC1B1), UINT32_C(0x8B1D47C7), UINT32_C(0x72A823F2), + UINT32_C(0x51722B58), UINT32_C(0x53B36B3E), UINT32_C(0x6F97EE8A), + UINT32_C(0x946DD453), UINT32_C(0x6E085AAC), UINT32_C(0xE65E6533), + UINT32_C(0x2EC5057D), UINT32_C(0x4BB18801), UINT32_C(0xF82D9D71) } }, + { { UINT32_C(0x8BA5AA8E), UINT32_C(0xAD81FA93), UINT32_C(0x8F7AA69E), + UINT32_C(0x723E628E), UINT32_C(0xEF35937C), UINT32_C(0x0BA7C2DE), + UINT32_C(0x6DECFB40), UINT32_C(0x83A43EC5), UINT32_C(0xE60C4F2D), + UINT32_C(0xF520F849), UINT32_C(0x457E3B5E), UINT32_C(0x8260E8AE) }, + { UINT32_C(0xBF1D9ED7), UINT32_C(0x7CE874F0), UINT32_C(0x7F1A5466), + UINT32_C(0x5FDE3553), UINT32_C(0x0C162DBB), UINT32_C(0x5A63777C), + UINT32_C(0xDAD87289), UINT32_C(0x0FD04F8C), UINT32_C(0x640761D5), + UINT32_C(0xCA2D9E0E), UINT32_C(0x38501ADB), UINT32_C(0x4615CFF8) } }, + { { UINT32_C(0x110B4A25), UINT32_C(0x9422789B), UINT32_C(0x70AD8CC1), + UINT32_C(0x5C26779F), UINT32_C(0xEC4F1E14), UINT32_C(0x4EE6A748), + UINT32_C(0x5C7AB5E0), UINT32_C(0xFB584A0D), 
UINT32_C(0xFB21EE66), + UINT32_C(0xED1DCB0B), UINT32_C(0x11C6863C), UINT32_C(0xDBED1F00) }, + { UINT32_C(0xB1B1D187), UINT32_C(0xD2969269), UINT32_C(0xAFE964E6), + UINT32_C(0xF7D0C3F2), UINT32_C(0x12BB865E), UINT32_C(0xE05EE93F), + UINT32_C(0xED79118E), UINT32_C(0x1AFB7BEE), UINT32_C(0x0F0FE453), + UINT32_C(0x220AF138), UINT32_C(0x52782AB9), UINT32_C(0x1463AA1A) } }, + { { UINT32_C(0xD7DBE5F9), UINT32_C(0x7C139D56), UINT32_C(0x0B83685B), + UINT32_C(0xFC16E611), UINT32_C(0x9018463C), UINT32_C(0xFA723C02), + UINT32_C(0x840BF5D7), UINT32_C(0xC472458C), UINT32_C(0x0AF07591), + UINT32_C(0x4D809359), UINT32_C(0x3308DFD9), UINT32_C(0x418D8830) }, + { UINT32_C(0x0C365AE3), UINT32_C(0x9B381E04), UINT32_C(0xF8190FD1), + UINT32_C(0x3780BF33), UINT32_C(0xDD03E854), UINT32_C(0x45397418), + UINT32_C(0x4E51E491), UINT32_C(0xA95D030F), UINT32_C(0xE3286CEA), + UINT32_C(0x87C8C686), UINT32_C(0x900B5F83), UINT32_C(0x01C773BF) } }, + { { UINT32_C(0x78673B02), UINT32_C(0xDABE3475), UINT32_C(0xF6E7395E), + UINT32_C(0x4F0F25CE), UINT32_C(0xD181AD45), UINT32_C(0x3117ABB9), + UINT32_C(0xAA13DE0B), UINT32_C(0x4B559F88), UINT32_C(0xEA7C9745), + UINT32_C(0xFD8EFE78), UINT32_C(0x5DD21682), UINT32_C(0x08060047) }, + { UINT32_C(0xD4C86FFC), UINT32_C(0xC0F5DE4B), UINT32_C(0xF21AB6A2), + UINT32_C(0x4BB14B1E), UINT32_C(0xF50C1D12), UINT32_C(0xACB53A6C), + UINT32_C(0x5CC9162E), UINT32_C(0x46AAC450), UINT32_C(0x2DE240B6), + UINT32_C(0x049C51E0), UINT32_C(0xE383C3B0), UINT32_C(0xBB2DC016) } }, + { { UINT32_C(0x8E438C92), UINT32_C(0xA3C56AD2), UINT32_C(0xB2CEAF1A), + UINT32_C(0x7C43F98F), UINT32_C(0xE2150778), UINT32_C(0x397C44F7), + UINT32_C(0x71A24131), UINT32_C(0x48D17AB7), UINT32_C(0x1E2ACDA9), + UINT32_C(0xCC513863), UINT32_C(0xF0C9BAC9), UINT32_C(0x2C76A55E) }, + { UINT32_C(0x7EA4BB7B), UINT32_C(0x4D74CDCE), UINT32_C(0xB1B3C2BA), + UINT32_C(0x834BD5BF), UINT32_C(0xCCC310A4), UINT32_C(0x46E2911E), + UINT32_C(0x0FC1BF13), UINT32_C(0xD3DE84AA), UINT32_C(0x80A03AD3), + UINT32_C(0x27F2892F), 
UINT32_C(0x3BD2F08B), UINT32_C(0x85B47620) } }, + { { UINT32_C(0x567AF533), UINT32_C(0xAB1CB818), UINT32_C(0xBAC2705A), + UINT32_C(0x273B4537), UINT32_C(0x22C84AB6), UINT32_C(0x133066C4), + UINT32_C(0x4830BFC1), UINT32_C(0xC3590DE6), UINT32_C(0x5E4742D0), + UINT32_C(0xEA297869), UINT32_C(0x4F3164C0), UINT32_C(0xF6D8C694) }, + { UINT32_C(0xC1249588), UINT32_C(0x09E85F3D), UINT32_C(0x4EC64DF7), + UINT32_C(0x6C2BB05D), UINT32_C(0x8B78000F), UINT32_C(0xD267115E), + UINT32_C(0xC7E4A316), UINT32_C(0x07C5D7AE), UINT32_C(0x4619E5BD), + UINT32_C(0xCB1187BA), UINT32_C(0xA43F7EEE), UINT32_C(0x57B1D4EF) } }, + { { UINT32_C(0xC8176A96), UINT32_C(0x3618891F), UINT32_C(0xE5808B97), + UINT32_C(0x62C4B084), UINT32_C(0x4DD95D6E), UINT32_C(0xDE558546), + UINT32_C(0x730B2EA4), UINT32_C(0x27A8133E), UINT32_C(0x6AF318A0), + UINT32_C(0xE07CEEC3), UINT32_C(0xCE24FD2C), UINT32_C(0x0ACC1286) }, + { UINT32_C(0xDD4D307C), UINT32_C(0x8A48FE4A), UINT32_C(0x18CDE0DA), + UINT32_C(0x71A9BA9C), UINT32_C(0xD5D79747), UINT32_C(0x655E2B66), + UINT32_C(0xA79AEDC7), UINT32_C(0x409FE856), UINT32_C(0xD287E5CF), + UINT32_C(0xC5A9F244), UINT32_C(0x4E82EC39), UINT32_C(0xCCE10384) } }, + { { UINT32_C(0xF25D364C), UINT32_C(0x00675BA7), UINT32_C(0x68D36BDF), + UINT32_C(0x7A7F1629), UINT32_C(0xA9E23F29), UINT32_C(0x35EC468A), + UINT32_C(0x2D926E6C), UINT32_C(0xF797AC50), UINT32_C(0x4B4F4376), + UINT32_C(0x639BA453), UINT32_C(0x51FF9519), UINT32_C(0xD71B430F) }, + { UINT32_C(0x2CF5635C), UINT32_C(0xB8C439EC), UINT32_C(0x81980393), + UINT32_C(0x0CE4C8D1), UINT32_C(0x64123B15), UINT32_C(0x4C5362A9), + UINT32_C(0xFFDCF096), UINT32_C(0x6E0421E0), UINT32_C(0x10D1F914), + UINT32_C(0x624A855F), UINT32_C(0x614DCD29), UINT32_C(0x7D8F3AB7) } }, + { { UINT32_C(0xB3493CE0), UINT32_C(0xD9219ADA), UINT32_C(0x52F09AE5), + UINT32_C(0x971B243A), UINT32_C(0xE24E3674), UINT32_C(0xC16C9BF8), + UINT32_C(0xCE68C7CD), UINT32_C(0x026D408D), UINT32_C(0x358209E3), + UINT32_C(0xF9B33DD9), UINT32_C(0xF3B2A206), UINT32_C(0x02D0595D) }, + { 
UINT32_C(0x60D15640), UINT32_C(0xBF994271), UINT32_C(0x15B5466A), + UINT32_C(0x6DA7A04E), UINT32_C(0x1CADB50D), UINT32_C(0x03AA4ED8), + UINT32_C(0x129A4253), UINT32_C(0x1548F029), UINT32_C(0xB842865A), + UINT32_C(0x41741F7E), UINT32_C(0xA3F88C98), UINT32_C(0x859FE0A4) } }, + { { UINT32_C(0x05FD7553), UINT32_C(0x80DE085A), UINT32_C(0xB897566B), + UINT32_C(0x4A4AB91E), UINT32_C(0x2F1C173F), UINT32_C(0x33BCD475), + UINT32_C(0xC100C013), UINT32_C(0x4E238896), UINT32_C(0xD614B34B), + UINT32_C(0x1C88500D), UINT32_C(0xC3BA9E23), UINT32_C(0x0401C5F6) }, + { UINT32_C(0xD0AF0DE5), UINT32_C(0x8E8003C4), UINT32_C(0x9D0DCBB9), + UINT32_C(0x19B1DFB5), UINT32_C(0xEBEF7AB6), UINT32_C(0x4A3640A9), + UINT32_C(0x959B15F6), UINT32_C(0xEDAFD65B), UINT32_C(0x7FB95821), + UINT32_C(0x8092EF7F), UINT32_C(0xCE2E45D1), UINT32_C(0xAB8DD52E) } }, + { { UINT32_C(0xB9CFE6BF), UINT32_C(0xD1F2D6B8), UINT32_C(0x00073F6F), + UINT32_C(0x6358810B), UINT32_C(0xD712106E), UINT32_C(0x5FCE5993), + UINT32_C(0x1C024C91), UINT32_C(0x5EE6B271), UINT32_C(0x453DB663), + UINT32_C(0xD0248FF5), UINT32_C(0xADB835E8), UINT32_C(0xD6D81CB2) }, + { UINT32_C(0xFDFCB4C7), UINT32_C(0x8696CFEC), UINT32_C(0x53BC9045), + UINT32_C(0x696B7FCB), UINT32_C(0xDDA56981), UINT32_C(0xAB4D3807), + UINT32_C(0x1E4B943B), UINT32_C(0x2F998052), UINT32_C(0x166B7F18), + UINT32_C(0x8AA76ADB), UINT32_C(0x52A2D7ED), UINT32_C(0x63934301) } }, + }, + { + { { UINT32_C(0xA368EFF6), UINT32_C(0xBBCCCE39), UINT32_C(0x8CEB5C43), + UINT32_C(0xD8CAABDF), UINT32_C(0xD2252FDA), UINT32_C(0x9EAE35A5), + UINT32_C(0x54E7DD49), UINT32_C(0xA8F4F209), UINT32_C(0x295100FD), + UINT32_C(0xA56D72A6), UINT32_C(0x56767727), UINT32_C(0x20FC1FE8) }, + { UINT32_C(0x0BBAA5AB), UINT32_C(0xBF60B248), UINT32_C(0x313911F2), + UINT32_C(0xA4F3CE5A), UINT32_C(0xB93DAB9C), UINT32_C(0xC2A67AD4), + UINT32_C(0x22D71F39), UINT32_C(0x18CD0ED0), UINT32_C(0x5F304DB2), + UINT32_C(0x04380C42), UINT32_C(0x6729C821), UINT32_C(0x26420CBB) } }, + { { UINT32_C(0xBDFBCAE8), 
UINT32_C(0x26BD07D6), UINT32_C(0xDF01A80A), + UINT32_C(0x10B5173F), UINT32_C(0x6798B96C), UINT32_C(0xD831C546), + UINT32_C(0x1D3F3859), UINT32_C(0x1D6B4108), UINT32_C(0x991B9EC7), + UINT32_C(0x501D38EC), UINT32_C(0xD78431A9), UINT32_C(0x26319283) }, + { UINT32_C(0x118B343C), UINT32_C(0x8B85BAF7), UINT32_C(0x58DEF7D0), + UINT32_C(0x4696CDDD), UINT32_C(0x7ACDCF58), UINT32_C(0xEFC7C110), + UINT32_C(0x848D5842), UINT32_C(0xD9AF415C), UINT32_C(0x0AC7FDAC), + UINT32_C(0x6B5A06BC), UINT32_C(0xA344319B), UINT32_C(0x7D623E0D) } }, + { { UINT32_C(0x0C9D3547), UINT32_C(0x4C0D7806), UINT32_C(0xCF2AED47), + UINT32_C(0x993F048D), UINT32_C(0xE4B57E22), UINT32_C(0x5217C453), + UINT32_C(0xF4172B28), UINT32_C(0xB4669E35), UINT32_C(0x49F999F8), + UINT32_C(0x509A3CD0), UINT32_C(0x87C69D41), UINT32_C(0xD19F8632) }, + { UINT32_C(0x4C8FDED0), UINT32_C(0xE14D01E8), UINT32_C(0xEAFD9E1C), + UINT32_C(0x342880FD), UINT32_C(0x70DC2BF0), UINT32_C(0x0E17BFF2), + UINT32_C(0xC0186400), UINT32_C(0x46560B7B), UINT32_C(0x49A4DD34), + UINT32_C(0xE28C7B9C), UINT32_C(0x0F325D06), UINT32_C(0x18211916) } }, + { { UINT32_C(0xD7E02E18), UINT32_C(0x46D70888), UINT32_C(0xD9F11FD9), + UINT32_C(0x7C806954), UINT32_C(0x4FBEA271), UINT32_C(0xE4948FCA), + UINT32_C(0xBD80A9DF), UINT32_C(0x7D6C7765), UINT32_C(0xF3871C71), + UINT32_C(0x1B470EA6), UINT32_C(0x8330A570), UINT32_C(0xD62DE244) }, + { UINT32_C(0xC659C3A7), UINT32_C(0xDAECDDC1), UINT32_C(0x077F7AFC), + UINT32_C(0x8621E513), UINT32_C(0xCAEEEF13), UINT32_C(0x56C7CD84), + UINT32_C(0xC685A356), UINT32_C(0xC60C910F), UINT32_C(0x9DD93DDC), + UINT32_C(0xE68BC5C5), UINT32_C(0xFEB64895), UINT32_C(0xD904E89F) } }, + { { UINT32_C(0x8BA7917A), UINT32_C(0x75D874FB), UINT32_C(0xFD043BD4), + UINT32_C(0x18FA7F53), UINT32_C(0x1FC3979E), UINT32_C(0x212A0AD7), + UINT32_C(0x5D6EAC0E), UINT32_C(0x5703A7D9), UINT32_C(0x017DEAD5), + UINT32_C(0x222F7188), UINT32_C(0x0F6C1817), UINT32_C(0x1EC687B7) }, + { UINT32_C(0x238BACB6), UINT32_C(0x23412FC3), UINT32_C(0x54CED154), + 
UINT32_C(0xB85D70E9), UINT32_C(0xBDA674D0), UINT32_C(0xD4E06722), + UINT32_C(0x36F5A0C2), UINT32_C(0x3EA5F178), UINT32_C(0xF5C6D2CA), + UINT32_C(0x7E7D79CF), UINT32_C(0x3DBB3C73), UINT32_C(0x1FFF9464) } }, + { { UINT32_C(0xF163E4A8), UINT32_C(0x916E19D0), UINT32_C(0x1489DF17), + UINT32_C(0x1E6740E7), UINT32_C(0x339F3A47), UINT32_C(0x1EAF9723), + UINT32_C(0x124B8DAD), UINT32_C(0x22F0ED1A), UINT32_C(0x49C3DD04), + UINT32_C(0x39C9166C), UINT32_C(0xCE1E9ACC), UINT32_C(0x628E7FD4) }, + { UINT32_C(0x40031676), UINT32_C(0x124DDF27), UINT32_C(0x1EDDB9BE), + UINT32_C(0x00256939), UINT32_C(0xD360B0DA), UINT32_C(0xD39E25E7), + UINT32_C(0x4AA6C4C9), UINT32_C(0x6E3015A8), UINT32_C(0x623EDA09), + UINT32_C(0xC6A2F643), UINT32_C(0x50AA99FB), UINT32_C(0xBEFF2D12) } }, + { { UINT32_C(0x93EE8089), UINT32_C(0x1FEEF7CE), UINT32_C(0x252DD7BD), + UINT32_C(0xC6B180BC), UINT32_C(0x1788F051), UINT32_C(0xA16FB20B), + UINT32_C(0xE046ED39), UINT32_C(0xD86FD392), UINT32_C(0x9378CE1D), + UINT32_C(0xDA0A3611), UINT32_C(0xA5F7A61D), UINT32_C(0x121EF3E7) }, + { UINT32_C(0x92D13CAE), UINT32_C(0x94D22061), UINT32_C(0x77C72E08), + UINT32_C(0x5076046A), UINT32_C(0x7D2308B9), UINT32_C(0xF18BC233), + UINT32_C(0x17F977B1), UINT32_C(0x004DB3C5), UINT32_C(0x0471C11D), + UINT32_C(0xD05AE399), UINT32_C(0x85CD1726), UINT32_C(0x86A2A557) } }, + { { UINT32_C(0x72107804), UINT32_C(0xB8D9B286), UINT32_C(0x3303B79B), + UINT32_C(0xB5A7C413), UINT32_C(0x5FA37DED), UINT32_C(0x927EEF78), + UINT32_C(0xAD67DABA), UINT32_C(0xA1C5CF1E), UINT32_C(0x7360E7C7), + UINT32_C(0xAA5E3FB2), UINT32_C(0x0A0C0993), UINT32_C(0x8354E61A) }, + { UINT32_C(0x7F5458CC), UINT32_C(0x2EC73AF9), UINT32_C(0x48474325), + UINT32_C(0xDE4CB488), UINT32_C(0x7209BC69), UINT32_C(0x2DD134C7), + UINT32_C(0x451A2ABE), UINT32_C(0xB70C5567), UINT32_C(0x8E293018), + UINT32_C(0x2CD1B200), UINT32_C(0xD33C0D72), UINT32_C(0x15F8DA7A) } }, + { { UINT32_C(0xA8790657), UINT32_C(0x5DC386D0), UINT32_C(0xBC4D88BB), + UINT32_C(0xA4FDF676), UINT32_C(0x48BC6C49), 
UINT32_C(0x1B21F38F), + UINT32_C(0x543A7003), UINT32_C(0xCDCC7FAA), UINT32_C(0x8C9CF72C), + UINT32_C(0xEA97E7AA), UINT32_C(0x50D938A8), UINT32_C(0xA6B883F4) }, + { UINT32_C(0xA3A10F27), UINT32_C(0x51936F3A), UINT32_C(0xDECC76BF), + UINT32_C(0x0170785F), UINT32_C(0x908C578A), UINT32_C(0x7539ECE1), + UINT32_C(0x0F3E8C25), UINT32_C(0x5D9C8A8E), UINT32_C(0x9E4717A7), + UINT32_C(0x8681B43B), UINT32_C(0xA9D83E39), UINT32_C(0x94F42507) } }, + { { UINT32_C(0xA55ADDE7), UINT32_C(0xBBE11CA8), UINT32_C(0x3BC0896B), + UINT32_C(0x39E6F5CF), UINT32_C(0x1D2D8D94), UINT32_C(0x1447314E), + UINT32_C(0x5B012F8A), UINT32_C(0x45B48125), UINT32_C(0x08AD5283), + UINT32_C(0x41AD23FA), UINT32_C(0x41D13774), UINT32_C(0x837243E2) }, + { UINT32_C(0xBADCAA46), UINT32_C(0x1FC0BD9D), UINT32_C(0x26E84CAE), + UINT32_C(0x8DF164ED), UINT32_C(0x41017176), UINT32_C(0x8FF70EC0), + UINT32_C(0x5C848BA7), UINT32_C(0x23AD4BCE), UINT32_C(0x97A19CBB), + UINT32_C(0x89246FDE), UINT32_C(0x78397991), UINT32_C(0xA5EF987B) } }, + { { UINT32_C(0x4757964D), UINT32_C(0x111AF1B7), UINT32_C(0xDDBBF258), + UINT32_C(0x1D25D351), UINT32_C(0x7D2B06D6), UINT32_C(0x4161E776), + UINT32_C(0x1CAC0C5B), UINT32_C(0x6EFD2691), UINT32_C(0x211BFAEB), + UINT32_C(0x633B95DB), UINT32_C(0xE2BDF701), UINT32_C(0x9BEDFA5A) }, + { UINT32_C(0x73E099C8), UINT32_C(0xADAC2B0B), UINT32_C(0xBFB16BFF), + UINT32_C(0x436F0023), UINT32_C(0x30F55854), UINT32_C(0xB91B1002), + UINT32_C(0xF4C6C8B7), UINT32_C(0xAF6A2097), UINT32_C(0x3AD7B3D9), + UINT32_C(0x3FF65CED), UINT32_C(0x330E56DF), UINT32_C(0x6FA2626F) } }, + { { UINT32_C(0xFFCCFD07), UINT32_C(0x3D28BF2D), UINT32_C(0xD989603B), + UINT32_C(0x0514F6FF), UINT32_C(0x5514787A), UINT32_C(0xB9519629), + UINT32_C(0xC3DB4E9C), UINT32_C(0xA1848121), UINT32_C(0x2A3D4595), + UINT32_C(0x47FE2E39), UINT32_C(0x11B73ED4), UINT32_C(0x506F5D82) }, + { UINT32_C(0xA600D8BB), UINT32_C(0xA2257AE7), UINT32_C(0x0F9F122C), + UINT32_C(0xD659DBD1), UINT32_C(0x64DF160F), UINT32_C(0xDB0FDC67), + UINT32_C(0x7CB19690), 
UINT32_C(0xFF379339), UINT32_C(0x98E72EC1), + UINT32_C(0xDF4366B8), UINT32_C(0xDF437EB8), UINT32_C(0x97E72BEC) } }, + { { UINT32_C(0x1C81E5D9), UINT32_C(0x81DCEA27), UINT32_C(0x6717FC49), + UINT32_C(0x7E1B6CDA), UINT32_C(0x11EAE80D), UINT32_C(0xAA36B3B5), + UINT32_C(0x3CD7CBB3), UINT32_C(0x1306687C), UINT32_C(0xC4E89064), + UINT32_C(0xED670235), UINT32_C(0x58A94760), UINT32_C(0x9D3B0009) }, + { UINT32_C(0xE6A6333C), UINT32_C(0x5A64E158), UINT32_C(0x49453203), + UINT32_C(0x1A8B4A36), UINT32_C(0x1F77CC21), UINT32_C(0xF1CAD724), + UINT32_C(0x70518EF7), UINT32_C(0x693EBB4B), UINT32_C(0x0F39C91A), + UINT32_C(0xFB47BD81), UINT32_C(0xFA4BC64B), UINT32_C(0xCFE63DA2) } }, + { { UINT32_C(0xEAA66108), UINT32_C(0x82C1C684), UINT32_C(0x4CFE79FC), + UINT32_C(0xE3226218), UINT32_C(0x849C720E), UINT32_C(0x3F28B72B), + UINT32_C(0x8FEE1CA8), UINT32_C(0x137FB355), UINT32_C(0xE4F90C4E), + UINT32_C(0x4D18A9CD), UINT32_C(0xCC3E46FA), UINT32_C(0xC0344227) }, + { UINT32_C(0x79CDA392), UINT32_C(0x4FD5C08E), UINT32_C(0x8ADC87B5), + UINT32_C(0x65DB20DB), UINT32_C(0x916C1B84), UINT32_C(0x86F95D5B), + UINT32_C(0x17BB2B7C), UINT32_C(0x7EDA3871), UINT32_C(0x669A533B), + UINT32_C(0x18CCF7E7), UINT32_C(0xECAD0E06), UINT32_C(0x5E92421C) } }, + { { UINT32_C(0x4174B08B), UINT32_C(0x26063E12), UINT32_C(0x70DE8E4D), + UINT32_C(0xE621D9BE), UINT32_C(0x5ECDF350), UINT32_C(0xAEA0FD0F), + UINT32_C(0x9C20E5C9), UINT32_C(0x0D9F69E4), UINT32_C(0x0BBE2918), + UINT32_C(0xD3DADEB9), UINT32_C(0x58AA2F71), UINT32_C(0xD7B9B5DB) }, + { UINT32_C(0x3364CAF8), UINT32_C(0x7A971DD7), UINT32_C(0xC25D4BE4), + UINT32_C(0x702616A3), UINT32_C(0xA9E30071), UINT32_C(0xA30F0FA1), + UINT32_C(0x5573BC69), UINT32_C(0x98AB2438), UINT32_C(0x6FEC2E22), + UINT32_C(0xCBC63CDF), UINT32_C(0xCC901B9B), UINT32_C(0x965F90ED) } }, + { { UINT32_C(0x71E15BB3), UINT32_C(0xD53B592D), UINT32_C(0x8820E0D0), + UINT32_C(0x1F03C0E9), UINT32_C(0x3CCCB726), UINT32_C(0xCE93947D), + UINT32_C(0x1D547590), UINT32_C(0x2790FEE0), UINT32_C(0xC59CDD7A), + 
UINT32_C(0x4401D847), UINT32_C(0xA926DD9D), UINT32_C(0x72D69120) }, + { UINT32_C(0x4229F289), UINT32_C(0x38B8F21D), UINT32_C(0x7FE978AF), + UINT32_C(0x9F412E40), UINT32_C(0xCDB59AF1), UINT32_C(0xAE07901B), + UINT32_C(0xD1D4715E), UINT32_C(0x1E6BE5EB), UINT32_C(0x18C96BEF), + UINT32_C(0x3715BD8B), UINT32_C(0xE11B3798), UINT32_C(0x4B71F6E6) } }, + }, + { + { { UINT32_C(0xF0CE2DF4), UINT32_C(0x11A8FDE5), UINT32_C(0xFA8D26DF), + UINT32_C(0xBC70CA3E), UINT32_C(0xC74DFE82), UINT32_C(0x6818C275), + UINT32_C(0x38373A50), UINT32_C(0x2B0294AC), UINT32_C(0xE8E5F88F), + UINT32_C(0x584C4061), UINT32_C(0x7342383A), UINT32_C(0x1C05C1CA) }, + { UINT32_C(0x911430EC), UINT32_C(0x263895B3), UINT32_C(0xA5171453), + UINT32_C(0xEF9B0032), UINT32_C(0x84DA7F0C), UINT32_C(0x144359DA), + UINT32_C(0x924A09F2), UINT32_C(0x76E3095A), UINT32_C(0xD69AD835), + UINT32_C(0x612986E3), UINT32_C(0x392122AF), UINT32_C(0x70E03ADA) } }, + { { UINT32_C(0x67AAD17B), UINT32_C(0xFEB707EE), UINT32_C(0x83042995), + UINT32_C(0xBB21B287), UINT32_C(0x9A0D32BA), UINT32_C(0x26DE1645), + UINT32_C(0x1FFB9266), UINT32_C(0x9A2FF38A), UINT32_C(0x8F578B4A), + UINT32_C(0x4E5AD96D), UINT32_C(0x883E7443), UINT32_C(0x26CC0655) }, + { UINT32_C(0x2EE9367A), UINT32_C(0x1D8EECAB), UINT32_C(0x881DE2F8), + UINT32_C(0x42B84337), UINT32_C(0xD758AE41), UINT32_C(0xE49B2FAE), + UINT32_C(0x4A85D867), UINT32_C(0x6A9A2290), UINT32_C(0xE68CBA86), + UINT32_C(0x2FB89DCE), UINT32_C(0x7F09A982), UINT32_C(0xBC252635) } }, + { { UINT32_C(0x8C61AAAC), UINT32_C(0xADC79436), UINT32_C(0x5E926563), + UINT32_C(0x24C7FD13), UINT32_C(0x0406C129), UINT32_C(0xEF9FAAA4), + UINT32_C(0x8B658D3C), UINT32_C(0xF4E6388C), UINT32_C(0x1E435BAF), + UINT32_C(0x7262BEB4), UINT32_C(0xFDAEAC99), UINT32_C(0x3BF622CC) }, + { UINT32_C(0x4E1AEDDC), UINT32_C(0xD359F7D8), UINT32_C(0xD78C17B7), + UINT32_C(0x05DC4F8C), UINT32_C(0x29498BA5), UINT32_C(0xB18CF032), + UINT32_C(0x85BF35AD), UINT32_C(0xC67388CA), UINT32_C(0x62AA4BC8), + UINT32_C(0x8A7A6AA2), UINT32_C(0x72F4627A), 
UINT32_C(0x0B8F458E) } }, + { { UINT32_C(0xC68E4488), UINT32_C(0x3FB812EE), UINT32_C(0x60EF7281), + UINT32_C(0x53C5EAA4), UINT32_C(0x8FBEFBE4), UINT32_C(0xE5724183), + UINT32_C(0xA4B24A05), UINT32_C(0x2B7D49F4), UINT32_C(0x710C0A43), + UINT32_C(0x23B138D0), UINT32_C(0xA85EC1DB), UINT32_C(0x16A5B4C1) }, + { UINT32_C(0x305FEB02), UINT32_C(0x7CC1F3D7), UINT32_C(0x5B6C1B54), + UINT32_C(0x52F7947D), UINT32_C(0x8F56981C), UINT32_C(0x1BDA2312), + UINT32_C(0xB4080A01), UINT32_C(0x68663EAE), UINT32_C(0x9F999B7F), + UINT32_C(0x8DD7BA7E), UINT32_C(0xB686580C), UINT32_C(0xD8768D19) } }, + { { UINT32_C(0x7AFDDA94), UINT32_C(0xBCD0E0AD), UINT32_C(0x34A30687), + UINT32_C(0x95A0DBBE), UINT32_C(0x8C5E2665), UINT32_C(0xBBE3C3DF), + UINT32_C(0xEBF2BC16), UINT32_C(0x742BECD8), UINT32_C(0x3FA163A6), + UINT32_C(0x300CEB48), UINT32_C(0x4663354B), UINT32_C(0x0C5D02EE) }, + { UINT32_C(0xB5E606A4), UINT32_C(0xE4FB9AD6), UINT32_C(0xCF49FF95), + UINT32_C(0x93F507B8), UINT32_C(0x585C193B), UINT32_C(0x9406A90C), + UINT32_C(0x4ECF9517), UINT32_C(0xAD1440C1), UINT32_C(0x9CEA53F1), + UINT32_C(0x184CB475), UINT32_C(0x8EF11302), UINT32_C(0x6855C474) } }, + { { UINT32_C(0xEDCAFA52), UINT32_C(0x00ECB523), UINT32_C(0x086F69D3), + UINT32_C(0x0DA0AE0E), UINT32_C(0xC242F347), UINT32_C(0xC384DE15), + UINT32_C(0x848C12B7), UINT32_C(0xFB050E6E), UINT32_C(0x64E015CE), + UINT32_C(0x22F67654), UINT32_C(0x7CA122F2), UINT32_C(0xCBDC2A48) }, + { UINT32_C(0x445FB02C), UINT32_C(0xA940D973), UINT32_C(0x3767D89D), + UINT32_C(0x00F31E78), UINT32_C(0x613DABDD), UINT32_C(0x2B65A237), + UINT32_C(0xC875AE09), UINT32_C(0x2BE0AB05), UINT32_C(0xBA204F8E), + UINT32_C(0xB22E54FD), UINT32_C(0x0F7687B9), UINT32_C(0x65E2029D) } }, + { { UINT32_C(0x1855A71C), UINT32_C(0xFFD82538), UINT32_C(0x438BD8D8), + UINT32_C(0x26A330B3), UINT32_C(0xF9D8C5F9), UINT32_C(0x89628311), + UINT32_C(0x953738A0), UINT32_C(0x8D5FB9CF), UINT32_C(0xEDFCD4E5), + UINT32_C(0xCB7159C9), UINT32_C(0x2064C7C2), UINT32_C(0xD64E5230) }, + { UINT32_C(0x689F3CFE), 
UINT32_C(0xF858ED80), UINT32_C(0x56128B67), + UINT32_C(0x4830E309), UINT32_C(0xE0E90688), UINT32_C(0x2E1692DA), + UINT32_C(0xCA9CC232), UINT32_C(0xAB818913), UINT32_C(0xA5D229A6), + UINT32_C(0xE2E30C23), UINT32_C(0x0E740E23), UINT32_C(0xA544E8B1) } }, + { { UINT32_C(0xDC61E6CC), UINT32_C(0x1C15E569), UINT32_C(0x58FC7800), + UINT32_C(0x8FD72967), UINT32_C(0x37A9DFC5), UINT32_C(0xE61E7DB7), + UINT32_C(0x5AFD7822), UINT32_C(0x3F34A9C6), UINT32_C(0x19E80773), + UINT32_C(0x0A112742), UINT32_C(0x4760FC58), UINT32_C(0xA353460C) }, + { UINT32_C(0xB3124C71), UINT32_C(0x2FB7DEEB), UINT32_C(0x2D4009CC), + UINT32_C(0x48463627), UINT32_C(0xC3A10370), UINT32_C(0x399D1933), + UINT32_C(0x54388DBD), UINT32_C(0x7EB19450), UINT32_C(0x7C2A006A), + UINT32_C(0x8ECCE639), UINT32_C(0x55C932A0), UINT32_C(0x3D565DAF) } }, + { { UINT32_C(0xD9ADAE53), UINT32_C(0xCEF57A9F), UINT32_C(0xF83FD8CD), + UINT32_C(0xE2EB27D7), UINT32_C(0x9BBD2DDE), UINT32_C(0x4AC8F719), + UINT32_C(0xE91ABFB7), UINT32_C(0x604283AA), UINT32_C(0x34799F87), + UINT32_C(0xB6A4E115), UINT32_C(0xE4C2A8F3), UINT32_C(0x2B253224) }, + { UINT32_C(0xC8782294), UINT32_C(0xC34F8B92), UINT32_C(0xFCC2CB6B), + UINT32_C(0xC74D697D), UINT32_C(0xC2C84C46), UINT32_C(0xD990411B), + UINT32_C(0x31EA4955), UINT32_C(0x2807B5C6), UINT32_C(0xB9EB27F5), + UINT32_C(0x14AE2B93), UINT32_C(0x6163EDFA), UINT32_C(0xF0AE96A7) } }, + { { UINT32_C(0x42DB7180), UINT32_C(0xA7BDCBB4), UINT32_C(0xEDCA752F), + UINT32_C(0xC9FAA41F), UINT32_C(0xE820F401), UINT32_C(0x147F91B4), + UINT32_C(0xF5F2645F), UINT32_C(0x1E6CEF86), UINT32_C(0x31FE711D), + UINT32_C(0xB4AB4D7F), UINT32_C(0x743EF882), UINT32_C(0xCE68FB3C) }, + { UINT32_C(0x3EF2FCFF), UINT32_C(0xB9D7D682), UINT32_C(0x020DCAFD), + UINT32_C(0xF6893811), UINT32_C(0xBF81E760), UINT32_C(0x30D9A50C), + UINT32_C(0xB9B87228), UINT32_C(0x7F247D06), UINT32_C(0x5F40CFC0), + UINT32_C(0x143D4FEC), UINT32_C(0x329B2A88), UINT32_C(0x21D78D73) } }, + { { UINT32_C(0xED3F2055), UINT32_C(0x06B3FF8A), UINT32_C(0x522BE214), + 
UINT32_C(0x50482C77), UINT32_C(0xDDF54620), UINT32_C(0x8DF69CD8), + UINT32_C(0xF78A1165), UINT32_C(0x6D1DB204), UINT32_C(0x9AFE6BF2), + UINT32_C(0x459AE4A2), UINT32_C(0x24AC871E), UINT32_C(0xC23A9FFD) }, + { UINT32_C(0x89E85D81), UINT32_C(0xB7FD22E3), UINT32_C(0x122E9978), + UINT32_C(0x297F1F6B), UINT32_C(0x144BE1CE), UINT32_C(0xAB283D66), + UINT32_C(0xC00C614E), UINT32_C(0xC1F90AC2), UINT32_C(0x3224CD09), + UINT32_C(0x5465576E), UINT32_C(0x441B6059), UINT32_C(0x8E8D910D) } }, + { { UINT32_C(0xAAA228BC), UINT32_C(0xF73A060A), UINT32_C(0x56EFF87D), + UINT32_C(0xCF1B0783), UINT32_C(0xA54C9133), UINT32_C(0x11EF17C0), + UINT32_C(0x76A4DAA5), UINT32_C(0x9E476B15), UINT32_C(0x8018FB92), + UINT32_C(0x5624FEAC), UINT32_C(0xCFEEC1B9), UINT32_C(0x9826A0FC) }, + { UINT32_C(0x2DFE2046), UINT32_C(0xB732F7FE), UINT32_C(0x3B40DA6A), + UINT32_C(0x9260BD9F), UINT32_C(0x4F231773), UINT32_C(0xCC9F908F), + UINT32_C(0xDAFC0D55), UINT32_C(0x4827FEB9), UINT32_C(0x538ACE95), + UINT32_C(0x07D32E85), UINT32_C(0xB8EDAF37), UINT32_C(0xAD9F897C) } }, + { { UINT32_C(0xE3415498), UINT32_C(0x2F75B82F), UINT32_C(0xF1015F30), + UINT32_C(0xF99CAC5F), UINT32_C(0x7D7F25DE), UINT32_C(0x76640824), + UINT32_C(0xEE74C047), UINT32_C(0x714BC9CD), UINT32_C(0x07448879), + UINT32_C(0x70F847BF), UINT32_C(0x072165C0), UINT32_C(0xA14481DE) }, + { UINT32_C(0xDB1140A8), UINT32_C(0x9BFA59E3), UINT32_C(0xFCD13502), + UINT32_C(0x7B9C7FF0), UINT32_C(0x68459ABF), UINT32_C(0xF4D7538E), + UINT32_C(0xC8FC6AD2), UINT32_C(0xED93A791), UINT32_C(0xB51BD9B2), + UINT32_C(0xA8BBE2A8), UINT32_C(0x9FB34008), UINT32_C(0x084B5A27) } }, + { { UINT32_C(0xEB138C84), UINT32_C(0xB3BB9545), UINT32_C(0x3FC88BFD), + UINT32_C(0x59C3489C), UINT32_C(0x85F53EC7), UINT32_C(0x3A97FF63), + UINT32_C(0x0AA69C3D), UINT32_C(0x40FDF5A6), UINT32_C(0x53D19668), + UINT32_C(0x0E8CCEC7), UINT32_C(0x33FAA661), UINT32_C(0x0AA72EF9) }, + { UINT32_C(0x9B1E684B), UINT32_C(0xF5C5A6CF), UINT32_C(0x31A22EA1), + UINT32_C(0x630F9371), UINT32_C(0xAC60F7EA), 
UINT32_C(0x06B2AAC2), + UINT32_C(0x5BC37D80), UINT32_C(0xB181CAE2), UINT32_C(0x247B13EA), + UINT32_C(0x4601A929), UINT32_C(0x5F739797), UINT32_C(0x8A71C386) } }, + { { UINT32_C(0xAB134786), UINT32_C(0x545387B3), UINT32_C(0x1599B64A), + UINT32_C(0x3179BB06), UINT32_C(0x07593574), UINT32_C(0xB0A61986), + UINT32_C(0x63FA7C3B), UINT32_C(0xC7E39B21), UINT32_C(0x91585D13), + UINT32_C(0xA1173F86), UINT32_C(0xCB9525CD), UINT32_C(0x09D5CC8E) }, + { UINT32_C(0x8F3A3451), UINT32_C(0xAAD44FFD), UINT32_C(0x25820CC5), + UINT32_C(0x702B04F2), UINT32_C(0x1CB66C17), UINT32_C(0xE90CAC49), + UINT32_C(0xEE161DC4), UINT32_C(0x40F6B547), UINT32_C(0x1BA4AC4E), + UINT32_C(0xC08BB8B4), UINT32_C(0xAE5A6BC1), UINT32_C(0x7DC064FB) } }, + { { UINT32_C(0x9D76DDC7), UINT32_C(0x90A5E871), UINT32_C(0xEDFC8E2E), + UINT32_C(0x39DC8FAE), UINT32_C(0x5B079C62), UINT32_C(0x98467A23), + UINT32_C(0x05450C98), UINT32_C(0xE25E3785), UINT32_C(0x96140083), + UINT32_C(0x2FE23A4D), UINT32_C(0xE9900312), UINT32_C(0x65CE3B9A) }, + { UINT32_C(0x6B72B5D9), UINT32_C(0x1D87D088), UINT32_C(0xFD9AFC82), + UINT32_C(0x72F53220), UINT32_C(0x9E1F71FA), UINT32_C(0xC63C7C15), + UINT32_C(0x8D449637), UINT32_C(0x90DF26EA), UINT32_C(0xC1C2B215), + UINT32_C(0x97089F40), UINT32_C(0x42317FAA), UINT32_C(0x83AF2664) } }, + }, + { + { { UINT32_C(0x8D688E31), UINT32_C(0xFA2DB51A), UINT32_C(0xA09C88D4), + UINT32_C(0x225B696C), UINT32_C(0x6059171F), UINT32_C(0x9F88AF1D), + UINT32_C(0x782A0993), UINT32_C(0x1C5FEA5E), UINT32_C(0x4EC710D3), + UINT32_C(0xE0FB1588), UINT32_C(0xD32CE365), UINT32_C(0xFAF372E5) }, + { UINT32_C(0x26506F45), UINT32_C(0xD9F896AB), UINT32_C(0x8373C724), + UINT32_C(0x8D350338), UINT32_C(0xCA6E7342), UINT32_C(0x1B76992D), + UINT32_C(0x6FD0C08B), UINT32_C(0x76338FCA), UINT32_C(0xA00F5C23), + UINT32_C(0xC3EA4C65), UINT32_C(0xB316B35B), UINT32_C(0xDFAB29B3) } }, + { { UINT32_C(0x483AEBF9), UINT32_C(0x84E5541F), UINT32_C(0x49165772), + UINT32_C(0x8ADFF7DC), UINT32_C(0x9BEAAD3C), UINT32_C(0xE0A43AD6), + 
UINT32_C(0xF51C2714), UINT32_C(0x97DD1820), UINT32_C(0x57EA5B0C), + UINT32_C(0xAC2B4CB4), UINT32_C(0xD11767CA), UINT32_C(0x87DBD011) }, + { UINT32_C(0xBFC7957A), UINT32_C(0x18CCF36C), UINT32_C(0x1BC79227), + UINT32_C(0xD4A08841), UINT32_C(0xD8D292A8), UINT32_C(0x9811CE43), + UINT32_C(0xD58C4EE7), UINT32_C(0x72C5FC68), UINT32_C(0xD35C65A7), + UINT32_C(0x5BC0F0BE), UINT32_C(0xCBBF9669), UINT32_C(0x0B446DBC) } }, + { { UINT32_C(0x9CEE9BCE), UINT32_C(0x7EBA3DA6), UINT32_C(0xD5377750), + UINT32_C(0x3E2C1248), UINT32_C(0x2B93D8B2), UINT32_C(0x8C917D98), + UINT32_C(0x7CAD1F75), UINT32_C(0xCA8FC6AC), UINT32_C(0xA0FF150A), + UINT32_C(0x5F581F19), UINT32_C(0xE08327FA), UINT32_C(0x872CC14A) }, + { UINT32_C(0xE9333188), UINT32_C(0xC774F187), UINT32_C(0x497AF7E8), + UINT32_C(0x528ED4AC), UINT32_C(0x8AD72B10), UINT32_C(0xCE036E9B), + UINT32_C(0x917986CF), UINT32_C(0x463F9EBB), UINT32_C(0x1325CF9B), + UINT32_C(0xBE516328), UINT32_C(0xDD7E5FEA), UINT32_C(0xD28D5C50) } }, + { { UINT32_C(0xDD58BBE3), UINT32_C(0x714C1D1B), UINT32_C(0x039AFD0F), + UINT32_C(0x85BA01AE), UINT32_C(0x6951AC80), UINT32_C(0x7F23EA3A), + UINT32_C(0xAC00C837), UINT32_C(0x5C599290), UINT32_C(0xBF24CC1B), + UINT32_C(0xF6EFA2B3), UINT32_C(0x1E84462B), UINT32_C(0x393D8E42) }, + { UINT32_C(0xF8B89453), UINT32_C(0x9BDA627D), UINT32_C(0xB23E0D1B), + UINT32_C(0xE66FFF2E), UINT32_C(0xC3B94EC2), UINT32_C(0xD1EE7089), + UINT32_C(0x3031699A), UINT32_C(0xF75DBA6E), UINT32_C(0x242B2453), + UINT32_C(0x8FF75F79), UINT32_C(0x289BFED4), UINT32_C(0xE721EDEB) } }, + { { UINT32_C(0xC1390FA8), UINT32_C(0x083215A1), UINT32_C(0x6DCE8CE0), + UINT32_C(0x901D686A), UINT32_C(0x837073FF), UINT32_C(0x4AB1BA62), + UINT32_C(0x34BEABA5), UINT32_C(0x10C287AA), UINT32_C(0x46985239), + UINT32_C(0xB4931AF4), UINT32_C(0xB053C4DC), UINT32_C(0x07639899) }, + { UINT32_C(0xE721EECD), UINT32_C(0x29E7F44D), UINT32_C(0x57B3FF48), + UINT32_C(0x65817182), UINT32_C(0x5054E2E0), UINT32_C(0x198542E2), + UINT32_C(0x84616DE8), UINT32_C(0x923C9E15), 
UINT32_C(0xAD465BB9), + UINT32_C(0x2A9C15E1), UINT32_C(0x16319245), UINT32_C(0xD8D4EFC7) } }, + { { UINT32_C(0x9961A674), UINT32_C(0x72DC7943), UINT32_C(0xA0E13668), + UINT32_C(0x839A0A52), UINT32_C(0x334945EA), UINT32_C(0xD7A53FA9), + UINT32_C(0xE7AA25DB), UINT32_C(0xDB21DB77), UINT32_C(0x66E96DA3), + UINT32_C(0xB6675A7D), UINT32_C(0xE66F33C0), UINT32_C(0x2C31C406) }, + { UINT32_C(0x6EC7B9CB), UINT32_C(0x45020B62), UINT32_C(0x0391F267), + UINT32_C(0xFF46E9CD), UINT32_C(0x0FA2F221), UINT32_C(0x7DABD744), + UINT32_C(0x9D4A2A3E), UINT32_C(0x9A32364B), UINT32_C(0x52D2E47A), + UINT32_C(0xF0F84AE8), UINT32_C(0x888F488A), UINT32_C(0xD0B872BB) } }, + { { UINT32_C(0xC9790EEF), UINT32_C(0x531E4CEF), UINT32_C(0x2B8D1A58), + UINT32_C(0xF7B5735E), UINT32_C(0xEF568511), UINT32_C(0xB8882F1E), + UINT32_C(0x86A86DB3), UINT32_C(0xAFB08D1C), UINT32_C(0xF54DE8C7), + UINT32_C(0x88CB9DF2), UINT32_C(0x9A683282), UINT32_C(0xA44234F1) }, + { UINT32_C(0xA6E9AB2E), UINT32_C(0xBC1B3D3A), UINT32_C(0x87FC99EE), + UINT32_C(0xEFA071FB), UINT32_C(0xA102DC0F), UINT32_C(0xFA3C737D), + UINT32_C(0xD6A0CBD2), UINT32_C(0xDF3248A6), UINT32_C(0x1ECC1BF4), + UINT32_C(0x6E62A4FF), UINT32_C(0xC8F1BC17), UINT32_C(0xF718F940) } }, + { { UINT32_C(0x4F63F026), UINT32_C(0x2C8B0AAD), UINT32_C(0x50B253CC), + UINT32_C(0x2AFF6238), UINT32_C(0x10C4D122), UINT32_C(0xCAB3E942), + UINT32_C(0x07CD2816), UINT32_C(0x52B59F04), UINT32_C(0x982C41FC), + UINT32_C(0x22322803), UINT32_C(0x8CF50B19), UINT32_C(0x38844E66) }, + { UINT32_C(0xBE3264CD), UINT32_C(0x42A959F7), UINT32_C(0x6C983524), + UINT32_C(0xBDDC24BD), UINT32_C(0x462B8640), UINT32_C(0xA489EB0C), + UINT32_C(0x98029BE7), UINT32_C(0xB7C05092), UINT32_C(0xA1ADDC64), + UINT32_C(0xD5546B5F), UINT32_C(0xA0C655AF), UINT32_C(0xE7CAC1FC) } }, + { { UINT32_C(0x47636F97), UINT32_C(0x14547198), UINT32_C(0xEBCDCCFF), + UINT32_C(0x6FA67481), UINT32_C(0x395D3258), UINT32_C(0xC164872F), + UINT32_C(0xEE6ACDBC), UINT32_C(0xB8CECAFE), UINT32_C(0xA933F180), + UINT32_C(0x3FBFE5F3), 
UINT32_C(0x898C3B1E), UINT32_C(0xEC20CAC2) }, + { UINT32_C(0x87DA73F9), UINT32_C(0x6A031BEE), UINT32_C(0x5C5AF46E), + UINT32_C(0xD1E667D1), UINT32_C(0x1DC6EEF9), UINT32_C(0xCB3DC168), + UINT32_C(0x33D310C0), UINT32_C(0x2DD1BD94), UINT32_C(0x9207E438), + UINT32_C(0x0F78D493), UINT32_C(0xA99C0E75), UINT32_C(0xC233D544) } }, + { { UINT32_C(0x9E2A0113), UINT32_C(0x228F19F1), UINT32_C(0x0E1A5D37), + UINT32_C(0x58495BE5), UINT32_C(0x38D7F364), UINT32_C(0x97E08F69), + UINT32_C(0x510759B0), UINT32_C(0x1EC3BA3E), UINT32_C(0xE03CD40D), + UINT32_C(0x3682F19A), UINT32_C(0xF9E16D68), UINT32_C(0xC87745D8) }, + { UINT32_C(0x09A642EA), UINT32_C(0xFD527AB5), UINT32_C(0xF9C81F27), + UINT32_C(0x6308EEBD), UINT32_C(0x550C5D68), UINT32_C(0xFA9F666C), + UINT32_C(0x584AB153), UINT32_C(0xDEBA436F), UINT32_C(0x5B63E939), + UINT32_C(0x1D4861D3), UINT32_C(0xC9850221), UINT32_C(0x073BED9B) } }, + { { UINT32_C(0x8B171246), UINT32_C(0x802BCCF0), UINT32_C(0x733B072F), + UINT32_C(0xFFF7D15A), UINT32_C(0x4CBFA4EF), UINT32_C(0xEA386266), + UINT32_C(0xD635946B), UINT32_C(0x9E5B5073), UINT32_C(0xFA81BE95), + UINT32_C(0x16E9A979), UINT32_C(0xB14F701F), UINT32_C(0x41E8716E) }, + { UINT32_C(0x101A6719), UINT32_C(0x25782E0F), UINT32_C(0xC9D66959), + UINT32_C(0x442C4875), UINT32_C(0x2B85D153), UINT32_C(0x52D845D9), + UINT32_C(0x2E831117), UINT32_C(0xFF925138), UINT32_C(0x8E02434B), + UINT32_C(0x01B700CC), UINT32_C(0xEC0BAE3E), UINT32_C(0xD2DB7F8E) } }, + { { UINT32_C(0x966A4872), UINT32_C(0x1B225300), UINT32_C(0x566F537B), + UINT32_C(0x40C149BE), UINT32_C(0xCB680021), UINT32_C(0x3335F4D2), + UINT32_C(0x778E5F5F), UINT32_C(0x773D0263), UINT32_C(0x666FA9ED), + UINT32_C(0x1D9B7602), UINT32_C(0x2E6200CF), UINT32_C(0x52490A10) }, + { UINT32_C(0x961F290B), UINT32_C(0x8434C7DD), UINT32_C(0x64456446), + UINT32_C(0x773AC156), UINT32_C(0x47B712BB), UINT32_C(0x5E2BB789), + UINT32_C(0xBE0974AD), UINT32_C(0xFD3BCBFD), UINT32_C(0x791AD5D8), + UINT32_C(0x71AE9351), UINT32_C(0x6F4E1400), UINT32_C(0x1EE738BA) } }, + { { 
UINT32_C(0x0BE8E26E), UINT32_C(0x2FA428AB), UINT32_C(0xBB4CF9FC), + UINT32_C(0xFEFF0600), UINT32_C(0xB2EA5FB0), UINT32_C(0x76F25CA9), + UINT32_C(0x6835C5F4), UINT32_C(0xAB7FECF0), UINT32_C(0x19D5F328), + UINT32_C(0x649D0772), UINT32_C(0xACBCB12E), UINT32_C(0xABE7B895) }, + { UINT32_C(0xD69B1EA8), UINT32_C(0xF2D1031A), UINT32_C(0xC60B0BBB), + UINT32_C(0x46065D5D), UINT32_C(0x85D798FF), UINT32_C(0xB0908DC1), + UINT32_C(0xD2C9B18A), UINT32_C(0x4E2420F0), UINT32_C(0xD30432A2), + UINT32_C(0x6B3A9BDD), UINT32_C(0xC9B134AD), UINT32_C(0x501C3383) } }, + { { UINT32_C(0x98A21284), UINT32_C(0x608F0967), UINT32_C(0x059CCEDE), + UINT32_C(0x5361BE86), UINT32_C(0xAFD87EF7), UINT32_C(0x3A40655C), + UINT32_C(0x59083AA2), UINT32_C(0x03CF3117), UINT32_C(0xB6C366D9), + UINT32_C(0x57DB5F61), UINT32_C(0x6DD0D232), UINT32_C(0x29DC275B) }, + { UINT32_C(0x8FA67501), UINT32_C(0xBDAB24DD), UINT32_C(0x65D08C37), + UINT32_C(0x5928F775), UINT32_C(0x645D466A), UINT32_C(0x9448A856), + UINT32_C(0xC0E927A5), UINT32_C(0x6E6B5E2E), UINT32_C(0xE80C6871), + UINT32_C(0xE884D546), UINT32_C(0x53A9A851), UINT32_C(0x10C881C9) } }, + { { UINT32_C(0x9B627AA5), UINT32_C(0x35505374), UINT32_C(0x7976677B), + UINT32_C(0xE7CA1B57), UINT32_C(0x4976CE17), UINT32_C(0x81239712), + UINT32_C(0x96DA31B9), UINT32_C(0x96E9080B), UINT32_C(0xCC64AA1F), + UINT32_C(0x458254AB), UINT32_C(0x48E674C9), UINT32_C(0xFEFF6821) }, + { UINT32_C(0x021F1488), UINT32_C(0x8772F37A), UINT32_C(0xAB56345C), + UINT32_C(0x2E274E18), UINT32_C(0x29823B76), UINT32_C(0x7C7BE61C), + UINT32_C(0x9EEFB39E), UINT32_C(0x275DB7B2), UINT32_C(0xBF5CBCEF), + UINT32_C(0x83B10ED4), UINT32_C(0x518E5183), UINT32_C(0x40D7F5B4) } }, + { { UINT32_C(0xF960B41B), UINT32_C(0x315CCC01), UINT32_C(0x1D99E722), + UINT32_C(0x90B417C9), UINT32_C(0x013463E0), UINT32_C(0x84AFAA0D), + UINT32_C(0x13E6D9E1), UINT32_C(0xF133C5D8), UINT32_C(0x525B7430), + UINT32_C(0xD95C6ADC), UINT32_C(0x7A25106A), UINT32_C(0x082C61AD) }, + { UINT32_C(0xBA1CE179), UINT32_C(0xABC1966D), 
UINT32_C(0xA5DB529A), + UINT32_C(0xE0578B77), UINT32_C(0xEC84107D), UINT32_C(0x10988C05), + UINT32_C(0x1B207F83), UINT32_C(0xFCADE5D7), UINT32_C(0xC5BA83DB), + UINT32_C(0x0BEB6FDB), UINT32_C(0x57537E34), UINT32_C(0x1C39B86D) } }, + }, + { + { { UINT32_C(0x2A7AECED), UINT32_C(0x5B0B5D69), UINT32_C(0x01DC545F), + UINT32_C(0x4C03450C), UINT32_C(0x404A3458), UINT32_C(0x72AD0A4A), + UINT32_C(0x9F467B60), UINT32_C(0x1DE8E255), UINT32_C(0x90634809), + UINT32_C(0xA4B35705), UINT32_C(0x706F0178), UINT32_C(0x76F30205) }, + { UINT32_C(0x4454F0E5), UINT32_C(0x588D21AB), UINT32_C(0x64134928), + UINT32_C(0xD22DF549), UINT32_C(0x241BCD90), UINT32_C(0xF4E7E73D), + UINT32_C(0x2FACC7CC), UINT32_C(0xB8D8A1D2), UINT32_C(0x1D25D2A0), + UINT32_C(0x483C35A7), UINT32_C(0x1EF9F608), UINT32_C(0x7F8D2545) } }, + { { UINT32_C(0x54EBC926), UINT32_C(0xCB51F039), UINT32_C(0xB8D4A7BB), + UINT32_C(0xE235D356), UINT32_C(0xB41FE1A6), UINT32_C(0x93C8FAFA), + UINT32_C(0xA719F254), UINT32_C(0x6297701D), UINT32_C(0x644F5CDE), + UINT32_C(0x6E9165BC), UINT32_C(0x0C11C542), UINT32_C(0x6506329D) }, + { UINT32_C(0xA92B4250), UINT32_C(0xA2564809), UINT32_C(0x889C2E3E), + UINT32_C(0x0E9AC173), UINT32_C(0x22B1D1BE), UINT32_C(0x286A5926), + UINT32_C(0x6ECDD041), UINT32_C(0x86A3D752), UINT32_C(0x649F9524), + UINT32_C(0x4B867E0A), UINT32_C(0x0629CB0F), UINT32_C(0x1FE7D95A) } }, + { { UINT32_C(0xCA5BAF54), UINT32_C(0xF4F66843), UINT32_C(0xEFE7DB78), + UINT32_C(0x298DB357), UINT32_C(0x7365712F), UINT32_C(0xF607E86E), + UINT32_C(0x8A822BC0), UINT32_C(0xD5882298), UINT32_C(0xC61299B3), + UINT32_C(0x2CFBD63A), UINT32_C(0x67167B1A), UINT32_C(0x6F713D9B) }, + { UINT32_C(0xDE0B077A), UINT32_C(0x750F673F), UINT32_C(0xEE2178DA), + UINT32_C(0x07482708), UINT32_C(0x69123C75), UINT32_C(0x5E6D5BD1), + UINT32_C(0xEAB99B37), UINT32_C(0x6A93D1B6), UINT32_C(0x8CAEC6A3), + UINT32_C(0x6EF4F7E6), UINT32_C(0xCF3ED818), UINT32_C(0x7BE411D6) } }, + { { UINT32_C(0x63A0A7D2), UINT32_C(0xF92B3073), UINT32_C(0x881DC8CF), + 
UINT32_C(0x32DA431C), UINT32_C(0xC578E3A3), UINT32_C(0xE51BD5ED), + UINT32_C(0x9587FA22), UINT32_C(0xEFDA70D2), UINT32_C(0x9B2EBA85), + UINT32_C(0xCFEC1708), UINT32_C(0xAF7BA530), UINT32_C(0x6AB51A4B) }, + { UINT32_C(0x98174812), UINT32_C(0x5AC155AE), UINT32_C(0xCCB076E3), + UINT32_C(0xCAF07A71), UINT32_C(0xC38718A7), UINT32_C(0x280E86C2), + UINT32_C(0xD63745B7), UINT32_C(0x9D12DE73), UINT32_C(0xBF8A79AA), + UINT32_C(0x0E8EA855), UINT32_C(0xBD705BF7), UINT32_C(0x5EB2BED8) } }, + { { UINT32_C(0xAE16DE53), UINT32_C(0x33FE9578), UINT32_C(0x10BEC902), + UINT32_C(0x3AE85EB5), UINT32_C(0x44AF850E), UINT32_C(0xC4F49658), + UINT32_C(0x087DD658), UINT32_C(0x6EA222B3), UINT32_C(0xA51F1447), + UINT32_C(0xB255E6FD), UINT32_C(0x117E3F48), UINT32_C(0xB35E4997) }, + { UINT32_C(0x05616CA1), UINT32_C(0x562E813B), UINT32_C(0x8A61E156), + UINT32_C(0xDF5925D6), UINT32_C(0x571C728B), UINT32_C(0xB2FA8125), + UINT32_C(0xA2F2D1CF), UINT32_C(0x00864805), UINT32_C(0x1BCCB6FF), + UINT32_C(0x2DC26F41), UINT32_C(0x63AE37DD), UINT32_C(0xEBD5E093) } }, + { { UINT32_C(0x0A285611), UINT32_C(0xD2D68BB3), UINT32_C(0xDC8378F2), + UINT32_C(0x3EAE7596), UINT32_C(0x6CC688A3), UINT32_C(0x2DC6CCC6), + UINT32_C(0x011F5DFB), UINT32_C(0xC45E5713), UINT32_C(0x62D34487), + UINT32_C(0x6B9C4F6C), UINT32_C(0x1FC65551), UINT32_C(0xFAD6F077) }, + { UINT32_C(0x62B23B52), UINT32_C(0x5E3266E0), UINT32_C(0xE98F4715), + UINT32_C(0xF1DAF319), UINT32_C(0x3ED0AE83), UINT32_C(0x064D12EA), + UINT32_C(0x564125CB), UINT32_C(0x5CCF9326), UINT32_C(0xC63C1E9F), + UINT32_C(0x09057022), UINT32_C(0xDC9B5D2E), UINT32_C(0x7171972C) } }, + { { UINT32_C(0xEABD21B2), UINT32_C(0x2364FD9A), UINT32_C(0x9174AD6D), + UINT32_C(0x3CE5F4BB), UINT32_C(0xB38688C0), UINT32_C(0xA4D6D5D0), + UINT32_C(0x6D87FD7D), UINT32_C(0x2292A2D2), UINT32_C(0x4CA02E54), + UINT32_C(0x2A7D1B53), UINT32_C(0xB4185715), UINT32_C(0x7BEE6E7E) }, + { UINT32_C(0x8FC63ACD), UINT32_C(0x73E54609), UINT32_C(0x4064E09D), + UINT32_C(0xF4D93A12), UINT32_C(0x2B92DAA5), 
UINT32_C(0xD20E157A), + UINT32_C(0xC4B81A00), UINT32_C(0x90D125DB), UINT32_C(0x7682DE13), + UINT32_C(0xCB951C9E), UINT32_C(0x27987545), UINT32_C(0x1ABE58F4) } }, + { { UINT32_C(0x30C70C8D), UINT32_C(0x6D351640), UINT32_C(0xCE2361B8), + UINT32_C(0x8047D811), UINT32_C(0xDF8E2C81), UINT32_C(0x3F8B3D4F), + UINT32_C(0x33FA1F6C), UINT32_C(0x5D595477), UINT32_C(0xE29B8A91), + UINT32_C(0xF769FE5A), UINT32_C(0xD737B2A2), UINT32_C(0x26F0E606) }, + { UINT32_C(0xB8B31C6A), UINT32_C(0x70CBFA5D), UINT32_C(0x863D3AEA), + UINT32_C(0x0F883B4A), UINT32_C(0xE386AE2F), UINT32_C(0x156A4479), + UINT32_C(0xADE8A684), UINT32_C(0xA17A2FCD), UINT32_C(0xE2A7E335), + UINT32_C(0x78BDF958), UINT32_C(0x3B9E3041), UINT32_C(0xD1B4E673) } }, + { { UINT32_C(0x449A6D11), UINT32_C(0x1EAF48EC), UINT32_C(0x6D2FA7B9), + UINT32_C(0x6B94B8E4), UINT32_C(0x728E4C1B), UINT32_C(0x1D75D269), + UINT32_C(0xDD304E2C), UINT32_C(0x91123819), UINT32_C(0x88804F4B), + UINT32_C(0x0B34CAE3), UINT32_C(0xC5495E9A), UINT32_C(0x2BA192FB) }, + { UINT32_C(0xFF4D24BF), UINT32_C(0xC93FF6EF), UINT32_C(0x0342BA78), + UINT32_C(0xF8C2C0B0), UINT32_C(0x831EB94C), UINT32_C(0x8041F769), + UINT32_C(0x7782985E), UINT32_C(0x35310074), UINT32_C(0x3AF84E83), + UINT32_C(0xC755320B), UINT32_C(0x6F497E7F), UINT32_C(0x384B6D26) } }, + { { UINT32_C(0x17E6BD17), UINT32_C(0xEF92CD59), UINT32_C(0xA426965C), + UINT32_C(0xA087305B), UINT32_C(0xAC47F773), UINT32_C(0x13895CE7), + UINT32_C(0xE0BB2867), UINT32_C(0xB85F2A9F), UINT32_C(0x7CD7C58E), + UINT32_C(0x2926E6AA), UINT32_C(0x450459C5), UINT32_C(0xE544EDA6) }, + { UINT32_C(0xB90A9849), UINT32_C(0x73DBC351), UINT32_C(0x848EBE86), + UINT32_C(0x961183F6), UINT32_C(0x80534712), UINT32_C(0xC45BB210), + UINT32_C(0xA654D9A3), UINT32_C(0x379D08D7), UINT32_C(0xBD3FFA9C), + UINT32_C(0x5B97CEF2), UINT32_C(0xDDC2FCE5), UINT32_C(0x0F469F34) } }, + { { UINT32_C(0x0642F38D), UINT32_C(0x6D146108), UINT32_C(0xD21EB887), + UINT32_C(0x055171A0), UINT32_C(0xD0DCEB28), UINT32_C(0x28DFFAB4), + UINT32_C(0x98DE9CCD), 
UINT32_C(0x0D0E6312), UINT32_C(0x118C3C3F), + UINT32_C(0x750A9156), UINT32_C(0xB049D799), UINT32_C(0x8C1F1390) }, + { UINT32_C(0x439607C5), UINT32_C(0xE4823858), UINT32_C(0x5C111EAB), + UINT32_C(0x947E9BA0), UINT32_C(0xA355DF2E), UINT32_C(0x39C95616), + UINT32_C(0x10E54BDA), UINT32_C(0xF5F6B98E), UINT32_C(0x142B876A), + UINT32_C(0xB0E0B33D), UINT32_C(0xEA18C90C), UINT32_C(0x71197D73) } }, + { { UINT32_C(0xF52BE819), UINT32_C(0x36A5139D), UINT32_C(0x29A45D2B), + UINT32_C(0xF60DDF34), UINT32_C(0xE9220E34), UINT32_C(0x0727EFEC), + UINT32_C(0x4EF7F446), UINT32_C(0x431D3386), UINT32_C(0xFCC4962C), + UINT32_C(0xC3165A64), UINT32_C(0xD64362BB), UINT32_C(0xB7D926E1) }, + { UINT32_C(0xD45F9350), UINT32_C(0x216BC61F), UINT32_C(0xBBAED815), + UINT32_C(0xA974CB2F), UINT32_C(0x86FB2F76), UINT32_C(0x31DF342D), + UINT32_C(0x01D78314), UINT32_C(0x3AB67E05), UINT32_C(0xDEE33ED2), + UINT32_C(0x7AA951E0), UINT32_C(0xCEC78D94), UINT32_C(0x318FBBBD) } }, + { { UINT32_C(0xB8FE0204), UINT32_C(0xAD7EFB65), UINT32_C(0x230AB7F7), + UINT32_C(0x0432E1C5), UINT32_C(0x9C967400), UINT32_C(0x7563A62D), + UINT32_C(0x3524D4FF), UINT32_C(0xD88B9C74), UINT32_C(0xF1A823E3), + UINT32_C(0x16A1991C), UINT32_C(0xFA6F0FFB), UINT32_C(0xCF2F9BFE) }, + { UINT32_C(0xA50CA61F), UINT32_C(0x55AAA946), UINT32_C(0xFED4CAB3), + UINT32_C(0x8CBBD3C8), UINT32_C(0x7651365A), UINT32_C(0x03A0FAB8), + UINT32_C(0x62DC3913), UINT32_C(0x46B5234B), UINT32_C(0xB558CBBD), + UINT32_C(0xFD875B28), UINT32_C(0x11CEB361), UINT32_C(0xA48EC3AE) } }, + { { UINT32_C(0xB3ADBD8B), UINT32_C(0x5DD131A1), UINT32_C(0x29B45EF8), + UINT32_C(0xF9FBCA3A), UINT32_C(0x9341EE18), UINT32_C(0x02204866), + UINT32_C(0x83BF9618), UINT32_C(0x8D13B895), UINT32_C(0xE807459C), + UINT32_C(0x0E395BAE), UINT32_C(0xB190E7DB), UINT32_C(0xB9C110CC) }, + { UINT32_C(0x25D25063), UINT32_C(0xA0DC3452), UINT32_C(0x02371462), + UINT32_C(0x2FB78EC8), UINT32_C(0x8975C2D5), UINT32_C(0xC3A9E7BB), + UINT32_C(0x85A78264), UINT32_C(0x94666872), UINT32_C(0x8029AA92), + 
UINT32_C(0x480D2CC2), UINT32_C(0x5655726D), UINT32_C(0x237086C7) } }, + { { UINT32_C(0x65EB9EEE), UINT32_C(0x197F14BB), UINT32_C(0x9F12E5FD), + UINT32_C(0xFC93125C), UINT32_C(0x8BFBAE5E), UINT32_C(0x9C20BC53), + UINT32_C(0x4BC053BA), UINT32_C(0xB35E2154), UINT32_C(0x21C3898E), + UINT32_C(0xE5FA9CC7), UINT32_C(0xD42F950F), UINT32_C(0x502D72FF) }, + { UINT32_C(0xD1EB8C31), UINT32_C(0x6812D38A), UINT32_C(0x080D30BB), + UINT32_C(0x1F77F3F1), UINT32_C(0x5A8B1E98), UINT32_C(0x18D12833), + UINT32_C(0x299196CE), UINT32_C(0x7FD39FA9), UINT32_C(0xCF4ED6D6), + UINT32_C(0xFB8C9F11), UINT32_C(0xD6363194), UINT32_C(0x4C00F604) } }, + { { UINT32_C(0xFA2A21C2), UINT32_C(0x5C8AFCF9), UINT32_C(0x1928D133), + UINT32_C(0x71CBF282), UINT32_C(0x42B29506), UINT32_C(0x56BEF28E), + UINT32_C(0x70323DE2), UINT32_C(0xAFBA250C), UINT32_C(0x7DED2C30), + UINT32_C(0x3FE208D1), UINT32_C(0xCE9AA598), UINT32_C(0xBD2CD213) }, + { UINT32_C(0xCFEED070), UINT32_C(0x52C5EC52), UINT32_C(0xD3DA336B), + UINT32_C(0x0A7223E7), UINT32_C(0xCE156B46), UINT32_C(0x7156A4ED), + UINT32_C(0xED7E6159), UINT32_C(0x9AF6C499), UINT32_C(0x13C029AD), + UINT32_C(0x9D7A6797), UINT32_C(0x9018DC77), UINT32_C(0xE5B5C924) } }, + }, + { + { { UINT32_C(0xDE1E4E55), UINT32_C(0x3F2EFF53), UINT32_C(0xE4D3ECC4), + UINT32_C(0x6B749943), UINT32_C(0x0DDE190D), UINT32_C(0xAF10B18A), + UINT32_C(0xA26B0409), UINT32_C(0xF491B98D), UINT32_C(0xA2B1D944), + UINT32_C(0x66080782), UINT32_C(0x97E8C541), UINT32_C(0x59277DC6) }, + { UINT32_C(0x006F18AA), UINT32_C(0xFDBFC5F6), UINT32_C(0xFADD8BE1), + UINT32_C(0x435D165B), UINT32_C(0x57645EF4), UINT32_C(0x8E5D2638), + UINT32_C(0xA0258363), UINT32_C(0x31BCFDA6), UINT32_C(0xD35D2503), + UINT32_C(0xF5330AB8), UINT32_C(0xC7CAB285), UINT32_C(0xB71369F0) } }, + { { UINT32_C(0x40ACC5A8), UINT32_C(0xE6A19DCC), UINT32_C(0xDBC6DBF8), + UINT32_C(0x1C3A1FF1), UINT32_C(0xC6455613), UINT32_C(0xB4D89B9F), + UINT32_C(0xA7390D0E), UINT32_C(0x6CB0FE44), UINT32_C(0x59EA135A), + UINT32_C(0xADE197A4), 
UINT32_C(0x20680982), UINT32_C(0xDA6AA865) }, + { UINT32_C(0x5A442C1B), UINT32_C(0x03DB9BE9), UINT32_C(0x2BFB93F2), + UINT32_C(0x221A2D73), UINT32_C(0x753C196C), UINT32_C(0x44DEE8D4), + UINT32_C(0x0B7C6FF5), UINT32_C(0x59ADCC70), UINT32_C(0x4CA1B142), + UINT32_C(0xC6260EC2), UINT32_C(0x46CBD4F2), UINT32_C(0x4C3CB5C6) } }, + { { UINT32_C(0xA417111F), UINT32_C(0x8A15D6FE), UINT32_C(0x71D93FCC), + UINT32_C(0xFE4A16BD), UINT32_C(0x55BBE732), UINT32_C(0x7A7EE38C), + UINT32_C(0x1FF94A9D), UINT32_C(0xEFF146A5), UINT32_C(0xDD585AB5), + UINT32_C(0xE572D13E), UINT32_C(0x06491A5D), UINT32_C(0xD879790E) }, + { UINT32_C(0x2A58CB2E), UINT32_C(0x9C84E1C5), UINT32_C(0x6C938630), + UINT32_C(0xD79D1374), UINT32_C(0x385F06C7), UINT32_C(0xDB12CD9B), + UINT32_C(0x7A7759C3), UINT32_C(0x0C93EB97), UINT32_C(0x683BD706), + UINT32_C(0xF1F5B0FE), UINT32_C(0x85EC3D50), UINT32_C(0x541E4F72) } }, + { { UINT32_C(0x81833608), UINT32_C(0x9A0E1535), UINT32_C(0x6E2833AC), + UINT32_C(0x5CCE871E), UINT32_C(0xFB29777C), UINT32_C(0xC17059EA), + UINT32_C(0xE354CAFD), UINT32_C(0x7E40E5FA), UINT32_C(0x4D07C371), + UINT32_C(0x9CF59405), UINT32_C(0xA71C3945), UINT32_C(0x64CE36B2) }, + { UINT32_C(0x56CAF487), UINT32_C(0x69309E96), UINT32_C(0x1AE3454B), + UINT32_C(0x3D719E9F), UINT32_C(0xE25823B6), UINT32_C(0xF2164070), + UINT32_C(0x0BC27359), UINT32_C(0xEAD851BD), UINT32_C(0xB0925094), + UINT32_C(0x3D21BFE8), UINT32_C(0x34A97F4E), UINT32_C(0xA783B1E9) } }, + { { UINT32_C(0x9546491A), UINT32_C(0x406B0C26), UINT32_C(0xF293C4E5), + UINT32_C(0x9E5E15E2), UINT32_C(0x15B164DB), UINT32_C(0xC60D6413), + UINT32_C(0x0C75A78E), UINT32_C(0x0DA46F53), UINT32_C(0xEA0C656B), + UINT32_C(0x7C599BB7), UINT32_C(0x1B1A8122), UINT32_C(0x0F07A512) }, + { UINT32_C(0x15172686), UINT32_C(0x14C7204A), UINT32_C(0x5165625D), + UINT32_C(0x8FAEDFF8), UINT32_C(0x37AEDE40), UINT32_C(0x20F260CE), + UINT32_C(0x8F357FFE), UINT32_C(0xC81F771E), UINT32_C(0xB0912557), + UINT32_C(0x25499197), UINT32_C(0x4C739C74), UINT32_C(0x736197DC) } }, + { { 
UINT32_C(0x381B3462), UINT32_C(0x6151BAB1), UINT32_C(0x43DBD344), + UINT32_C(0x27E5A078), UINT32_C(0xA1C3E9FB), UINT32_C(0x2CB05BD6), + UINT32_C(0x27CF2A11), UINT32_C(0x2A759760), UINT32_C(0xFF43E702), + UINT32_C(0x0ADCF9DB), UINT32_C(0x1F484146), UINT32_C(0x4BBF03E2) }, + { UINT32_C(0x55B6521A), UINT32_C(0x0E74997F), UINT32_C(0xADE17086), + UINT32_C(0x15629231), UINT32_C(0x7493FC58), UINT32_C(0x7F143E86), + UINT32_C(0xAF8B9670), UINT32_C(0x60869095), UINT32_C(0x7E524869), + UINT32_C(0x482CFCD7), UINT32_C(0x1D454756), UINT32_C(0x9E8060C3) } }, + { { UINT32_C(0xC88B4D3B), UINT32_C(0xE495747A), UINT32_C(0xAE8A948F), + UINT32_C(0xB7559835), UINT32_C(0xDEB56853), UINT32_C(0x67EEF3A9), + UINT32_C(0x9DEE5ADF), UINT32_C(0x0E20E269), UINT32_C(0x61F0A1AA), + UINT32_C(0x9031AF67), UINT32_C(0x683402BC), UINT32_C(0x76669D32) }, + { UINT32_C(0x06718B16), UINT32_C(0x90BD2313), UINT32_C(0x864EFDAC), + UINT32_C(0xE1B22A21), UINT32_C(0x6620089F), UINT32_C(0xE4FFE909), + UINT32_C(0x3428E2D9), UINT32_C(0xB84C842E), UINT32_C(0xFE3871FC), + UINT32_C(0x0E28C880), UINT32_C(0x3F21C200), UINT32_C(0x8932F698) } }, + { { UINT32_C(0x6C90EA5D), UINT32_C(0x603F00CE), UINT32_C(0x40A2F693), + UINT32_C(0x64739307), UINT32_C(0x2174E517), UINT32_C(0xAF65148B), + UINT32_C(0xF784AE74), UINT32_C(0x162FC2CA), UINT32_C(0x4D5F6458), + UINT32_C(0x0D9A8825), UINT32_C(0x43AACE93), UINT32_C(0x0C2D5861) }, + { UINT32_C(0x9F73CBFC), UINT32_C(0xBF1EADDE), UINT32_C(0x9C68BBCA), + UINT32_C(0xDE9C34C0), UINT32_C(0x67EF8A1A), UINT32_C(0x6D95602D), + UINT32_C(0xA791B241), UINT32_C(0x0AF2581B), UINT32_C(0x12CAD604), + UINT32_C(0x14F77361), UINT32_C(0xE2ACD1AD), UINT32_C(0x19F2354D) } }, + { { UINT32_C(0x0D60F263), UINT32_C(0x272F78F6), UINT32_C(0x208FD785), + UINT32_C(0xE7A8F4AF), UINT32_C(0x36554F2C), UINT32_C(0x10E191C6), + UINT32_C(0xFD5CD0B3), UINT32_C(0x06D88551), UINT32_C(0x57069C27), + UINT32_C(0x29BF8568), UINT32_C(0x28AA6FAD), UINT32_C(0x3CE7ECD8) }, + { UINT32_C(0xE9F1A1D8), UINT32_C(0x7D8A92D0), 
UINT32_C(0xD30B5725), + UINT32_C(0xD40C7FF8), UINT32_C(0xF54CAEB8), UINT32_C(0x16BE6CB2), + UINT32_C(0x14CB0A91), UINT32_C(0x14CA471A), UINT32_C(0x02733CAE), + UINT32_C(0xD5FF15B8), UINT32_C(0xDAA76580), UINT32_C(0xCAF88D87) } }, + { { UINT32_C(0x2C046592), UINT32_C(0x39430E22), UINT32_C(0x1AD26706), + UINT32_C(0x6CDAE81F), UINT32_C(0xA25D9106), UINT32_C(0x8C102159), + UINT32_C(0x27CA9F30), UINT32_C(0x9A440572), UINT32_C(0x70287FBC), + UINT32_C(0x8D34C430), UINT32_C(0x29DB8AFA), UINT32_C(0x9003A455) }, + { UINT32_C(0x7FD971AD), UINT32_C(0x91364CC3), UINT32_C(0x9C60EDB7), + UINT32_C(0x7B3AA048), UINT32_C(0x526F4DD8), UINT32_C(0x58B0E008), + UINT32_C(0xD86D98AE), UINT32_C(0xB7674454), UINT32_C(0xB2B45747), + UINT32_C(0xC25F4051), UINT32_C(0xCC043E8F), UINT32_C(0x8243BF9C) } }, + { { UINT32_C(0x43A0C387), UINT32_C(0xA89641C6), UINT32_C(0x87B9AB17), + UINT32_C(0x6D92205C), UINT32_C(0xDAA0E102), UINT32_C(0x37D691F4), + UINT32_C(0xCDE5312E), UINT32_C(0xEB3E52D7), UINT32_C(0x16F518A2), + UINT32_C(0x60D3C099), UINT32_C(0x8A378EEB), UINT32_C(0x7854C051) }, + { UINT32_C(0x4BBCAAC5), UINT32_C(0x7359DB51), UINT32_C(0x1713F102), + UINT32_C(0xF5B1B68C), UINT32_C(0xE4398DE5), UINT32_C(0xDAEAE645), + UINT32_C(0xD1ABFB82), UINT32_C(0x8C8ACB6C), UINT32_C(0x136423E2), + UINT32_C(0x2E8B76C3), UINT32_C(0xA8BA015E), UINT32_C(0x509DCB2D) } }, + { { UINT32_C(0x9AD9C59C), UINT32_C(0x2FF36815), UINT32_C(0x658E65B9), + UINT32_C(0xB189A4E8), UINT32_C(0xEA786AD2), UINT32_C(0x7D33DDBB), + UINT32_C(0xC0D2DC05), UINT32_C(0x96D0D648), UINT32_C(0xBFA03BE9), + UINT32_C(0x05E49256), UINT32_C(0x8BAF5A1C), UINT32_C(0x0EA4E7A6) }, + { UINT32_C(0x9F9AD5A8), UINT32_C(0x3DDCE0B0), UINT32_C(0x9E49C2CB), + UINT32_C(0xF7809195), UINT32_C(0x21782C2F), UINT32_C(0xBFCEF29D), + UINT32_C(0xC41BFD97), UINT32_C(0xE57AD39F), UINT32_C(0x1355AD19), + UINT32_C(0xC04B93E8), UINT32_C(0x59440F9F), UINT32_C(0xAABC9E6E) } }, + { { UINT32_C(0x5B6459DA), UINT32_C(0x7AA48103), UINT32_C(0x0166E880), + UINT32_C(0x83EF7477), 
UINT32_C(0x511CCE80), UINT32_C(0x536182B1), + UINT32_C(0x73CA55AA), UINT32_C(0xAFDD2EEE), UINT32_C(0xA8716143), + UINT32_C(0xAB910D0D), UINT32_C(0x83707250), UINT32_C(0x8BEAA42B) }, + { UINT32_C(0x8DA2AB3D), UINT32_C(0x4BCCFD89), UINT32_C(0xEC6AA105), + UINT32_C(0x1DBF68A9), UINT32_C(0x68EB42DA), UINT32_C(0x32CE6108), + UINT32_C(0x8EA62E37), UINT32_C(0x5C2C2C85), UINT32_C(0xCD3088A7), + UINT32_C(0x1ED2791F), UINT32_C(0xFF05070C), UINT32_C(0x496B4FEB) } }, + { { UINT32_C(0x0AA629C5), UINT32_C(0x9FA9121A), UINT32_C(0x57558BEC), + UINT32_C(0xE286CFF1), UINT32_C(0x59813A4D), UINT32_C(0x4D9D657E), + UINT32_C(0x26103519), UINT32_C(0xC4676A16), UINT32_C(0x2BD4DF80), + UINT32_C(0x616160B3), UINT32_C(0x30FBAE87), UINT32_C(0x26FB78CC) }, + { UINT32_C(0x8F0F66BD), UINT32_C(0x09607013), UINT32_C(0x03D9B90D), + UINT32_C(0xDD4E2D0C), UINT32_C(0x600D1B12), UINT32_C(0x5D3A8912), + UINT32_C(0x4308E126), UINT32_C(0xF76DD52F), UINT32_C(0x9E4FCCA6), + UINT32_C(0x97CC0409), UINT32_C(0x04C4DF7B), UINT32_C(0x0CFBE311) } }, + { { UINT32_C(0x28437A23), UINT32_C(0x6CA62C12), UINT32_C(0x40E7A003), + UINT32_C(0x0DAF3353), UINT32_C(0xD20F8079), UINT32_C(0x1FD07DF0), + UINT32_C(0x3BBC9749), UINT32_C(0xEAE7969C), UINT32_C(0x9ECAD022), + UINT32_C(0x55861AFA), UINT32_C(0x1FBC3D4C), UINT32_C(0xEC41DAD9) }, + { UINT32_C(0xDA8B261B), UINT32_C(0x1FE4CB40), UINT32_C(0x427C5C9D), + UINT32_C(0xC2671AB6), UINT32_C(0x261D4939), UINT32_C(0xDFCDA7B8), + UINT32_C(0x2072C0B9), UINT32_C(0x9E7B802B), UINT32_C(0xC7828CC2), + UINT32_C(0x3AFEE900), UINT32_C(0xF6DE987F), UINT32_C(0x3488BF28) } }, + { { UINT32_C(0x7BE1F89E), UINT32_C(0x33B9F2DE), UINT32_C(0x299B15C9), + UINT32_C(0xD4E80821), UINT32_C(0x0E13F37F), UINT32_C(0x87A3067A), + UINT32_C(0x55FD239F), UINT32_C(0x6D4C09ED), UINT32_C(0x92EF014F), + UINT32_C(0x48B1042D), UINT32_C(0xB385A759), UINT32_C(0xA382B2E0) }, + { UINT32_C(0x7F6F84F8), UINT32_C(0xBF571BB0), UINT32_C(0x0CE87F50), + UINT32_C(0x25AFFA37), UINT32_C(0xFE54F1BC), UINT32_C(0x826906D3), + 
UINT32_C(0xC53AE76A), UINT32_C(0x6B0421F4), UINT32_C(0x4855EB3C), + UINT32_C(0x44F85A3A), UINT32_C(0x8D1F2B27), UINT32_C(0xF49E2151) } }, + }, + { + { { UINT32_C(0x5E3C647B), UINT32_C(0xC0426B77), UINT32_C(0x8CF05348), + UINT32_C(0xBFCBD939), UINT32_C(0x172C0D3D), UINT32_C(0x31D312E3), + UINT32_C(0xEE754737), UINT32_C(0x5F49FDE6), UINT32_C(0x6DA7EE61), + UINT32_C(0x895530F0), UINT32_C(0xE8B3A5FB), UINT32_C(0xCF281B0A) }, + { UINT32_C(0x41B8A543), UINT32_C(0xFD149735), UINT32_C(0x3080DD30), + UINT32_C(0x41A625A7), UINT32_C(0x653908CF), UINT32_C(0xE2BAAE07), + UINT32_C(0xBA02A278), UINT32_C(0xC3D01436), UINT32_C(0x7B21B8F8), + UINT32_C(0xA0D0222E), UINT32_C(0xD7EC1297), UINT32_C(0xFDC270E9) } }, + { { UINT32_C(0x9F101E64), UINT32_C(0x06A67BD2), UINT32_C(0xE1733A4A), + UINT32_C(0xCB6E0AC7), UINT32_C(0x97BC62D2), UINT32_C(0xEE0B5D51), + UINT32_C(0x24C51874), UINT32_C(0x52B17039), UINT32_C(0x82A1A0D5), + UINT32_C(0xFED1F423), UINT32_C(0xDB6270AC), UINT32_C(0x55D90569) }, + { UINT32_C(0x5D73D533), UINT32_C(0x36BE4A9C), UINT32_C(0x976ED4D5), + UINT32_C(0xBE9266D6), UINT32_C(0xB8F8074B), UINT32_C(0xC17436D3), + UINT32_C(0x718545C6), UINT32_C(0x3BB4D399), UINT32_C(0x5C757D21), + UINT32_C(0x8E1EA355), UINT32_C(0x8C474366), UINT32_C(0xF7EDBC97) } }, + { { UINT32_C(0x6EA83242), UINT32_C(0xEC72C650), UINT32_C(0x1B2D237F), + UINT32_C(0xF7DE7BE5), UINT32_C(0x1819EFB0), UINT32_C(0x3C5E2200), + UINT32_C(0x8CDDE870), UINT32_C(0xDF5AB6D6), UINT32_C(0x92A87AEE), + UINT32_C(0x75A44E9D), UINT32_C(0xBCF77F19), UINT32_C(0xBDDC46F4) }, + { UINT32_C(0x669B674D), UINT32_C(0x8191EFBD), UINT32_C(0xED71768F), + UINT32_C(0x52884DF9), UINT32_C(0x65CF242C), UINT32_C(0xE62BE582), + UINT32_C(0x80B1D17B), UINT32_C(0xAE99A3B1), UINT32_C(0x92DE59A9), + UINT32_C(0x48CBB446), UINT32_C(0x2DCB3CE2), UINT32_C(0xD3C226CF) } }, + { { UINT32_C(0x9FD94EC4), UINT32_C(0x9580CDFB), UINT32_C(0x28631AD9), + UINT32_C(0xED273A6C), UINT32_C(0xC327F3E7), UINT32_C(0x5D3D5F77), + UINT32_C(0x35353C5F), 
UINT32_C(0x05D5339C), UINT32_C(0x5C258EB1), + UINT32_C(0xC56FB5FE), UINT32_C(0xEDCE1F79), UINT32_C(0xEFF8425E) }, + { UINT32_C(0xCF83CF9C), UINT32_C(0xAB7AA141), UINT32_C(0x207D6D4F), + UINT32_C(0xBD2A690A), UINT32_C(0x458D9E52), UINT32_C(0xE1241491), + UINT32_C(0xAA7F0F31), UINT32_C(0xDD2448CC), UINT32_C(0xF0FDA7AB), + UINT32_C(0xEC58D3C7), UINT32_C(0xC91BBA4D), UINT32_C(0x7B6E122D) } }, + { { UINT32_C(0xB1B48156), UINT32_C(0x2A2DEDAF), UINT32_C(0xBB93DB87), + UINT32_C(0xA0A2C63A), UINT32_C(0x08ACD99E), UINT32_C(0xC6559078), + UINT32_C(0xFE4AC331), UINT32_C(0x03EA42AF), UINT32_C(0xEB180ED6), + UINT32_C(0x43D2C14A), UINT32_C(0xB1156A1A), UINT32_C(0xC2F293DD) }, + { UINT32_C(0xA9D81249), UINT32_C(0x1FAFABF5), UINT32_C(0x9A8EEE87), + UINT32_C(0x39ADDEAD), UINT32_C(0x119E2E92), UINT32_C(0x21E206F2), + UINT32_C(0xD74DCEB6), UINT32_C(0xBC5DCC2E), UINT32_C(0x0A73A358), + UINT32_C(0x86647FA3), UINT32_C(0x2F53F642), UINT32_C(0xEAD8BEA4) } }, + { { UINT32_C(0x91C09091), UINT32_C(0x636225F5), UINT32_C(0x71BDCFDF), + UINT32_C(0xCCF5070A), UINT32_C(0xB9668EE2), UINT32_C(0x0EF8D625), + UINT32_C(0xB5E04E4F), UINT32_C(0x57BDF6CD), UINT32_C(0x7C75EA43), + UINT32_C(0xFC6AB0A6), UINT32_C(0xF7FD6EF3), UINT32_C(0xEB6B8AFB) }, + { UINT32_C(0x2A3DF404), UINT32_C(0x5B2AEEF0), UINT32_C(0xB9823197), + UINT32_C(0x31FD3B48), UINT32_C(0x83A7EB23), UINT32_C(0x56226DB6), + UINT32_C(0x5BB1ED2F), UINT32_C(0x3772C21E), UINT32_C(0xCD1ABA6A), + UINT32_C(0x3E833624), UINT32_C(0xAC672DAD), UINT32_C(0xBAE58FFA) } }, + { { UINT32_C(0x31BA1705), UINT32_C(0xCE92224D), UINT32_C(0xF0197F63), + UINT32_C(0x022C6ED2), UINT32_C(0xA4DC1113), UINT32_C(0x21F18D99), + UINT32_C(0x03616BF1), UINT32_C(0x5CD04DE8), UINT32_C(0x9FF12E08), + UINT32_C(0x6F900679), UINT32_C(0x48E61DDF), UINT32_C(0xF59A3315) }, + { UINT32_C(0xB51BD024), UINT32_C(0x9474D42C), UINT32_C(0x9051E49D), + UINT32_C(0x11A0A413), UINT32_C(0xDCE70EDB), UINT32_C(0x79C92705), + UINT32_C(0x34198426), UINT32_C(0x113CE278), UINT32_C(0xEA8616D2), + 
UINT32_C(0x8978396F), UINT32_C(0xEA894C36), UINT32_C(0x9A2A14D0) } }, + { { UINT32_C(0x604F6E4A), UINT32_C(0x4F1E1254), UINT32_C(0x0187D585), + UINT32_C(0x4513B088), UINT32_C(0x19E0F482), UINT32_C(0x9022F257), + UINT32_C(0xE2239DBF), UINT32_C(0x51FB2A80), UINT32_C(0x998ED9D5), + UINT32_C(0x49940D9E), UINT32_C(0x6C932C5D), UINT32_C(0x0583D241) }, + { UINT32_C(0xF25B73F7), UINT32_C(0x1188CEC8), UINT32_C(0x3B3D06CD), + UINT32_C(0xA28788CB), UINT32_C(0xA083DB5A), UINT32_C(0xDEA194EC), + UINT32_C(0x22DF4272), UINT32_C(0xD93A4F7E), UINT32_C(0x6A009C49), + UINT32_C(0x8D84E4BF), UINT32_C(0x3E3E4A9E), UINT32_C(0x893D8DD9) } }, + { { UINT32_C(0x33D31160), UINT32_C(0x35E909EA), UINT32_C(0x57172F1E), + UINT32_C(0x50203168), UINT32_C(0x51F3D866), UINT32_C(0x2707FC44), + UINT32_C(0xD2442A5D), UINT32_C(0xEB9D2018), UINT32_C(0x5DBFE378), + UINT32_C(0x904D7209), UINT32_C(0x5F13CF77), UINT32_C(0x6DB132A3) }, + { UINT32_C(0x7A3AF54B), UINT32_C(0x9D842BA6), UINT32_C(0x5AA5B4F9), + UINT32_C(0x4E16EA19), UINT32_C(0xAF24228E), UINT32_C(0x2BBA457C), + UINT32_C(0x16F3C5FE), UINT32_C(0xCC04B3BB), UINT32_C(0x77E64944), + UINT32_C(0xBAFAC516), UINT32_C(0xF08BCEE0), UINT32_C(0x31580A34) } }, + { { UINT32_C(0x20C30ACA), UINT32_C(0xC6808DEE), UINT32_C(0xA3EA2056), + UINT32_C(0xDADD216F), UINT32_C(0x7A4A9F9D), UINT32_C(0xD331394E), + UINT32_C(0x424C4026), UINT32_C(0x9E0441AD), UINT32_C(0x0AEB5350), + UINT32_C(0xAEED102F), UINT32_C(0xD45B09DA), UINT32_C(0xC6697FBB) }, + { UINT32_C(0xDEAC1496), UINT32_C(0x52A2590E), UINT32_C(0x250B87AF), + UINT32_C(0x7142B831), UINT32_C(0x6D0784A8), UINT32_C(0xBEF2E68B), + UINT32_C(0xA5F71CEF), UINT32_C(0x5F62593A), UINT32_C(0xB5DA51A3), + UINT32_C(0x3B8F7616), UINT32_C(0xB680F5FE), UINT32_C(0xC7A6FA0D) } }, + { { UINT32_C(0x99C8227C), UINT32_C(0x36C21DE6), UINT32_C(0xC26813B1), + UINT32_C(0xBEE3E867), UINT32_C(0xBDD91549), UINT32_C(0x9B05F2E6), + UINT32_C(0xA7D1110F), UINT32_C(0x34FF2B1F), UINT32_C(0x37F67FD0), + UINT32_C(0x8E6953B9), UINT32_C(0xC3183E20), 
UINT32_C(0x56C7F18B) }, + { UINT32_C(0x9E2019ED), UINT32_C(0x48AF46DE), UINT32_C(0xF551BBBF), + UINT32_C(0xDEAF972E), UINT32_C(0xCC5E3EEF), UINT32_C(0x88EE38F8), + UINT32_C(0x392D6BAF), UINT32_C(0xFB8D7A44), UINT32_C(0x0127187D), + UINT32_C(0x32293BFC), UINT32_C(0xE58647CC), UINT32_C(0x7689E767) } }, + { { UINT32_C(0x52168013), UINT32_C(0x00CE901B), UINT32_C(0x837AAE71), + UINT32_C(0xC6BF8E38), UINT32_C(0x167677D8), UINT32_C(0xD6F11EFA), + UINT32_C(0x86C8E5CF), UINT32_C(0xE53BB485), UINT32_C(0xC48E74AB), + UINT32_C(0x671167CE), UINT32_C(0x8AD720A7), UINT32_C(0x8A40218C) }, + { UINT32_C(0xE7C1191A), UINT32_C(0x81E827A6), UINT32_C(0xADDB153D), + UINT32_C(0x54058F8D), UINT32_C(0x0D950FA2), UINT32_C(0x0BAF2925), + UINT32_C(0x576DDA13), UINT32_C(0xC244674D), UINT32_C(0x41BCD13B), + UINT32_C(0x8C4630AE), UINT32_C(0x5A077419), UINT32_C(0x6C2127BF) } }, + { { UINT32_C(0xA83C501F), UINT32_C(0xCF977FD5), UINT32_C(0xB6AB176F), + UINT32_C(0xD7C6DF36), UINT32_C(0x397BC6B5), UINT32_C(0x117F6331), + UINT32_C(0xF7A2D491), UINT32_C(0x72A6078B), UINT32_C(0x5242FE2E), + UINT32_C(0xE5A2AAED), UINT32_C(0xFEBDC212), UINT32_C(0x88ECFFDC) }, + { UINT32_C(0xCE33BA21), UINT32_C(0xF2DBBF50), UINT32_C(0xCEB19F07), + UINT32_C(0xE1343B76), UINT32_C(0xD2C28F71), UINT32_C(0x1F32D4C9), + UINT32_C(0x18587685), UINT32_C(0x93FC64B4), UINT32_C(0xBA1F8BD1), + UINT32_C(0x39CEEF9B), UINT32_C(0x8D6D6BB0), UINT32_C(0x99C36A78) } }, + { { UINT32_C(0x3E9561CF), UINT32_C(0x0D063817), UINT32_C(0x3D33704D), + UINT32_C(0x1D8646AA), UINT32_C(0x7A08BA33), UINT32_C(0x8C451384), + UINT32_C(0xE02D6624), UINT32_C(0x96446BD3), UINT32_C(0x2D6F4166), + UINT32_C(0x749849F0), UINT32_C(0x14268BF0), UINT32_C(0xE364DA01) }, + { UINT32_C(0x9AEBFCFD), UINT32_C(0x7CE4587E), UINT32_C(0x56234393), + UINT32_C(0xD4686064), UINT32_C(0x16DF73B2), UINT32_C(0x00231D51), + UINT32_C(0x7279C78C), UINT32_C(0xF6A969B7), UINT32_C(0x6CB4117C), + UINT32_C(0x1FF1F6B6), UINT32_C(0xD3EAB680), UINT32_C(0x30AEBC39) } }, + { { UINT32_C(0x93EF00B9), 
UINT32_C(0x5CC97E64), UINT32_C(0x972345AE), + UINT32_C(0xDAE13841), UINT32_C(0x4788F43C), UINT32_C(0x85839184), + UINT32_C(0xE2E6CF3E), UINT32_C(0xD0FF521E), UINT32_C(0x4B707C86), + UINT32_C(0xAED14A5B), UINT32_C(0xD2523CF7), UINT32_C(0x7EAAE4A6) }, + { UINT32_C(0x024C8AC6), UINT32_C(0x266472C5), UINT32_C(0xC0170051), + UINT32_C(0xE47E1522), UINT32_C(0x73826BAE), UINT32_C(0x7B83DA61), + UINT32_C(0xCF543F0D), UINT32_C(0xE97E19F5), UINT32_C(0x20BF38E2), + UINT32_C(0x5D5248FA), UINT32_C(0xDF56A037), UINT32_C(0x8A7C2F7D) } }, + { { UINT32_C(0x87B0526C), UINT32_C(0xB04659DD), UINT32_C(0x2307565E), + UINT32_C(0x593C604A), UINT32_C(0x7C630AB8), UINT32_C(0x49E52225), + UINT32_C(0xDCE9CD23), UINT32_C(0x24C1D0C6), UINT32_C(0x85177079), + UINT32_C(0x6FDB241C), UINT32_C(0xF250C351), UINT32_C(0x5F521D19) }, + { UINT32_C(0xA6FB61DF), UINT32_C(0xFB56134B), UINT32_C(0xD75C07ED), + UINT32_C(0xA4E70D69), UINT32_C(0x7D8825A8), UINT32_C(0xB7A82448), + UINT32_C(0xDD64BBCC), UINT32_C(0xA3AEA7D4), UINT32_C(0x8692F539), + UINT32_C(0xD53E6E6C), UINT32_C(0xF7AA4BC0), UINT32_C(0x8DDDA83B) } }, + }, + { + { { UINT32_C(0xDD93D50A), UINT32_C(0x140A0F9F), UINT32_C(0x83B7ABAC), + UINT32_C(0x4799FFDE), UINT32_C(0x04A1F742), UINT32_C(0x78FF7C23), + UINT32_C(0x195BA34E), UINT32_C(0xC0568F51), UINT32_C(0x3B7F78B4), + UINT32_C(0xE9718360), UINT32_C(0xF9EFAA53), UINT32_C(0x9CFD1FF1) }, + { UINT32_C(0xBB06022E), UINT32_C(0xE924D2C5), UINT32_C(0xFAA2AF6D), + UINT32_C(0x9987FA86), UINT32_C(0x6EE37E0F), UINT32_C(0x4B12E73F), + UINT32_C(0x5E5A1DDE), UINT32_C(0x1836FDFA), UINT32_C(0x9DCD6416), + UINT32_C(0x7F1B9225), UINT32_C(0x677544D8), UINT32_C(0xCB2C1B4D) } }, + { { UINT32_C(0x9C213D95), UINT32_C(0x0254486D), UINT32_C(0xCB2F6E94), + UINT32_C(0x68A9DB56), UINT32_C(0x000F5491), UINT32_C(0xFB5858BA), + UINT32_C(0x34009FB6), UINT32_C(0x1315BDD9), UINT32_C(0xC42BDE30), + UINT32_C(0xB18A8E0A), UINT32_C(0xF1070358), UINT32_C(0xFDCF93D1) }, + { UINT32_C(0x3022937E), UINT32_C(0xBEB1DB75), UINT32_C(0xCAC20DB4), + 
UINT32_C(0x9B9ECA7A), UINT32_C(0xE4122B20), UINT32_C(0x152214D4), + UINT32_C(0xAABCCC7B), UINT32_C(0xD3E673F2), UINT32_C(0xAED07571), + UINT32_C(0x94C50F64), UINT32_C(0xE66B4F17), UINT32_C(0xD767059A) } }, + { { UINT32_C(0xDCD6D14B), UINT32_C(0x40336B12), UINT32_C(0xE3B4919C), + UINT32_C(0xF6BCFF5D), UINT32_C(0x9C841F0C), UINT32_C(0xC337048D), + UINT32_C(0x1D617F50), UINT32_C(0x4CE6D025), UINT32_C(0x8117D379), + UINT32_C(0x00FEF219), UINT32_C(0xF95BE243), UINT32_C(0x18B7C4E9) }, + { UINT32_C(0x38DF08FF), UINT32_C(0x98DE119E), UINT32_C(0x8D772D20), + UINT32_C(0xDFD803BD), UINT32_C(0x0F9678BD), UINT32_C(0x94125B72), + UINT32_C(0x334ACE30), UINT32_C(0xFC5B57CD), UINT32_C(0xB7E86E04), + UINT32_C(0x09486527), UINT32_C(0x6E552039), UINT32_C(0xFE9F8BCC) } }, + { { UINT32_C(0xD6F5A10E), UINT32_C(0x3B75C45B), UINT32_C(0xC1C35F38), + UINT32_C(0xFD4680F4), UINT32_C(0xF8E0A113), UINT32_C(0x5450227D), + UINT32_C(0x73DDBA24), UINT32_C(0x5E69F1AE), UINT32_C(0x57F24645), + UINT32_C(0x2007B80E), UINT32_C(0x3D159741), UINT32_C(0xC63695DC) }, + { UINT32_C(0x4530F623), UINT32_C(0xCBE54D29), UINT32_C(0x2869586B), + UINT32_C(0x986AD573), UINT32_C(0x4CC39F73), UINT32_C(0xE19F7059), + UINT32_C(0x2B1B8DA9), UINT32_C(0x80F00AB3), UINT32_C(0x73F68D26), + UINT32_C(0xB765AAF9), UINT32_C(0xE993F829), UINT32_C(0xBC79A394) } }, + { { UINT32_C(0xF310D2A0), UINT32_C(0x9C441043), UINT32_C(0xDC5EB106), + UINT32_C(0x2865EE58), UINT32_C(0x9CB8065C), UINT32_C(0x71A95922), + UINT32_C(0xA052AF0F), UINT32_C(0x8EB3A733), UINT32_C(0xB09D716E), + UINT32_C(0x56009F42), UINT32_C(0xABCBE6AD), UINT32_C(0xA7F923C5) }, + { UINT32_C(0xFA375C01), UINT32_C(0x263B7669), UINT32_C(0x21EF27A2), + UINT32_C(0x641C47E5), UINT32_C(0xB08FFD25), UINT32_C(0xA89B474E), + UINT32_C(0xF0A239F3), UINT32_C(0x5BE8EC3F), UINT32_C(0x242A6C5A), + UINT32_C(0x0E79957A), UINT32_C(0x0C6C75F5), UINT32_C(0x1DFB26D0) } }, + { { UINT32_C(0x9DFBF22A), UINT32_C(0x2FD97B9B), UINT32_C(0x5643532D), + UINT32_C(0xDEC16CC8), UINT32_C(0x60FEE7C3), 
UINT32_C(0xDF0E6E39), + UINT32_C(0x545860C8), UINT32_C(0xD09AD7B6), UINT32_C(0x73FC3B7C), + UINT32_C(0xCC16E984), UINT32_C(0x0D4E1555), UINT32_C(0x6CE734C1) }, + { UINT32_C(0x4B5F6032), UINT32_C(0xC6EFE68B), UINT32_C(0x14F54073), + UINT32_C(0x3A64F34C), UINT32_C(0xAC44DC95), UINT32_C(0x25DA689C), + UINT32_C(0x5358AD8A), UINT32_C(0x990C477E), UINT32_C(0xF36DA7DE), + UINT32_C(0x00E958A5), UINT32_C(0xC9B6F161), UINT32_C(0x902B7360) } }, + { { UINT32_C(0x9347B90A), UINT32_C(0x454AB42C), UINT32_C(0xA698B02B), + UINT32_C(0xCAEBE64A), UINT32_C(0xFB86FA40), UINT32_C(0x119CDC69), + UINT32_C(0xC3109281), UINT32_C(0x2E5CB7AD), UINT32_C(0xCD0C3D00), + UINT32_C(0x67BB1EC5), UINT32_C(0x83F25BBF), UINT32_C(0x5D430BC7) }, + { UINT32_C(0x5CDE0ABB), UINT32_C(0x69FD84A8), UINT32_C(0x9816B688), + UINT32_C(0x69DA263E), UINT32_C(0x0E53CBB8), UINT32_C(0xE52D93DF), + UINT32_C(0xADD2D5A7), UINT32_C(0x42CF6F25), UINT32_C(0xC87CA88F), + UINT32_C(0x227BA59D), UINT32_C(0xDA738554), UINT32_C(0x7A1CA876) } }, + { { UINT32_C(0x1CAC82C4), UINT32_C(0x3FA5C105), UINT32_C(0x8A78C9BE), + UINT32_C(0x23C76087), UINT32_C(0x1C5CFA42), UINT32_C(0xE98CDAD6), + UINT32_C(0x0A6C0421), UINT32_C(0x09C30252), UINT32_C(0x42FC61B9), + UINT32_C(0x149BAC7C), UINT32_C(0x3004A3E2), UINT32_C(0x3A1C22AC) }, + { UINT32_C(0x202C7FED), UINT32_C(0xDE6B0D6E), UINT32_C(0xE7E63052), + UINT32_C(0xB2457377), UINT32_C(0x3706B3EF), UINT32_C(0x31725FD4), + UINT32_C(0x2B1AFDBF), UINT32_C(0xE16A347D), UINT32_C(0x8C29CF66), + UINT32_C(0xBE4850C4), UINT32_C(0x2939F23C), UINT32_C(0x8F51CC4D) } }, + { { UINT32_C(0x219AE6C1), UINT32_C(0x169E025B), UINT32_C(0x116E1CA1), + UINT32_C(0x55FF526F), UINT32_C(0xB191F55D), UINT32_C(0x01B810A3), + UINT32_C(0x29588A69), UINT32_C(0x2D981272), UINT32_C(0x48B92199), + UINT32_C(0x53C93770), UINT32_C(0x8A85236F), UINT32_C(0x8C7DD84E) }, + { UINT32_C(0xCAACF958), UINT32_C(0x293D48B6), UINT32_C(0x43572B30), + UINT32_C(0x1F084ACB), UINT32_C(0xFAD91F28), UINT32_C(0x628BFA2D), + UINT32_C(0x829386AF), 
UINT32_C(0x8D627B11), UINT32_C(0xD44A77BE), + UINT32_C(0x3EC1DD00), UINT32_C(0x649AC7F0), UINT32_C(0x8D3B0D08) } }, + { { UINT32_C(0x177513BF), UINT32_C(0x00A93DAA), UINT32_C(0x42AD79E1), + UINT32_C(0x2EF0B96F), UINT32_C(0xA07129D9), UINT32_C(0x81F5AAF1), + UINT32_C(0x923F2449), UINT32_C(0xFC04B7EF), UINT32_C(0x60CDB1B7), + UINT32_C(0x855DA795), UINT32_C(0xAD5D61D4), UINT32_C(0xB1EB5DAB) }, + { UINT32_C(0x353FD028), UINT32_C(0xD2CEF1AE), UINT32_C(0x9EE94847), + UINT32_C(0xC21D5439), UINT32_C(0x0380C1A8), UINT32_C(0x9ED552BB), + UINT32_C(0x2BAC328F), UINT32_C(0xB156FE7A), UINT32_C(0x7213C6A4), + UINT32_C(0xBB7E0196), UINT32_C(0x1701ED5B), UINT32_C(0x36002A33) } }, + { { UINT32_C(0xDDC9EF4D), UINT32_C(0x20B1632A), UINT32_C(0x272D082B), + UINT32_C(0x2A35FF4C), UINT32_C(0xF6CC9BD3), UINT32_C(0x30D39923), + UINT32_C(0xE65C9D08), UINT32_C(0x6D879BC2), UINT32_C(0x6FA9983C), + UINT32_C(0xCE8274E1), UINT32_C(0x0EB7424F), UINT32_C(0x652371E8) }, + { UINT32_C(0xC5C35282), UINT32_C(0x32B77503), UINT32_C(0xC885A931), + UINT32_C(0xD7306333), UINT32_C(0x72955AA8), UINT32_C(0x8A16D719), + UINT32_C(0x7D51F882), UINT32_C(0x5548F163), UINT32_C(0xBABA59EF), + UINT32_C(0xB311DC66), UINT32_C(0x0DB8F627), UINT32_C(0x773D5448) } }, + { { UINT32_C(0x7A62EB3B), UINT32_C(0x59B1B134), UINT32_C(0xCCEEFB34), + UINT32_C(0x0F8CE157), UINT32_C(0xA798CB2B), UINT32_C(0x3FE842A8), + UINT32_C(0x0BF4161D), UINT32_C(0xD01BC626), UINT32_C(0x4D016FDB), + UINT32_C(0x55EF6E55), UINT32_C(0xB242B201), UINT32_C(0xCB561503) }, + { UINT32_C(0xAF4199C1), UINT32_C(0x076EBC73), UINT32_C(0x697244F7), + UINT32_C(0x39DEDCBB), UINT32_C(0x040162BC), UINT32_C(0x9D184733), + UINT32_C(0x7F6B5FA6), UINT32_C(0x902992C1), UINT32_C(0xBB4952B5), + UINT32_C(0xAD1DE754), UINT32_C(0xA121F6C8), UINT32_C(0x7ACF1B93) } }, + { { UINT32_C(0x325C9B9A), UINT32_C(0x7A56867C), UINT32_C(0xF3DC3D6A), + UINT32_C(0x1A143999), UINT32_C(0x03F5BCB8), UINT32_C(0xCE109590), + UINT32_C(0xD6EEE5B7), UINT32_C(0x034E9035), UINT32_C(0x495DF1BC), + 
UINT32_C(0x2AFA81C8), UINT32_C(0x08924D02), UINT32_C(0x5EAB52DC) }, + { UINT32_C(0xAA181904), UINT32_C(0xEE6AA014), UINT32_C(0x310AD621), + UINT32_C(0xE62DEF09), UINT32_C(0xC7538A03), UINT32_C(0x6C9792FC), + UINT32_C(0x3E41D789), UINT32_C(0xA89D3E88), UINT32_C(0x9F94AE83), + UINT32_C(0xD60FA11C), UINT32_C(0xE0D6234A), UINT32_C(0x5E16A8C2) } }, + { { UINT32_C(0xA9242F3B), UINT32_C(0x87EC053D), UINT32_C(0xF0E03545), + UINT32_C(0x99544637), UINT32_C(0x6B7019E9), UINT32_C(0xEA0633FF), + UINT32_C(0x68DDDB5B), UINT32_C(0x8CB8AE07), UINT32_C(0x1A811AC7), + UINT32_C(0x892E7C84), UINT32_C(0x73664249), UINT32_C(0xC7EF19EB) }, + { UINT32_C(0xCD1489E3), UINT32_C(0xD1B5819A), UINT32_C(0xDE45D24A), + UINT32_C(0xF9C80FB0), UINT32_C(0x83BB7491), UINT32_C(0x045C21A6), + UINT32_C(0x73F7A47D), UINT32_C(0xA65325BE), UINT32_C(0x9C394F0C), + UINT32_C(0x08D09F0E), UINT32_C(0x268D4F08), UINT32_C(0xE7FB21C6) } }, + { { UINT32_C(0x6CA95C18), UINT32_C(0xC4CCAB95), UINT32_C(0xBC42E040), + UINT32_C(0x563FFD56), UINT32_C(0xE701C604), UINT32_C(0xFA3C64D8), + UINT32_C(0xB0ABAFEE), UINT32_C(0xC88D4426), UINT32_C(0x8542E4C3), + UINT32_C(0x1A353E5E), UINT32_C(0xED726186), UINT32_C(0x9A2D8B7C) }, + { UINT32_C(0x42D097FA), UINT32_C(0xD61CE190), UINT32_C(0x799A748B), + UINT32_C(0x6A63E280), UINT32_C(0x3225486B), UINT32_C(0x0F48D063), + UINT32_C(0x42A3C443), UINT32_C(0x848F8FE1), UINT32_C(0x8493CEF4), + UINT32_C(0x2CCDE250), UINT32_C(0x45E77E7C), UINT32_C(0x5450A508) } }, + { { UINT32_C(0x03112816), UINT32_C(0xD0F4E248), UINT32_C(0xCCBE9E16), + UINT32_C(0xFCAD9DDB), UINT32_C(0x5AE01EA0), UINT32_C(0x177999BF), + UINT32_C(0xCE832DCE), UINT32_C(0xD20C78B9), UINT32_C(0x50C8C646), + UINT32_C(0x3CC694FB), UINT32_C(0xC93D4887), UINT32_C(0x24D75968) }, + { UINT32_C(0x87BC08AF), UINT32_C(0x9F06366A), UINT32_C(0x7FD0DF2A), + UINT32_C(0x59FAB50E), UINT32_C(0x6C4CC234), UINT32_C(0x5FFCC7F7), + UINT32_C(0x65F52D86), UINT32_C(0x87198DD7), UINT32_C(0xA855DF04), + UINT32_C(0x5B9C94B0), UINT32_C(0x8A067AD7), 
UINT32_C(0xD8BA6C73) } }, + }, + { + { { UINT32_C(0x1C4C9D90), UINT32_C(0x9E9AF315), UINT32_C(0xD12E0A89), + UINT32_C(0x8665C5A9), UINT32_C(0x58286493), UINT32_C(0x204ABD92), + UINT32_C(0xB2E09205), UINT32_C(0x79959889), UINT32_C(0xFE56B101), + UINT32_C(0x0C727A3D), UINT32_C(0x8B657F26), UINT32_C(0xF366244C) }, + { UINT32_C(0xCCA65BE2), UINT32_C(0xDE35D954), UINT32_C(0xB0FD41CE), + UINT32_C(0x52EE1230), UINT32_C(0x36019FEE), UINT32_C(0xFA03261F), + UINT32_C(0x66511D8F), UINT32_C(0xAFDA42D9), UINT32_C(0x821148B9), + UINT32_C(0xF63211DD), UINT32_C(0x6F13A3E1), UINT32_C(0x7B56AF7E) } }, + { { UINT32_C(0x5913E184), UINT32_C(0x47FE4799), UINT32_C(0x82145900), + UINT32_C(0x5BBE584C), UINT32_C(0x9A867173), UINT32_C(0xB76CFA8B), + UINT32_C(0x514BF471), UINT32_C(0x9BC87BF0), UINT32_C(0x71DCF1FC), + UINT32_C(0x37392DCE), UINT32_C(0x3AD1EFA8), UINT32_C(0xEC3EFAE0) }, + { UINT32_C(0x14876451), UINT32_C(0xBBEA5A34), UINT32_C(0x6217090F), + UINT32_C(0x96E5F543), UINT32_C(0x9B1665A9), UINT32_C(0x5B3D4ECD), + UINT32_C(0xE329DF22), UINT32_C(0xE7B0DF26), UINT32_C(0x0BAA808D), + UINT32_C(0x18FB438E), UINT32_C(0xDD516FAF), UINT32_C(0x90757EBF) } }, + { { UINT32_C(0xD5A98D68), UINT32_C(0x1E6F9A95), UINT32_C(0x849DA828), + UINT32_C(0x759EA7DF), UINT32_C(0x6E8B4198), UINT32_C(0x365D5625), + UINT32_C(0x7A4A53F9), UINT32_C(0xE1B9C53B), UINT32_C(0xE32B9B16), + UINT32_C(0x55DC1D50), UINT32_C(0xBB6D5701), UINT32_C(0xA4657EBB) }, + { UINT32_C(0xEACC76E2), UINT32_C(0x4C270249), UINT32_C(0x162B1CC7), + UINT32_C(0xBE49EC75), UINT32_C(0x0689902B), UINT32_C(0x19A95B61), + UINT32_C(0xA4CFC5A8), UINT32_C(0xDD5706BF), UINT32_C(0x14E5B424), + UINT32_C(0xD33BDB73), UINT32_C(0xE69EBA87), UINT32_C(0x21311BD1) } }, + { { UINT32_C(0x72A21ACC), UINT32_C(0x75BA2F9B), UINT32_C(0xA28EDB4C), + UINT32_C(0x356688D4), UINT32_C(0x610D080F), UINT32_C(0x3C339E0B), + UINT32_C(0x33A99C2F), UINT32_C(0x614AC293), UINT32_C(0xAA580AFF), + UINT32_C(0xA5E23AF2), UINT32_C(0xE1FDBA3A), UINT32_C(0xA6BCB860) }, + { 
UINT32_C(0xB43F9425), UINT32_C(0xAA603365), UINT32_C(0xF7EE4635), + UINT32_C(0xAE8D7126), UINT32_C(0x56330A32), UINT32_C(0xA2B25244), + UINT32_C(0x9E025AA3), UINT32_C(0xC396B5BB), UINT32_C(0xF8A0D5CF), + UINT32_C(0xABBF77FA), UINT32_C(0xEA31C83B), UINT32_C(0xB322EE30) } }, + { { UINT32_C(0x7890E234), UINT32_C(0x04881384), UINT32_C(0x672E70C6), + UINT32_C(0x387F1159), UINT32_C(0x7B307F75), UINT32_C(0x1468A614), + UINT32_C(0xED85EC96), UINT32_C(0x56335B52), UINT32_C(0xD45BCAE9), + UINT32_C(0xDA1BB60F), UINT32_C(0xF9FAEADD), UINT32_C(0x4D94F3F0) }, + { UINT32_C(0xFC78D86B), UINT32_C(0x6C6A7183), UINT32_C(0x3018DEC6), + UINT32_C(0xA425B5C7), UINT32_C(0x2D877399), UINT32_C(0xB1549C33), + UINT32_C(0x92B2BC37), UINT32_C(0x6C41C50C), UINT32_C(0x83EE0DDB), + UINT32_C(0x3A9F380C), UINT32_C(0xC4599E73), UINT32_C(0xDED5FEB6) } }, + { { UINT32_C(0x0B7F8354), UINT32_C(0x14D34C21), UINT32_C(0x9177CE45), + UINT32_C(0x1475A1CD), UINT32_C(0x9B926E4B), UINT32_C(0x9F5F764A), + UINT32_C(0x05DD21FE), UINT32_C(0x77260D1E), UINT32_C(0xC4B937F7), + UINT32_C(0x3C882480), UINT32_C(0x722372F2), UINT32_C(0xC92DCD39) }, + { UINT32_C(0xEC6F657E), UINT32_C(0xF636A1BE), UINT32_C(0x1D30DD35), + UINT32_C(0xB0E6C312), UINT32_C(0xE4654EFE), UINT32_C(0xFE4B0528), + UINT32_C(0x21D230D2), UINT32_C(0x1C4A6820), UINT32_C(0x98FA45AB), + UINT32_C(0x615D2E48), UINT32_C(0x01FDBABF), UINT32_C(0x1F35D6D8) } }, + { { UINT32_C(0x3A7B10D1), UINT32_C(0xA636EEB8), UINT32_C(0xF4A29E73), + UINT32_C(0x4E1AE352), UINT32_C(0xE6BB1EC7), UINT32_C(0x01704F5F), + UINT32_C(0x0EF020AE), UINT32_C(0x75C04F72), UINT32_C(0x5A31E6A6), + UINT32_C(0x448D8CEE), UINT32_C(0x208F994B), UINT32_C(0xE40A9C29) }, + { UINT32_C(0xFD8F9D5D), UINT32_C(0x69E09A30), UINT32_C(0x449BAB7E), + UINT32_C(0xE6A5F7EB), UINT32_C(0x2AA1768B), UINT32_C(0xF25BC18A), + UINT32_C(0x3C841234), UINT32_C(0x9449E404), UINT32_C(0x016A7BEF), + UINT32_C(0x7A3BF43E), UINT32_C(0x2A150B60), UINT32_C(0xF25803E8) } }, + { { UINT32_C(0xB215F9E0), UINT32_C(0xE44A2A57), 
UINT32_C(0x19066F0A), + UINT32_C(0x38B34DCE), UINT32_C(0x40BB1BFB), UINT32_C(0x8BB91DAD), + UINT32_C(0xE67735FC), UINT32_C(0x64C9F775), UINT32_C(0x88D613CD), + UINT32_C(0xDE142417), UINT32_C(0x1901D88D), UINT32_C(0xC5014FF5) }, + { UINT32_C(0xF38116B0), UINT32_C(0xA250341D), UINT32_C(0x9D6CBCB2), + UINT32_C(0xF96B9DD4), UINT32_C(0x76B3FAC2), UINT32_C(0x15EC6C72), + UINT32_C(0x8124C1E9), UINT32_C(0x88F1952F), UINT32_C(0x975BE4F5), + UINT32_C(0x6B72F8EA), UINT32_C(0x061F7530), UINT32_C(0x23D288FF) } }, + { { UINT32_C(0xAFB96CE3), UINT32_C(0xEBFE3E5F), UINT32_C(0xB1979537), + UINT32_C(0x2275EDFB), UINT32_C(0xC97BA741), UINT32_C(0xC37AB9E8), + UINT32_C(0x63D7C626), UINT32_C(0x446E4B10), UINT32_C(0xD025EB02), + UINT32_C(0xB73E2DCE), UINT32_C(0x7669EEA7), UINT32_C(0x1F952B51) }, + { UINT32_C(0x6069A424), UINT32_C(0xABDD00F6), UINT32_C(0xDC298BFB), + UINT32_C(0x1C0F9D9B), UINT32_C(0xEB757B33), UINT32_C(0x831B1FD3), + UINT32_C(0x59D60B32), UINT32_C(0xD7DBE183), UINT32_C(0x9EF094B3), + UINT32_C(0x663D1F36), UINT32_C(0x67F7F11A), UINT32_C(0x1BD5732E) } }, + { { UINT32_C(0xC75D8892), UINT32_C(0x3C7FB3F5), UINT32_C(0xBA68DA69), + UINT32_C(0x2CFF9A0C), UINT32_C(0x60EC740B), UINT32_C(0x76455E8B), + UINT32_C(0x167B88F0), UINT32_C(0x4B8D67FF), UINT32_C(0x5A4186B1), + UINT32_C(0xEDEC0C02), UINT32_C(0xBEBF35AB), UINT32_C(0x127C462D) }, + { UINT32_C(0x049430FC), UINT32_C(0x9159C67E), UINT32_C(0xE7747320), + UINT32_C(0x86B21DD2), UINT32_C(0x0CF27B89), UINT32_C(0x0E0E0152), + UINT32_C(0xCD1316B6), UINT32_C(0x705F28F5), UINT32_C(0xBEAEA8A8), + UINT32_C(0x76751691), UINT32_C(0x360C5B69), UINT32_C(0x4C73E282) } }, + { { UINT32_C(0xFD7B3D74), UINT32_C(0x46BCC0D5), UINT32_C(0x0DC4F410), + UINT32_C(0x6F13C20E), UINT32_C(0x72F11CDF), UINT32_C(0x98A1AF7D), + UINT32_C(0x7928881C), UINT32_C(0x6099FD83), UINT32_C(0x371BB94B), + UINT32_C(0x66976356), UINT32_C(0x19B945AB), UINT32_C(0x673FBA72) }, + { UINT32_C(0xAED00700), UINT32_C(0xE4D8FA6E), UINT32_C(0x5C71A9F7), + UINT32_C(0xEA2313EC), 
UINT32_C(0xF99D4AEA), UINT32_C(0xF9ED8268), + UINT32_C(0x42AB59C7), UINT32_C(0xADD89164), UINT32_C(0x3F3A2D45), + UINT32_C(0xB37EB26F), UINT32_C(0xA924841E), UINT32_C(0x0B39BD7A) } }, + { { UINT32_C(0xE03CDBBB), UINT32_C(0xD811EB32), UINT32_C(0x7CC3610E), + UINT32_C(0x12055F1D), UINT32_C(0xA9046E3F), UINT32_C(0x6B23A1A0), + UINT32_C(0x9DD4A749), UINT32_C(0x4D712122), UINT32_C(0xB1BF0AC3), + UINT32_C(0xB0C2ACA1), UINT32_C(0xC1B0432F), UINT32_C(0x71EFF575) }, + { UINT32_C(0x2B44E285), UINT32_C(0x6CD81492), UINT32_C(0xD87E8D20), + UINT32_C(0x3088BD9C), UINT32_C(0xF567E8FA), UINT32_C(0xACE218E5), + UINT32_C(0xCF90CBBB), UINT32_C(0xB3FA0424), UINT32_C(0x770734D3), + UINT32_C(0xADBDA751), UINT32_C(0x5AD6569A), UINT32_C(0xBCD78BAD) } }, + { { UINT32_C(0x7F39641F), UINT32_C(0xCADB31FA), UINT32_C(0x825E5562), + UINT32_C(0x3EF3E295), UINT32_C(0xF4094C64), UINT32_C(0x4893C633), + UINT32_C(0x8ADDF432), UINT32_C(0x52F685F1), UINT32_C(0x7FDC9373), + UINT32_C(0x9FD887AB), UINT32_C(0xE8680E8B), UINT32_C(0x47A9ADA0) }, + { UINT32_C(0xF0CD44F6), UINT32_C(0x579313B7), UINT32_C(0xE188AE2E), + UINT32_C(0xAC4B8668), UINT32_C(0x8FB145BD), UINT32_C(0x648F4369), + UINT32_C(0x74629E31), UINT32_C(0xE0460AB3), UINT32_C(0x8FF2B05F), + UINT32_C(0xC25F2875), UINT32_C(0x2D31EAEA), UINT32_C(0x4720C2B6) } }, + { { UINT32_C(0x13D48F80), UINT32_C(0x4603CDF4), UINT32_C(0xA49725DA), + UINT32_C(0x9ADB50E2), UINT32_C(0x65DF63F0), UINT32_C(0x8CD33050), + UINT32_C(0xCD643003), UINT32_C(0x58D8B3BB), UINT32_C(0xB739826B), + UINT32_C(0x170A4F4A), UINT32_C(0x1EAD0E17), UINT32_C(0x857772B5) }, + { UINT32_C(0xE65320F1), UINT32_C(0x01B78152), UINT32_C(0xB7503FC0), + UINT32_C(0xA6B4D845), UINT32_C(0x3DD50798), UINT32_C(0x0F5089B9), + UINT32_C(0x5690B6BE), UINT32_C(0x488F200F), UINT32_C(0x9E096F36), + UINT32_C(0x220B4ADF), UINT32_C(0x8CE5BC7C), UINT32_C(0x474D7C9F) } }, + { { UINT32_C(0xC745F8C9), UINT32_C(0xFED8C058), UINT32_C(0x291262D1), + UINT32_C(0xB683179E), UINT32_C(0xD15EE88C), UINT32_C(0x26ABD367), + 
UINT32_C(0xF60A6249), UINT32_C(0x29E8EED3), UINT32_C(0x1E02D6E1), + UINT32_C(0xED6008BB), UINT32_C(0xA6B12B8D), UINT32_C(0xD82ECF4C) }, + { UINT32_C(0xAAE4FA22), UINT32_C(0x9929D021), UINT32_C(0x336A1AB3), + UINT32_C(0xBE4DEF14), UINT32_C(0x8C80A312), UINT32_C(0x529B7E09), + UINT32_C(0xEE0EB0CE), UINT32_C(0xB059188D), UINT32_C(0x16DEAB7F), + UINT32_C(0x1E42979A), UINT32_C(0x84EE9477), UINT32_C(0x24110349) } }, + { { UINT32_C(0x2BE579CC), UINT32_C(0xD6524685), UINT32_C(0xC456FDED), + UINT32_C(0x849316F1), UINT32_C(0x2D1B67DA), UINT32_C(0xC51B7DA4), + UINT32_C(0x41BC6D6A), UINT32_C(0xC25B539E), UINT32_C(0xA9BF8BED), + UINT32_C(0xE3B7CCA3), UINT32_C(0x045C15E4), UINT32_C(0x813EF18C) }, + { UINT32_C(0x697982C4), UINT32_C(0x5F3789A1), UINT32_C(0x8C435566), + UINT32_C(0x4C125369), UINT32_C(0xDC0A92C6), UINT32_C(0x00A7AE6E), + UINT32_C(0x2F64A053), UINT32_C(0x1ABC929B), UINT32_C(0x38666B44), + UINT32_C(0xF4925C4C), UINT32_C(0x0F3DE7F6), UINT32_C(0xA81044B0) } }, + }, + { + { { UINT32_C(0xC2EC3731), UINT32_C(0xBCC88422), UINT32_C(0x10DC4EC2), + UINT32_C(0x78A3E4D4), UINT32_C(0x2571D6B1), UINT32_C(0x745DA1EF), + UINT32_C(0x739A956E), UINT32_C(0xF01C2921), UINT32_C(0xE4BFFC16), + UINT32_C(0xEFFD8065), UINT32_C(0xF36FE72C), UINT32_C(0x6EFE62A1) }, + { UINT32_C(0x0F4629A4), UINT32_C(0xF49E90D2), UINT32_C(0x8CE646F4), + UINT32_C(0xADD1DCC7), UINT32_C(0xB7240D91), UINT32_C(0xCB78B583), + UINT32_C(0x03F8387F), UINT32_C(0x2E1A7C3C), UINT32_C(0x3200F2D9), + UINT32_C(0x16566C22), UINT32_C(0xAAF80A84), UINT32_C(0x2361B14B) } }, + { { UINT32_C(0xB5733309), UINT32_C(0xDB1CFFD2), UINT32_C(0x0F9DD939), + UINT32_C(0x24BC250B), UINT32_C(0xA3C1DB85), UINT32_C(0xA4181E5A), + UINT32_C(0xAC55D391), UINT32_C(0xE5183E51), UINT32_C(0xEFD270D0), + UINT32_C(0x2793D5EF), UINT32_C(0xC0631546), UINT32_C(0x7D56F63D) }, + { UINT32_C(0x0C1EE59D), UINT32_C(0xECB40A59), UINT32_C(0xBB5BFA2C), + UINT32_C(0xE613A9E4), UINT32_C(0x6C5830F9), UINT32_C(0xA89B14AB), + UINT32_C(0xA03F201E), UINT32_C(0x4DC477DC), 
UINT32_C(0xC88C54F6), + UINT32_C(0x5604F5DA), UINT32_C(0x2ACFC66E), UINT32_C(0xD49264DC) } }, + { { UINT32_C(0x1C4DFA95), UINT32_C(0x283DD7F0), UINT32_C(0x62C0B160), + UINT32_C(0xB898CC2C), UINT32_C(0x870282AA), UINT32_C(0xBA08C095), + UINT32_C(0xF4E36324), UINT32_C(0xB02B00D8), UINT32_C(0x604CECF2), + UINT32_C(0x53AADDC0), UINT32_C(0x84DDD24E), UINT32_C(0xF1F927D3) }, + { UINT32_C(0xE2ABC9E1), UINT32_C(0x34BC00A0), UINT32_C(0x60289F88), + UINT32_C(0x2DA1227D), UINT32_C(0xCEF68F74), UINT32_C(0x5228EAAA), + UINT32_C(0x3C029351), UINT32_C(0x40A790D2), UINT32_C(0x8442E3B7), + UINT32_C(0xE0E9AF5C), UINT32_C(0xA9F141E0), UINT32_C(0xA3214142) } }, + { { UINT32_C(0xF9A58E3D), UINT32_C(0x72F4949E), UINT32_C(0xA48660A6), + UINT32_C(0x738C700B), UINT32_C(0x092A5805), UINT32_C(0x71B04726), + UINT32_C(0x0F5CDB72), UINT32_C(0xAD5C3C11), UINT32_C(0x554BFC49), + UINT32_C(0xD4951F9E), UINT32_C(0x6131EBE7), UINT32_C(0xEE594EE5) }, + { UINT32_C(0x3C1AF0A9), UINT32_C(0x37DA59F3), UINT32_C(0xCB040A63), + UINT32_C(0xD7AFC73B), UINT32_C(0x4D89FA65), UINT32_C(0xD020962A), + UINT32_C(0x71D824F5), UINT32_C(0x2610C61E), UINT32_C(0x3C050E31), + UINT32_C(0x9C917DA7), UINT32_C(0xE6E7EBFB), UINT32_C(0x3840F92F) } }, + { { UINT32_C(0x8D8B8CED), UINT32_C(0x50FBD7FE), UINT32_C(0x47D240AE), + UINT32_C(0xC7282F75), UINT32_C(0x1930FF73), UINT32_C(0x79646A47), + UINT32_C(0x2F7F5A77), UINT32_C(0x2E0BAC4E), UINT32_C(0x26127E0B), + UINT32_C(0x0EE44FA5), UINT32_C(0x82BC2AA7), UINT32_C(0x678881B7) }, + { UINT32_C(0x67F5F497), UINT32_C(0xB9E5D384), UINT32_C(0xA9B7106B), + UINT32_C(0x8F94A7D4), UINT32_C(0x9D329F68), UINT32_C(0xBF7E0B07), + UINT32_C(0x45D192FB), UINT32_C(0x169B93EA), UINT32_C(0x20DBE8C0), + UINT32_C(0xCCAA9467), UINT32_C(0x938F9574), UINT32_C(0xD4513A50) } }, + { { UINT32_C(0x054CB874), UINT32_C(0x841C96B4), UINT32_C(0xA3C26834), + UINT32_C(0xD75B1AF1), UINT32_C(0xEE6575F0), UINT32_C(0x7237169D), + UINT32_C(0x0322AADC), UINT32_C(0xD71FC7E5), UINT32_C(0x949E3A8E), + UINT32_C(0xD7A23F1E), 
UINT32_C(0xDD31D8C7), UINT32_C(0x77E2D102) }, + { UINT32_C(0xD10F5A1F), UINT32_C(0x5AD69D09), UINT32_C(0xB99D9A0B), + UINT32_C(0x526C9CB4), UINT32_C(0x972B237D), UINT32_C(0x521BB10B), + UINT32_C(0xA326F342), UINT32_C(0x1E4CD42F), UINT32_C(0xF0F126CA), + UINT32_C(0x5BB6DB27), UINT32_C(0xA4A515AD), UINT32_C(0x587AF22C) } }, + { { UINT32_C(0xB12E542F), UINT32_C(0x1123A531), UINT32_C(0xB9EB2811), + UINT32_C(0x1D01A64D), UINT32_C(0xF2D70F87), UINT32_C(0xA4A3515B), + UINT32_C(0xB4BD0270), UINT32_C(0xFA205234), UINT32_C(0x5EDA26B9), + UINT32_C(0x74B81830), UINT32_C(0x56578E75), UINT32_C(0x9305D6E6) }, + { UINT32_C(0x9F11BE19), UINT32_C(0xF38E69DE), UINT32_C(0x44DBE89F), + UINT32_C(0x1E2A5C23), UINT32_C(0xFD286654), UINT32_C(0x1077E7BC), + UINT32_C(0x0FCA4741), UINT32_C(0xD3669894), UINT32_C(0x278F8497), + UINT32_C(0x893BF904), UINT32_C(0xEB3E14F4), UINT32_C(0xD6AC5F83) } }, + { { UINT32_C(0x488F5F74), UINT32_C(0x327B9DAB), UINT32_C(0xCAB7364F), + UINT32_C(0x2B44F4B8), UINT32_C(0x19B6C6BD), UINT32_C(0xB4A6D22D), + UINT32_C(0xFC77CD3E), UINT32_C(0xA087E613), UINT32_C(0xB0B49BC7), + UINT32_C(0x4558E327), UINT32_C(0xCD835D35), UINT32_C(0x188805BE) }, + { UINT32_C(0xC1DC1007), UINT32_C(0x592F293C), UINT32_C(0x6AF02B44), + UINT32_C(0xFAEE660F), UINT32_C(0x904035F2), UINT32_C(0x5BFBB3BF), + UINT32_C(0x79C07E70), UINT32_C(0xD7C9AE60), UINT32_C(0x234896C2), + UINT32_C(0xC5287DD4), UINT32_C(0xCB0E4121), UINT32_C(0xC4CE4523) } }, + { { UINT32_C(0x58344831), UINT32_C(0x3626B406), UINT32_C(0x8E55C984), + UINT32_C(0xABCCE356), UINT32_C(0x77241602), UINT32_C(0x495CC81C), + UINT32_C(0x6D70DF8F), UINT32_C(0x4FB79676), UINT32_C(0x5B071DCA), + UINT32_C(0x6354B37C), UINT32_C(0x8C0FC0AD), UINT32_C(0x2CAD80A4) }, + { UINT32_C(0xF68739B4), UINT32_C(0x18AADD51), UINT32_C(0x47F09C6C), + UINT32_C(0x1BFBB177), UINT32_C(0xA8FD51C4), UINT32_C(0x9355EA19), + UINT32_C(0xEE58DB7B), UINT32_C(0x3D512A84), UINT32_C(0xE9237640), + UINT32_C(0x70842AFD), UINT32_C(0xACAF858D), UINT32_C(0x36F515CA) } }, + { { 
UINT32_C(0x7E768B23), UINT32_C(0x3DDEC7C4), UINT32_C(0x036D43ED), + UINT32_C(0x97E13C53), UINT32_C(0x3A39AB5F), UINT32_C(0x871E5925), + UINT32_C(0x07E68E2B), UINT32_C(0x9AF292DE), UINT32_C(0x4A40112E), + UINT32_C(0x41158349), UINT32_C(0x3D4D97E6), UINT32_C(0xCDBB46AF) }, + { UINT32_C(0x3C0EBE40), UINT32_C(0x2F891293), UINT32_C(0x3EBAD1E5), + UINT32_C(0x696C7EEE), UINT32_C(0x33B50D99), UINT32_C(0x8A5F3B69), + UINT32_C(0x7ED47DDE), UINT32_C(0xB7BC4840), UINT32_C(0x1E6706D8), + UINT32_C(0x3A6F8E6C), UINT32_C(0x3D84BB8F), UINT32_C(0x6A147943) } }, + { { UINT32_C(0x603AE8D1), UINT32_C(0xEC3A9C78), UINT32_C(0x228C29E5), + UINT32_C(0xBFE07E37), UINT32_C(0x396DBC2B), UINT32_C(0xB0385C5B), + UINT32_C(0xDF85F41F), UINT32_C(0x7C14FE83), UINT32_C(0xADFD463E), + UINT32_C(0xE2E64676), UINT32_C(0x8BF9F23D), UINT32_C(0x5BEF10AA) }, + { UINT32_C(0xF6BAB6DA), UINT32_C(0xFA83EA0D), UINT32_C(0x966BF7E3), + UINT32_C(0xCD0C8BA5), UINT32_C(0x98501C2E), UINT32_C(0xD62216B4), + UINT32_C(0xC3E69F2D), UINT32_C(0xB7F298A4), UINT32_C(0x9C8740F4), + UINT32_C(0x42CEF13B), UINT32_C(0x0DD64307), UINT32_C(0xBB317E52) } }, + { { UINT32_C(0x3FFEE775), UINT32_C(0x22B6245C), UINT32_C(0xB37CE7AA), + UINT32_C(0x5C3F60BE), UINT32_C(0xE1FEC0DF), UINT32_C(0xDE195D40), + UINT32_C(0xA0A82074), UINT32_C(0x3BFAFBC5), UINT32_C(0xC72CA86A), + UINT32_C(0xC36EC86A), UINT32_C(0x13FD43EA), UINT32_C(0x56062851) }, + { UINT32_C(0x8E0B03A4), UINT32_C(0x8686BE80), UINT32_C(0xD540D440), + UINT32_C(0xC3BD1F93), UINT32_C(0xBF96CEC5), UINT32_C(0x13E4EBC0), + UINT32_C(0x9190C844), UINT32_C(0xE8E23984), UINT32_C(0x00844802), + UINT32_C(0x183593A6), UINT32_C(0x4D206878), UINT32_C(0x46716879) } }, + { { UINT32_C(0xB6F63D19), UINT32_C(0x358F394D), UINT32_C(0x6B052194), + UINT32_C(0xA75D4849), UINT32_C(0x5C8D7975), UINT32_C(0x58403590), + UINT32_C(0x6CBFBD77), UINT32_C(0x86DC9B6B), UINT32_C(0x647A51E5), + UINT32_C(0x2DB04D77), UINT32_C(0xF8950D88), UINT32_C(0x5E9A5B02) }, + { UINT32_C(0x017168B0), UINT32_C(0xCE69A7E5), 
UINT32_C(0xC4843AD3), + UINT32_C(0x94630FAC), UINT32_C(0x1EFC44FF), UINT32_C(0xB3B9D736), + UINT32_C(0xB14D7F93), UINT32_C(0xE729E9B6), UINT32_C(0xE0ED0ABC), + UINT32_C(0xA071FC60), UINT32_C(0x8C8D9B83), UINT32_C(0xFC1A9971) } }, + { { UINT32_C(0xD138E975), UINT32_C(0x49686031), UINT32_C(0x5A8EF0D1), + UINT32_C(0x64864038), UINT32_C(0xE7F7DE49), UINT32_C(0x32679713), + UINT32_C(0x29D1CD1D), UINT32_C(0x59132349), UINT32_C(0x20BE9ED2), + UINT32_C(0x849AA23A), UINT32_C(0x284B3F33), UINT32_C(0x15D303E1) }, + { UINT32_C(0xB63F9FE9), UINT32_C(0x37309475), UINT32_C(0x45B7256A), + UINT32_C(0x327BAC8B), UINT32_C(0xD17FC5D3), UINT32_C(0x291CD227), + UINT32_C(0xA973EDF1), UINT32_C(0x8291D8CD), UINT32_C(0x437ABA09), + UINT32_C(0xF3843562), UINT32_C(0x271D0785), UINT32_C(0x33FFB704) } }, + { { UINT32_C(0x47E11E5E), UINT32_C(0x5248D6E4), UINT32_C(0x269C7ED3), + UINT32_C(0x0F66FC3C), UINT32_C(0x903E346E), UINT32_C(0x18C0D2B9), + UINT32_C(0x4BEAE1B8), UINT32_C(0xD81D9D97), UINT32_C(0xFC30FDF3), + UINT32_C(0x610326B0), UINT32_C(0x19A7DFCD), UINT32_C(0x2B136870) }, + { UINT32_C(0xB9527676), UINT32_C(0xEC75F70A), UINT32_C(0x29A3D897), + UINT32_C(0x90829F51), UINT32_C(0x97980302), UINT32_C(0x92FE1809), + UINT32_C(0x68474991), UINT32_C(0xA3F2498E), UINT32_C(0x0F22BBAD), + UINT32_C(0x6A66307B), UINT32_C(0x20378557), UINT32_C(0x32014B91) } }, + { { UINT32_C(0x3CD98610), UINT32_C(0x72CD7D55), UINT32_C(0x74504ADF), + UINT32_C(0xC3D560B0), UINT32_C(0xCEBB5D5D), UINT32_C(0x23F0A982), + UINT32_C(0xB839DDB8), UINT32_C(0x1431C15B), UINT32_C(0xCEB72207), + UINT32_C(0x7E207CD8), UINT32_C(0xE7EFB28D), UINT32_C(0x28E0A848) }, + { UINT32_C(0x1BD96F6E), UINT32_C(0xD22561FE), UINT32_C(0x62A8236B), + UINT32_C(0x04812C18), UINT32_C(0x975491FA), UINT32_C(0xA0BF2334), + UINT32_C(0x435DF87F), UINT32_C(0x294F42A6), UINT32_C(0xA5D6F4F6), + UINT32_C(0x2772B783), UINT32_C(0x2724F853), UINT32_C(0x348F92ED) } }, + }, + { + { { UINT32_C(0x1A42E5E7), UINT32_C(0xC20FB911), UINT32_C(0x81D12863), + 
UINT32_C(0x075A678B), UINT32_C(0x5CC0AA89), UINT32_C(0x12BCBC6A), + UINT32_C(0x4FB9F01E), UINT32_C(0x5279C6AB), UINT32_C(0x11AE1B89), + UINT32_C(0xBC8E1789), UINT32_C(0xC290003C), UINT32_C(0xAE74A706) }, + { UINT32_C(0x79DF3F45), UINT32_C(0x9949D6EC), UINT32_C(0x96C8D37F), + UINT32_C(0xBA18E262), UINT32_C(0xDD2275BF), UINT32_C(0x68DE6EE2), + UINT32_C(0xC419F1D5), UINT32_C(0xA9E4FFF8), UINT32_C(0xA52B5A40), + UINT32_C(0xBC759CA4), UINT32_C(0x63B0996D), UINT32_C(0xFF18CBD8) } }, + { { UINT32_C(0xD7DD47E5), UINT32_C(0x73C57FDE), UINT32_C(0xD49A7F5D), + UINT32_C(0xB0FE5479), UINT32_C(0xCFB9821E), UINT32_C(0xD25C71F1), + UINT32_C(0xCF6A1D68), UINT32_C(0x9427E209), UINT32_C(0xACD24E64), + UINT32_C(0xBF3C3916), UINT32_C(0xBDA7B8B5), UINT32_C(0x7E9F5583) }, + { UINT32_C(0xCF971E11), UINT32_C(0xE7C5F7C8), UINT32_C(0x3C7F035E), + UINT32_C(0xEC16D5D7), UINT32_C(0xE66B277C), UINT32_C(0x818DC472), + UINT32_C(0xB2816F1E), UINT32_C(0x4413FD47), UINT32_C(0x48383C6D), + UINT32_C(0x40F262AF), UINT32_C(0x4F190537), UINT32_C(0xFB057584) } }, + { { UINT32_C(0x08962F6B), UINT32_C(0x487EDC07), UINT32_C(0x190A7E55), + UINT32_C(0x6002F1E7), UINT32_C(0x10FDBA0C), UINT32_C(0x7FC62BEA), + UINT32_C(0x2C3DBF33), UINT32_C(0xC836BBC5), UINT32_C(0x4F7D2A46), + UINT32_C(0x4FDFB5C3), UINT32_C(0xDCA0DF71), UINT32_C(0x824654DE) }, + { UINT32_C(0x0C23902B), UINT32_C(0x30A07676), UINT32_C(0x77FBBF37), + UINT32_C(0x7F1EBB93), UINT32_C(0xFACC13DB), UINT32_C(0xD307D49D), + UINT32_C(0xAE1A261A), UINT32_C(0x148D673A), UINT32_C(0x52D98650), + UINT32_C(0xE008F95B), UINT32_C(0x9F558FDE), UINT32_C(0xC7614440) } }, + { { UINT32_C(0x9CB16650), UINT32_C(0x17CD6AF6), UINT32_C(0x69F4EEBE), + UINT32_C(0x86CC27C1), UINT32_C(0x78822432), UINT32_C(0x7E495B1D), + UINT32_C(0x1B974525), UINT32_C(0xFED338E3), UINT32_C(0x86F3CE21), + UINT32_C(0x527743D3), UINT32_C(0xB515C896), UINT32_C(0x87948AD3) }, + { UINT32_C(0xB17F2FB8), UINT32_C(0x9FDE7039), UINT32_C(0xD9B89D96), + UINT32_C(0xA2FA9A5F), UINT32_C(0x36FF74DC), 
UINT32_C(0x5D46600B), + UINT32_C(0x8302C3C9), UINT32_C(0x8EA74B04), UINT32_C(0xF744B5EB), + UINT32_C(0xD560F570), UINT32_C(0xFE762402), UINT32_C(0xC921023B) } }, + { { UINT32_C(0xFFF4C8ED), UINT32_C(0xA35AB657), UINT32_C(0x8A5FABD7), + UINT32_C(0x017C6124), UINT32_C(0x09ACDA28), UINT32_C(0x56463025), + UINT32_C(0x14CF238A), UINT32_C(0x6038D361), UINT32_C(0xAF1B9F07), + UINT32_C(0x1428B1B6), UINT32_C(0x7482E95C), UINT32_C(0x5827FF44) }, + { UINT32_C(0x780FF362), UINT32_C(0xCB997E18), UINT32_C(0xE0BCAC1E), + UINT32_C(0x2B89D702), UINT32_C(0xA837DDC8), UINT32_C(0xC632A0B5), + UINT32_C(0x59762647), UINT32_C(0xF3EFCF1F), UINT32_C(0x38B0D60A), + UINT32_C(0xE9BA309A), UINT32_C(0x20B5FB37), UINT32_C(0x05DEABDD) } }, + { { UINT32_C(0xCB8AF047), UINT32_C(0xD44E5DBA), UINT32_C(0x943CFE82), + UINT32_C(0x15400CB4), UINT32_C(0x9DF88B67), UINT32_C(0xDBD69575), + UINT32_C(0xB2405A7D), UINT32_C(0x8299DB2B), UINT32_C(0x0B1D80CD), + UINT32_C(0x46E3BF77), UINT32_C(0xE82BA3D9), UINT32_C(0xC50CF66C) }, + { UINT32_C(0xF2F747A9), UINT32_C(0xB2910A07), UINT32_C(0x5ADC89C1), + UINT32_C(0xF6B669DB), UINT32_C(0x9052B081), UINT32_C(0x3B5EF1A0), + UINT32_C(0xB594ACE2), UINT32_C(0x0F5D5ED3), UINT32_C(0xD5F01320), + UINT32_C(0xDA30B8D5), UINT32_C(0xAAFCD58F), UINT32_C(0x0D688C5E) } }, + { { UINT32_C(0x2A161074), UINT32_C(0x5EEE3A31), UINT32_C(0xEFE2BE37), + UINT32_C(0x6BAAAE56), UINT32_C(0xE3D78698), UINT32_C(0xF9787F61), + UINT32_C(0x50630A30), UINT32_C(0xC6836B26), UINT32_C(0x1445DEF1), + UINT32_C(0x7445B85D), UINT32_C(0xD568A6A5), UINT32_C(0xD72016A2) }, + { UINT32_C(0xE355614F), UINT32_C(0x9DD6F533), UINT32_C(0x91E04588), + UINT32_C(0x637E7E5F), UINT32_C(0xB9FB1391), UINT32_C(0x42E142F3), + UINT32_C(0x41AFE5DA), UINT32_C(0x0D07C05C), UINT32_C(0x1394EDF1), + UINT32_C(0xD7CD25C8), UINT32_C(0xB99288EE), UINT32_C(0xEBE6A0FC) } }, + { { UINT32_C(0xBABBAD86), UINT32_C(0xB8E63B7B), UINT32_C(0x90D66766), + UINT32_C(0x63226A9F), UINT32_C(0x5CF26666), UINT32_C(0x26381836), + UINT32_C(0x4CADD0BF), 
UINT32_C(0xCCBD142D), UINT32_C(0x9AC29470), + UINT32_C(0xA070965E), UINT32_C(0x25FF23ED), UINT32_C(0x6BDCA260) }, + { UINT32_C(0x87DCA7B3), UINT32_C(0xD4E00FD4), UINT32_C(0x9E0E8734), + UINT32_C(0xA5097833), UINT32_C(0x048173A4), UINT32_C(0xF73F162E), + UINT32_C(0x9C3C2FA2), UINT32_C(0xD23F9196), UINT32_C(0xE4AC397A), + UINT32_C(0x9AB98B45), UINT32_C(0x543F2D4B), UINT32_C(0x2BAA0300) } }, + { { UINT32_C(0xC658C445), UINT32_C(0xBBBE15E7), UINT32_C(0xC28941D1), + UINT32_C(0xB8CBCB20), UINT32_C(0x027D6540), UINT32_C(0x65549BE2), + UINT32_C(0x1E8EF4F4), UINT32_C(0xEBBCA802), UINT32_C(0xD2ACA397), + UINT32_C(0x18214B4B), UINT32_C(0xE31784A3), UINT32_C(0xCBEC7DE2) }, + { UINT32_C(0x0116FDF3), UINT32_C(0x96F0533F), UINT32_C(0x5C8F5EE1), + UINT32_C(0x68911C90), UINT32_C(0xD568603A), UINT32_C(0x7DE9A3AE), + UINT32_C(0x6A3AD7B7), UINT32_C(0x3F56C52C), UINT32_C(0x670B4D0E), + UINT32_C(0x5BE9AFCA), UINT32_C(0x375DFE2F), UINT32_C(0x628BFEEE) } }, + { { UINT32_C(0xDD4ADDB3), UINT32_C(0x97DAE81B), UINT32_C(0x8704761B), + UINT32_C(0x12D2CF4E), UINT32_C(0x3247788D), UINT32_C(0x5E820B40), + UINT32_C(0x0051CA80), UINT32_C(0x82234B62), UINT32_C(0x6CB5EA74), + UINT32_C(0x0C62704D), UINT32_C(0x23941593), UINT32_C(0xDE560420) }, + { UINT32_C(0xF1B04145), UINT32_C(0xB3912A3C), UINT32_C(0xAF93688D), + UINT32_C(0xE3967CD7), UINT32_C(0x58DABB4B), UINT32_C(0x2E2DCD2F), + UINT32_C(0x0E303911), UINT32_C(0x6564836F), UINT32_C(0xECE07C5C), + UINT32_C(0x1F10F19B), UINT32_C(0xD8919126), UINT32_C(0xB47F07EE) } }, + { { UINT32_C(0xE9A2EEC9), UINT32_C(0xE3545085), UINT32_C(0x2C8E51FE), + UINT32_C(0x81866A97), UINT32_C(0x50027243), UINT32_C(0xD2BA7DB5), + UINT32_C(0x4AE87DE4), UINT32_C(0x29DAEAB5), UINT32_C(0x684F9497), + UINT32_C(0x5EF3D4B8), UINT32_C(0x9D5D6873), UINT32_C(0xE2DACE3B) }, + { UINT32_C(0xFFD29C9C), UINT32_C(0xF012C951), UINT32_C(0xADBADA14), + UINT32_C(0x48289445), UINT32_C(0x89558C49), UINT32_C(0x8751F50D), + UINT32_C(0x99E35BEE), UINT32_C(0x75511A4F), UINT32_C(0x7D59AA5F), + 
UINT32_C(0xEF802D6E), UINT32_C(0xA2A795E2), UINT32_C(0x14FCAD65) } }, + { { UINT32_C(0x08CB8F2C), UINT32_C(0xC8EB00E8), UINT32_C(0x2B45BD86), + UINT32_C(0x68607532), UINT32_C(0x59969713), UINT32_C(0x7A29B459), + UINT32_C(0xD684201B), UINT32_C(0x5FA15B9B), UINT32_C(0xB9E538EE), + UINT32_C(0x1A853190), UINT32_C(0xD573D043), UINT32_C(0x4150605C) }, + { UINT32_C(0xEB9FBB68), UINT32_C(0xEF011D3B), UINT32_C(0x66AE32B6), + UINT32_C(0x67279982), UINT32_C(0x445DE5EC), UINT32_C(0x861B86EA), + UINT32_C(0xA34A50E1), UINT32_C(0x62837D18), UINT32_C(0xBF5F0663), + UINT32_C(0x228C006A), UINT32_C(0x396DB36A), UINT32_C(0xE007FDE7) } }, + { { UINT32_C(0x5A916A55), UINT32_C(0xDEE4F881), UINT32_C(0xF39C82CB), + UINT32_C(0x20DC0370), UINT32_C(0x40F09821), UINT32_C(0xD9A71615), + UINT32_C(0xF7273492), UINT32_C(0xD50AD8BF), UINT32_C(0x32E7C4BF), + UINT32_C(0xA06F7D12), UINT32_C(0x4C5CEA36), UINT32_C(0xFA0F6154) }, + { UINT32_C(0x5FC49CFE), UINT32_C(0xF4FD9BED), UINT32_C(0xC9291678), + UINT32_C(0xD8CB45D1), UINT32_C(0x7B92C9F2), UINT32_C(0x94DB86CC), + UINT32_C(0x73C81169), UINT32_C(0x09CA5F38), UINT32_C(0xAEED06F0), + UINT32_C(0x109F40B0), UINT32_C(0x14DCAA0A), UINT32_C(0x9F0360B2) } }, + { { UINT32_C(0xE12AD3E7), UINT32_C(0x4189B70D), UINT32_C(0x10B06607), + UINT32_C(0x5208ADB2), UINT32_C(0xEE8497FA), UINT32_C(0xEBD8E2A2), + UINT32_C(0xE04F2ECB), UINT32_C(0x61B1BD67), UINT32_C(0x4F3F5F99), + UINT32_C(0x0E2DDA72), UINT32_C(0xF747B16D), UINT32_C(0xD5D96740) }, + { UINT32_C(0xA6BF397F), UINT32_C(0x308A48F6), UINT32_C(0x23A93595), + UINT32_C(0x7021C3E5), UINT32_C(0x36470AA0), UINT32_C(0xF10B0229), + UINT32_C(0x4E03295B), UINT32_C(0x7761E8EC), UINT32_C(0x07339770), + UINT32_C(0x16EFEF58), UINT32_C(0x5DA5DAA2), UINT32_C(0x0D55D2DD) } }, + { { UINT32_C(0x8A22F87A), UINT32_C(0x915EA6A3), UINT32_C(0x2E5A088E), + UINT32_C(0x191151C1), UINT32_C(0x7F1D5CBE), UINT32_C(0x190252F1), + UINT32_C(0x3B0EC99B), UINT32_C(0xE43F59C3), UINT32_C(0xFF2A6135), + UINT32_C(0xBE8588D4), UINT32_C(0x2ECB4B9F), 
UINT32_C(0x103877CC) }, + { UINT32_C(0x023CF92B), UINT32_C(0x8F4147E5), UINT32_C(0x0CC2085B), + UINT32_C(0xC24384CC), UINT32_C(0xD082D311), UINT32_C(0x6A2DB4A2), + UINT32_C(0xED7BA9AE), UINT32_C(0x06283811), UINT32_C(0x2A8E1592), + UINT32_C(0xE9A3F532), UINT32_C(0x5A59E894), UINT32_C(0xAC20F0F4) } }, + { { UINT32_C(0x74AAB4B1), UINT32_C(0x788CAA52), UINT32_C(0x2FEAFC7E), + UINT32_C(0xEB84ABA1), UINT32_C(0xAC04FF77), UINT32_C(0x31DA71DA), + UINT32_C(0x24E4D0BF), UINT32_C(0x39D12EB9), UINT32_C(0x87A34EF8), + UINT32_C(0x4F2F292F), UINT32_C(0xA237A8ED), UINT32_C(0x9B324372) }, + { UINT32_C(0x2EE3A82D), UINT32_C(0xBB2D04B1), UINT32_C(0xD18D36B2), + UINT32_C(0xED4FF367), UINT32_C(0xA6EA0138), UINT32_C(0x99D231EE), + UINT32_C(0x4F92E04A), UINT32_C(0x7C2D4F06), UINT32_C(0xCA272FD0), + UINT32_C(0x78A82AB2), UINT32_C(0xAB8CDC32), UINT32_C(0x7EC41340) } }, + }, + { + { { UINT32_C(0xD2E15A8C), UINT32_C(0xD23658C8), UINT32_C(0x16BA28CA), + UINT32_C(0x23F93DF7), UINT32_C(0x082210F1), UINT32_C(0x6DAB10EC), + UINT32_C(0xBFC36490), UINT32_C(0xFB1ADD91), UINT32_C(0x9A4F2D14), + UINT32_C(0xEDA8B02F), UINT32_C(0x56560443), UINT32_C(0x9060318C) }, + { UINT32_C(0x64711AB2), UINT32_C(0x6C01479E), UINT32_C(0xE337EB85), + UINT32_C(0x41446FC7), UINT32_C(0x71888397), UINT32_C(0x4DCF3C1D), + UINT32_C(0x13C34FD2), UINT32_C(0x87A9C04E), UINT32_C(0x510C15AC), + UINT32_C(0xFE0E08EC), UINT32_C(0xC0F495D2), UINT32_C(0xFC0D0413) } }, + { { UINT32_C(0x156636C2), UINT32_C(0xEB05C516), UINT32_C(0x090E93FC), + UINT32_C(0x2F613ABA), UINT32_C(0x489576F5), UINT32_C(0xCFD573CD), + UINT32_C(0x535A8D57), UINT32_C(0xE6535380), UINT32_C(0x671436C4), + UINT32_C(0x13947314), UINT32_C(0x5F0A122D), UINT32_C(0x1172FB0C) }, + { UINT32_C(0xC12F58F6), UINT32_C(0xAECC7EC1), UINT32_C(0x8E41AFD2), + UINT32_C(0xFE42F957), UINT32_C(0x3D4221AA), UINT32_C(0xDF96F652), + UINT32_C(0x2851996B), UINT32_C(0xFEF5649F), UINT32_C(0xD5CFB67E), + UINT32_C(0x46FB9F26), UINT32_C(0xEF5C4052), UINT32_C(0xB047BFC7) } }, + { { 
UINT32_C(0xF4484374), UINT32_C(0x5CBDC442), UINT32_C(0xF92452EF), + UINT32_C(0x6B156957), UINT32_C(0xC118D02A), UINT32_C(0x58A26886), + UINT32_C(0x75AAF276), UINT32_C(0x87FF74E6), UINT32_C(0xF65F6EC1), + UINT32_C(0xB133BE95), UINT32_C(0x4B1B8D32), UINT32_C(0xA89B6284) }, + { UINT32_C(0x09C81004), UINT32_C(0xDD8A8EF3), UINT32_C(0x0CF21991), + UINT32_C(0x7F8225DB), UINT32_C(0x26623FAF), UINT32_C(0xD525A6DB), + UINT32_C(0xBAE15453), UINT32_C(0xF2368D40), UINT32_C(0x84F89FC9), + UINT32_C(0x55D6A84D), UINT32_C(0x86021A3E), UINT32_C(0xAF38358A) } }, + { { UINT32_C(0xFF52E280), UINT32_C(0xBD048BDC), UINT32_C(0x526A1795), + UINT32_C(0x8A51D0B2), UINT32_C(0xA985AC0F), UINT32_C(0x40AAA758), + UINT32_C(0xF2C7ACE9), UINT32_C(0x6039BCDC), UINT32_C(0x6AEC347D), + UINT32_C(0x712092CC), UINT32_C(0x6B5ACAB7), UINT32_C(0x7976D090) }, + { UINT32_C(0x6EED9617), UINT32_C(0x1EBCF80D), UINT32_C(0xB0F404A4), + UINT32_C(0xB3A63149), UINT32_C(0xD0B610EF), UINT32_C(0x3FDD3D1A), + UINT32_C(0x98C28AC7), UINT32_C(0xDD3F6F94), UINT32_C(0x3A59750F), + UINT32_C(0x650B7794), UINT32_C(0x2D3991AC), UINT32_C(0xEC59BAB1) } }, + { { UINT32_C(0x2E552766), UINT32_C(0x01F40E88), UINT32_C(0x66F5354F), + UINT32_C(0x1FE3D509), UINT32_C(0xB3A8EA7F), UINT32_C(0x0E46D006), + UINT32_C(0xF831CD6A), UINT32_C(0xF75AB629), UINT32_C(0x91465119), + UINT32_C(0xDAD808D7), UINT32_C(0x17EF9B10), UINT32_C(0x442405AF) }, + { UINT32_C(0x672BDFCB), UINT32_C(0xD5FE0A96), UINT32_C(0x355DBDEC), + UINT32_C(0xA9DFA422), UINT32_C(0x79B25636), UINT32_C(0xFDB79AA1), + UINT32_C(0xEECE8AEC), UINT32_C(0xE7F26FFD), UINT32_C(0x7EDD5AA2), + UINT32_C(0xB5925550), UINT32_C(0x8EB3A6C2), UINT32_C(0x2C8F6FF0) } }, + { { UINT32_C(0x757D6136), UINT32_C(0x88887756), UINT32_C(0x88B92E72), + UINT32_C(0xAD9AC183), UINT32_C(0x8785D3EB), UINT32_C(0x92CB2FC4), + UINT32_C(0x9319764B), UINT32_C(0xD1A542FE), UINT32_C(0x626A62F8), + UINT32_C(0xAF4CC78F), UINT32_C(0x26BFFAAE), UINT32_C(0x7F3F5FC9) }, + { UINT32_C(0x40AE2231), UINT32_C(0x0A203D43), 
UINT32_C(0x387898E8), + UINT32_C(0xA8BFD9E0), UINT32_C(0x474B7DDD), UINT32_C(0x1A0C379C), + UINT32_C(0x34FD49EA), UINT32_C(0x03855E0A), UINT32_C(0xB3EF4AE1), + UINT32_C(0x02B26223), UINT32_C(0xE399E0A3), UINT32_C(0x804BD8CF) } }, + { { UINT32_C(0xDE865713), UINT32_C(0x11A9F3D0), UINT32_C(0xBDE98821), + UINT32_C(0x81E36B6B), UINT32_C(0x6AA891D0), UINT32_C(0x324996C8), + UINT32_C(0x395682B5), UINT32_C(0x7B95BDC1), UINT32_C(0xC1600563), + UINT32_C(0x47BF2219), UINT32_C(0x643E38B4), UINT32_C(0x7A473F50) }, + { UINT32_C(0xF5738288), UINT32_C(0x0911F50A), UINT32_C(0x6F9C415B), + UINT32_C(0xDF947A70), UINT32_C(0x67A067F6), UINT32_C(0xBDB994F2), + UINT32_C(0x88BE96CD), UINT32_C(0x3F4BEC1B), UINT32_C(0xE56DD6D9), + UINT32_C(0x9820E931), UINT32_C(0x0A80F419), UINT32_C(0xB138F14F) } }, + { { UINT32_C(0x0429077A), UINT32_C(0xA11A1A8F), UINT32_C(0x10351C68), + UINT32_C(0x2BB1E33D), UINT32_C(0x89459A27), UINT32_C(0x3C25ABFE), + UINT32_C(0x6B8AC774), UINT32_C(0x2D0091B8), UINT32_C(0x3B2415D9), + UINT32_C(0xDAFC7853), UINT32_C(0x9201680D), UINT32_C(0xDE713CF1) }, + { UINT32_C(0x68889D57), UINT32_C(0x8E5F445D), UINT32_C(0x60EABF5B), + UINT32_C(0x608B209C), UINT32_C(0xF9CFA408), UINT32_C(0x10EC0ACC), + UINT32_C(0x4D1EE754), UINT32_C(0xD5256B9D), UINT32_C(0x0AA6C18D), + UINT32_C(0xFF866BAB), UINT32_C(0xACB90A45), UINT32_C(0x9D196DB8) } }, + { { UINT32_C(0xB9B081B2), UINT32_C(0xA46D76A9), UINT32_C(0x62163C25), + UINT32_C(0xFC743A10), UINT32_C(0x7761C392), UINT32_C(0xCD2A5C8D), + UINT32_C(0xBE808583), UINT32_C(0x39BDDE0B), UINT32_C(0xB98E4DFE), + UINT32_C(0x7C416021), UINT32_C(0x65913A44), UINT32_C(0xF930E563) }, + { UINT32_C(0x7585CF3C), UINT32_C(0xC3555F7E), UINT32_C(0x3D6333D5), + UINT32_C(0xC737E383), UINT32_C(0xB430B03D), UINT32_C(0x5B60DBA4), + UINT32_C(0xE7555404), UINT32_C(0x42B715EB), UINT32_C(0x7C7796E3), + UINT32_C(0x571BDF5B), UINT32_C(0x6DB6331F), UINT32_C(0x33DC62C6) } }, + { { UINT32_C(0xE61DEE59), UINT32_C(0x3FB9CCB0), UINT32_C(0x18B14DB9), + UINT32_C(0xC5185F23), 
UINT32_C(0x845EF36C), UINT32_C(0x1B2ADC4F), + UINT32_C(0x5C1A33AB), UINT32_C(0x195D5B50), UINT32_C(0x421F59D2), + UINT32_C(0x8CEA528E), UINT32_C(0xD2931CEA), UINT32_C(0x7DFCCECF) }, + { UINT32_C(0x8CF7E3F7), UINT32_C(0x51FFA1D5), UINT32_C(0xBDC9FB43), + UINT32_C(0xF01B7886), UINT32_C(0x261A0D35), UINT32_C(0xD65AB610), + UINT32_C(0x7574A554), UINT32_C(0x84BCBAFD), UINT32_C(0xFAD70208), + UINT32_C(0x4B119956), UINT32_C(0x4FAB5243), UINT32_C(0xDDC329C2) } }, + { { UINT32_C(0x9CE92177), UINT32_C(0x1A08AA57), UINT32_C(0xDC2B5C36), + UINT32_C(0x3395E557), UINT32_C(0x394ED04E), UINT32_C(0xFDFE7041), + UINT32_C(0xC6DFCDDE), UINT32_C(0xB797EB24), UINT32_C(0xCB9DE5D6), + UINT32_C(0x284A6B2A), UINT32_C(0x07222765), UINT32_C(0xE0BD95C8) }, + { UINT32_C(0x9FE678A7), UINT32_C(0x114A951B), UINT32_C(0x9E4954EC), + UINT32_C(0xE7ECD0BD), UINT32_C(0x79F0B8A9), UINT32_C(0x7D4096FE), + UINT32_C(0x09724FE2), UINT32_C(0xBDB26E9A), UINT32_C(0xF787AF95), + UINT32_C(0x08741AD8), UINT32_C(0x24045AD8), UINT32_C(0x2BF97272) } }, + { { UINT32_C(0xA9451D57), UINT32_C(0xAB1FEDD9), UINT32_C(0x483E38C9), + UINT32_C(0xDF4D91DF), UINT32_C(0x24E9CF8E), UINT32_C(0x2D54D311), + UINT32_C(0x7A22EEB6), UINT32_C(0x9C2A5AF8), UINT32_C(0x0A43F123), + UINT32_C(0xBD9861EF), UINT32_C(0x38A18B7B), UINT32_C(0x581EA6A2) }, + { UINT32_C(0x296470A3), UINT32_C(0xAF339C85), UINT32_C(0xAFD8203E), + UINT32_C(0xF9603FCD), UINT32_C(0x96763C28), UINT32_C(0x95D05350), + UINT32_C(0x860EC831), UINT32_C(0x15445C16), UINT32_C(0x6867A323), + UINT32_C(0x2AFB8728), UINT32_C(0x0C4838BF), UINT32_C(0x4B152D6D) } }, + { { UINT32_C(0x837CACBA), UINT32_C(0x45BA0E4F), UINT32_C(0xC0725275), + UINT32_C(0x7ADB38AE), UINT32_C(0x942D3C28), UINT32_C(0x19C82831), + UINT32_C(0x6D0FE7DD), UINT32_C(0x94F4731D), UINT32_C(0x4898F1E6), + UINT32_C(0xC3C07E13), UINT32_C(0xED410B51), UINT32_C(0x76350EAC) }, + { UINT32_C(0xF99AACFC), UINT32_C(0x0FA8BECA), UINT32_C(0x65FAF9CF), + UINT32_C(0x2834D86F), UINT32_C(0x6F3866AF), UINT32_C(0x8E62846A), + 
UINT32_C(0x3DFD6A2B), UINT32_C(0xDAA9BD4F), UINT32_C(0xA6132655), + UINT32_C(0xC27115BB), UINT32_C(0xBD5A32C2), UINT32_C(0x83972DF7) } }, + { { UINT32_C(0xD513B825), UINT32_C(0xA330CB5B), UINT32_C(0xEE37BEC3), + UINT32_C(0xAE18B2D3), UINT32_C(0xF780A902), UINT32_C(0xFC3AB80A), + UINT32_C(0xD607DDF1), UINT32_C(0xD7835BE2), UINT32_C(0x5B6E4C2B), + UINT32_C(0x8120F767), UINT32_C(0x67E78CCB), UINT32_C(0xAA8C3859) }, + { UINT32_C(0xAA0ED321), UINT32_C(0xA8DA8CE2), UINT32_C(0xD766341A), + UINT32_C(0xCB8846FD), UINT32_C(0x33DC9D9A), UINT32_C(0xF2A342EE), + UINT32_C(0xD0A18A80), UINT32_C(0xA519E0BE), UINT32_C(0xAF48DF4C), + UINT32_C(0x9CDAA39C), UINT32_C(0x7E0C19EE), UINT32_C(0xA4B500CA) } }, + { { UINT32_C(0x8217001B), UINT32_C(0x83A7FD2F), UINT32_C(0x4296A8BA), + UINT32_C(0x4F6FCF06), UINT32_C(0x91619927), UINT32_C(0x7D748643), + UINT32_C(0x941E4D41), UINT32_C(0x174C1075), UINT32_C(0xA64F5A6C), + UINT32_C(0x037EDEBD), UINT32_C(0x6E29DC56), UINT32_C(0xCF64DB3A) }, + { UINT32_C(0x37C0B9F4), UINT32_C(0x150B3ACE), UINT32_C(0x7168178B), + UINT32_C(0x1323234A), UINT32_C(0xEF4D1879), UINT32_C(0x1CE47014), + UINT32_C(0x17FB4D5C), UINT32_C(0xA22E3742), UINT32_C(0xD985F794), + UINT32_C(0x69B81822), UINT32_C(0x081D7214), UINT32_C(0x199C21C4) } }, + { { UINT32_C(0x8F04B4D2), UINT32_C(0x160BC7A1), UINT32_C(0xB10DE174), + UINT32_C(0x79CA81DD), UINT32_C(0x2DA1E9C7), UINT32_C(0xE2A280B0), + UINT32_C(0x1D6A0A29), UINT32_C(0xB4F6BD99), UINT32_C(0x1C5B8F27), + UINT32_C(0x57CF3EDD), UINT32_C(0x158C2FD4), UINT32_C(0x7E34FC57) }, + { UINT32_C(0xCAC93459), UINT32_C(0x828CFD89), UINT32_C(0xB7AF499F), + UINT32_C(0x9E631B6F), UINT32_C(0xDA26C135), UINT32_C(0xF4DC8BC0), + UINT32_C(0x37186735), UINT32_C(0x6128ED39), UINT32_C(0x67BF0BA5), + UINT32_C(0xBB45538B), UINT32_C(0x0064A3AB), UINT32_C(0x1ADDD4C1) } }, + }, + { + { { UINT32_C(0xDD14D47E), UINT32_C(0xC32730E8), UINT32_C(0xC0F01E0F), + UINT32_C(0xCDC1FD42), UINT32_C(0x3F5CD846), UINT32_C(0x2BACFDBF), + UINT32_C(0x7272D4DD), 
UINT32_C(0x45F36416), UINT32_C(0x5EB75776), + UINT32_C(0xDD813A79), UINT32_C(0x50997BE2), UINT32_C(0xB57885E4) }, + { UINT32_C(0xDB8C9829), UINT32_C(0xDA054E2B), UINT32_C(0xAAB5A594), + UINT32_C(0x4161D820), UINT32_C(0x026116A3), UINT32_C(0x4C428F31), + UINT32_C(0xDCD85E91), UINT32_C(0x372AF9A0), UINT32_C(0x673ADC2D), + UINT32_C(0xFDA6E903), UINT32_C(0xA8DB59E6), UINT32_C(0x4526B8AC) } }, + { { UINT32_C(0xE23A8472), UINT32_C(0x68FE359D), UINT32_C(0x4CE3C101), + UINT32_C(0x43EB12BD), UINT32_C(0xFC704935), UINT32_C(0x0EC652C3), + UINT32_C(0x52E4E22D), UINT32_C(0x1EEFF1F9), UINT32_C(0x083E3ADA), + UINT32_C(0xBA6777CB), UINT32_C(0x8BEFC871), UINT32_C(0xAB52D7DC) }, + { UINT32_C(0x497CBD59), UINT32_C(0x4EDE689F), UINT32_C(0x27577DD9), + UINT32_C(0xC8AE42B9), UINT32_C(0x7AB83C27), UINT32_C(0xE0F08051), + UINT32_C(0x2C8C1F48), UINT32_C(0x1F3D5F25), UINT32_C(0xAF241AAC), + UINT32_C(0x57991607), UINT32_C(0xB8A337E0), UINT32_C(0xC4458B0A) } }, + { { UINT32_C(0x51DD1BA9), UINT32_C(0x3DBB3FA6), UINT32_C(0x545E960B), + UINT32_C(0xE53C1C4D), UINT32_C(0x793CE803), UINT32_C(0x35AC6574), + UINT32_C(0x83DBCE4F), UINT32_C(0xB2697DC7), UINT32_C(0xE13CF6B0), + UINT32_C(0xE35C5BF2), UINT32_C(0xB0C4A164), UINT32_C(0x35034280) }, + { UINT32_C(0xD9C0D3C1), UINT32_C(0xAA490908), UINT32_C(0xCB4D2E90), + UINT32_C(0x2CCE614D), UINT32_C(0x54D504E4), UINT32_C(0xF646E96C), + UINT32_C(0xB73310A3), UINT32_C(0xD74E7541), UINT32_C(0x18BDE5DA), + UINT32_C(0xEAD71596), UINT32_C(0xAA09AEF7), UINT32_C(0x96E7F4A8) } }, + { { UINT32_C(0x5D6E5F48), UINT32_C(0xA8393A24), UINT32_C(0xF9175CE8), + UINT32_C(0x2C8D7EA2), UINT32_C(0x55A20268), UINT32_C(0xD8824E02), + UINT32_C(0xA446BCC6), UINT32_C(0x9DD9A272), UINT32_C(0x5351499B), + UINT32_C(0xC929CDED), UINT32_C(0xCFE76535), UINT32_C(0xEA5AD9EC) }, + { UINT32_C(0xDC32D001), UINT32_C(0x26F3D7D9), UINT32_C(0x43EB9689), + UINT32_C(0x51C3BE83), UINT32_C(0x759E6DDB), UINT32_C(0x91FDCC06), + UINT32_C(0xE302B891), UINT32_C(0xAC2E1904), UINT32_C(0xC207E1F7), + 
UINT32_C(0xAD25C645), UINT32_C(0xAB3DEB4A), UINT32_C(0x28A70F0D) } }, + { { UINT32_C(0x03BEA8F1), UINT32_C(0x922D7F97), UINT32_C(0x584570BE), + UINT32_C(0x3AD820D4), UINT32_C(0x3CD46B43), UINT32_C(0x0CE0A850), + UINT32_C(0xAE66743D), UINT32_C(0x4C07911F), UINT32_C(0xFDA60023), + UINT32_C(0x66519EB9), UINT32_C(0xEC2ACD9C), UINT32_C(0x7F83004B) }, + { UINT32_C(0xC3117EAD), UINT32_C(0x001E0B80), UINT32_C(0x0722BA25), + UINT32_C(0xBB72D541), UINT32_C(0x6E9A5078), UINT32_C(0x3AF7DB96), + UINT32_C(0x701B6B4C), UINT32_C(0x86C5774E), UINT32_C(0x37824DB5), + UINT32_C(0xBD2C0E8E), UINT32_C(0xBFAC286D), UINT32_C(0x3AE3028C) } }, + { { UINT32_C(0xA33E071B), UINT32_C(0x83D4D4A8), UINT32_C(0x61444BB5), + UINT32_C(0x881C0A92), UINT32_C(0x520E3BC3), UINT32_C(0xEEA1E292), + UINT32_C(0x2AAAB729), UINT32_C(0x5A5F4C3C), UINT32_C(0xE63C7C94), + UINT32_C(0x0B766C5E), UINT32_C(0xBB2CC79C), UINT32_C(0x62BB8A9F) }, + { UINT32_C(0xAA5DC49D), UINT32_C(0x97ADC7D2), UINT32_C(0x31718681), + UINT32_C(0x30CC26B3), UINT32_C(0x56E86EDE), UINT32_C(0xAC86E6FF), + UINT32_C(0xCD52F7F2), UINT32_C(0x37BCA7A2), UINT32_C(0x9CE6D87F), + UINT32_C(0x734D2C94), UINT32_C(0xC2F7E0CA), UINT32_C(0x06A71D71) } }, + { { UINT32_C(0xC6357D33), UINT32_C(0x559DCF75), UINT32_C(0x652517DE), + UINT32_C(0x4616D940), UINT32_C(0x1CCF207B), UINT32_C(0x3D576B98), + UINT32_C(0x1979F631), UINT32_C(0x51E2D1EF), UINT32_C(0x06AE8296), + UINT32_C(0x57517DDD), UINT32_C(0xD6E7151F), UINT32_C(0x309A3D7F) }, + { UINT32_C(0x0E3A6FE5), UINT32_C(0xBA2A23E6), UINT32_C(0xD28B22C3), + UINT32_C(0x76CF674A), UINT32_C(0xF8B808C3), UINT32_C(0xD235AD07), + UINT32_C(0x6B71213A), UINT32_C(0x7BBF4C58), UINT32_C(0x93271EBB), + UINT32_C(0x0676792E), UINT32_C(0x05B1FC31), UINT32_C(0x2CFD2C76) } }, + { { UINT32_C(0x37A450F5), UINT32_C(0x4258E5C0), UINT32_C(0x52D2B118), + UINT32_C(0xC3245F1B), UINT32_C(0x82BC5963), UINT32_C(0x6DF7B484), + UINT32_C(0x9C273D1E), UINT32_C(0xE520DA4D), UINT32_C(0x2C3010E5), + UINT32_C(0xED78E012), UINT32_C(0x3C1D4C05), 
UINT32_C(0x11222948) }, + { UINT32_C(0xC692B490), UINT32_C(0xE3DAE5AF), UINT32_C(0xC197F793), + UINT32_C(0x3272BD10), UINT32_C(0xE709ACAA), UINT32_C(0xF7EAE411), + UINT32_C(0x778270A6), UINT32_C(0x00B0C95F), UINT32_C(0x220D4350), + UINT32_C(0x4DA76EE1), UINT32_C(0xAB71E308), UINT32_C(0x521E1461) } }, + { { UINT32_C(0x343196A3), UINT32_C(0x7B654323), UINT32_C(0xB0C95250), + UINT32_C(0x35D442AD), UINT32_C(0xE264FF17), UINT32_C(0x38AF50E6), + UINT32_C(0x2030D2EA), UINT32_C(0x28397A41), UINT32_C(0xF74EEDA1), + UINT32_C(0x8F1D84E9), UINT32_C(0xE6FB3C52), UINT32_C(0xD521F92D) }, + { UINT32_C(0x95733811), UINT32_C(0xAF358D77), UINT32_C(0x93ABFE94), + UINT32_C(0xEBFDDD01), UINT32_C(0xD18D99DE), UINT32_C(0x05D8A028), + UINT32_C(0xB5D5BDD9), UINT32_C(0x5A664019), UINT32_C(0x2AA12FE8), + UINT32_C(0x3DF17282), UINT32_C(0xB889A28E), UINT32_C(0xB42E006F) } }, + { { UINT32_C(0xBC35CB1A), UINT32_C(0xCF10E97D), UINT32_C(0x994DEDC5), + UINT32_C(0xC70A7BBD), UINT32_C(0x37D04FB9), UINT32_C(0x76A5327C), + UINT32_C(0xA76E0CDA), UINT32_C(0x87539F76), UINT32_C(0xCD60A6B1), + UINT32_C(0xE9FE493F), UINT32_C(0x132F01C0), UINT32_C(0xA4574796) }, + { UINT32_C(0xDB70B167), UINT32_C(0xC43B85EB), UINT32_C(0x98551DFA), + UINT32_C(0x81D5039A), UINT32_C(0x1D979FA4), UINT32_C(0x6B56FBE9), + UINT32_C(0x8615098F), UINT32_C(0x49714FD7), UINT32_C(0x94DECAB5), + UINT32_C(0xB10E1CEA), UINT32_C(0x480EF6E3), UINT32_C(0x8342EBA3) } }, + { { UINT32_C(0xB3677288), UINT32_C(0xE1E030B0), UINT32_C(0x8D5CE3AF), + UINT32_C(0x2978174C), UINT32_C(0xF7B2DE98), UINT32_C(0xAFC0271C), + UINT32_C(0xB99C20B5), UINT32_C(0x745BC6F3), UINT32_C(0x1E3BB4E5), + UINT32_C(0x9F6EDCED), UINT32_C(0x73C8C1FC), UINT32_C(0x58D3EE4E) }, + { UINT32_C(0x7FD30124), UINT32_C(0x1F3535F4), UINT32_C(0x5FA62502), + UINT32_C(0xF366AC70), UINT32_C(0x965363FE), UINT32_C(0x4C4C1FDD), + UINT32_C(0x1DE2CA2B), UINT32_C(0x8B2C7777), UINT32_C(0x882F1173), + UINT32_C(0x0CB54743), UINT32_C(0x71343331), UINT32_C(0x94B6B8C0) } }, + { { UINT32_C(0x65B8B35B), 
UINT32_C(0x75AF0141), UINT32_C(0x4670A1F5), + UINT32_C(0x6D7B8485), UINT32_C(0xA3B6D376), UINT32_C(0x6EAA3A47), + UINT32_C(0xCB3E5B66), UINT32_C(0xD7E673D2), UINT32_C(0x9589AB38), + UINT32_C(0xC0338E6C), UINT32_C(0x09440FAA), UINT32_C(0x4BE26CB3) }, + { UINT32_C(0x394F9AA3), UINT32_C(0x82CB05E7), UINT32_C(0x7F7792EA), + UINT32_C(0xC45C8A8A), UINT32_C(0xB687DC70), UINT32_C(0x37E5E33B), + UINT32_C(0xDFE48E49), UINT32_C(0x63853219), UINT32_C(0x6D0E5C8C), + UINT32_C(0x087951C1), UINT32_C(0x2BC27310), UINT32_C(0x7696A8C7) } }, + { { UINT32_C(0xB67E834A), UINT32_C(0xA05736D5), UINT32_C(0x9098D42A), + UINT32_C(0xDD2AA0F2), UINT32_C(0x49C69DDC), UINT32_C(0x09F0C1D8), + UINT32_C(0x8FF0F0F3), UINT32_C(0x81F8BC1C), UINT32_C(0x03037775), + UINT32_C(0x36FD3A4F), UINT32_C(0x4B06DF5C), UINT32_C(0x8286717D) }, + { UINT32_C(0xA9079EA2), UINT32_C(0xB878F496), UINT32_C(0xD7DC796D), + UINT32_C(0xA5642426), UINT32_C(0x67FDAC2B), UINT32_C(0x29B9351A), + UINT32_C(0x1D543CDE), UINT32_C(0x93774C0E), UINT32_C(0x1A8E31C4), + UINT32_C(0x4F8793BA), UINT32_C(0x6C94798A), UINT32_C(0x7C9F3F3A) } }, + { { UINT32_C(0xCB8ECDB8), UINT32_C(0x23C5AD11), UINT32_C(0x485A6A02), + UINT32_C(0x1E88D25E), UINT32_C(0xF1E268AE), UINT32_C(0xB27CBE84), + UINT32_C(0xF4CD0475), UINT32_C(0xDDA80238), UINT32_C(0x49F8EB1B), + UINT32_C(0x4F88857B), UINT32_C(0x52FB07F9), UINT32_C(0x91B1221F) }, + { UINT32_C(0x8637FA67), UINT32_C(0x7CE97460), UINT32_C(0x632198D8), + UINT32_C(0x528B3CF4), UINT32_C(0xF6623769), UINT32_C(0x33365AB3), + UINT32_C(0x3A83A30F), UINT32_C(0x6FEBCFFF), UINT32_C(0x9BD341EB), + UINT32_C(0x398F4C99), UINT32_C(0xB33A333C), UINT32_C(0x180712BB) } }, + { { UINT32_C(0xD93429E7), UINT32_C(0x2B8655A2), UINT32_C(0x75C8B9EE), + UINT32_C(0x99D600BB), UINT32_C(0x88FCA6CD), UINT32_C(0x9FC1AF8B), + UINT32_C(0x7C311F80), UINT32_C(0x2FB53386), UINT32_C(0xE8A71EEE), + UINT32_C(0x20743ECB), UINT32_C(0xE848B49E), UINT32_C(0xEC3713C4) }, + { UINT32_C(0xBB886817), UINT32_C(0x5B2037B5), UINT32_C(0x307DBAF4), + 
UINT32_C(0x40EF5AC2), UINT32_C(0x1B3F643D), UINT32_C(0xC2888AF2), + UINT32_C(0x9D5A4190), UINT32_C(0x0D8252E1), UINT32_C(0x2DB52A8A), + UINT32_C(0x06CC0BEC), UINT32_C(0xAB94E969), UINT32_C(0xB84B98EA) } }, + { { UINT32_C(0xA0321E0E), UINT32_C(0x2E7AC078), UINT32_C(0xEF3DAAB6), + UINT32_C(0x5C5A1168), UINT32_C(0xADDD454A), UINT32_C(0xD2D573CB), + UINT32_C(0x36259CC7), UINT32_C(0x27E149E2), UINT32_C(0xA63F47F1), + UINT32_C(0x1EDFD469), UINT32_C(0xF1BD2CFD), UINT32_C(0x039AD674) }, + { UINT32_C(0x3077D3CC), UINT32_C(0xBFA633FC), UINT32_C(0x2FD64E9F), + UINT32_C(0x14A7C82F), UINT32_C(0x9D824999), UINT32_C(0xAAA65014), + UINT32_C(0x21760F2E), UINT32_C(0x41AB113B), UINT32_C(0x1CAE260A), + UINT32_C(0x23E646C5), UINT32_C(0x68DC5159), UINT32_C(0x08062C8F) } }, + }, + { + { { UINT32_C(0x204BE028), UINT32_C(0x2E7D0A16), UINT32_C(0xD0E41851), + UINT32_C(0x4F1D082E), UINT32_C(0x3EB317F9), UINT32_C(0x15F1DDC6), + UINT32_C(0x5ADF71D7), UINT32_C(0xF0275071), UINT32_C(0xEE858BC3), + UINT32_C(0x2CE33C2E), UINT32_C(0xDA73B71A), UINT32_C(0xA24C76D1) }, + { UINT32_C(0x6C70C483), UINT32_C(0x9EF6A70A), UINT32_C(0x05CF9612), + UINT32_C(0xEFCF1705), UINT32_C(0x7502DE64), UINT32_C(0x9F5BF5A6), + UINT32_C(0xA4701973), UINT32_C(0xD11122A1), UINT32_C(0xA2EA7B24), + UINT32_C(0x82CFAAC2), UINT32_C(0x0A4582E1), UINT32_C(0x6CAD67CC) } }, + { { UINT32_C(0xB4DC8600), UINT32_C(0x597A26FF), UINT32_C(0xF9288555), + UINT32_C(0x264A09F3), UINT32_C(0x5C27F5F6), UINT32_C(0x0B06AFF6), + UINT32_C(0xD8D544E6), UINT32_C(0xCE5AB665), UINT32_C(0x99275C32), + UINT32_C(0x92F031BE), UINT32_C(0xF42E0E7C), UINT32_C(0xAF51C5BB) }, + { UINT32_C(0x1E37B36D), UINT32_C(0x5BB28B06), UINT32_C(0x8473543A), + UINT32_C(0x583FBA6A), UINT32_C(0xF93FB7DC), UINT32_C(0xE73FD299), + UINT32_C(0x6E2CCAD9), UINT32_C(0xFCD999A8), UINT32_C(0x334D4F57), + UINT32_C(0xB8C8A6DF), UINT32_C(0x9A2ACC9B), UINT32_C(0x5ADB28DD) } }, + { { UINT32_C(0x111792B9), UINT32_C(0x5ADF3D9A), UINT32_C(0x4F1E0D09), + UINT32_C(0x1C77A305), 
UINT32_C(0xA82D3736), UINT32_C(0xF9FBCE33), + UINT32_C(0x718C8AA3), UINT32_C(0xF307823E), UINT32_C(0x416CCF69), + UINT32_C(0x860578CF), UINT32_C(0x1EF8465B), UINT32_C(0xB942ADD8) }, + { UINT32_C(0xCD9472E1), UINT32_C(0x9EE0CF97), UINT32_C(0xB01528A8), + UINT32_C(0xE6792EEF), UINT32_C(0xC09DA90B), UINT32_C(0xF99B9A8D), + UINT32_C(0xCBF3CCB8), UINT32_C(0x1F521C2D), UINT32_C(0x91A62632), + UINT32_C(0x6BF66948), UINT32_C(0x854FE9DA), UINT32_C(0xCC7A9CEB) } }, + { { UINT32_C(0x491CCB92), UINT32_C(0x46303171), UINT32_C(0x2771235B), + UINT32_C(0xA80A8C0D), UINT32_C(0xF172C7CF), UINT32_C(0xD8E497FF), + UINT32_C(0x35B193CF), UINT32_C(0x7F7009D7), UINT32_C(0xF19DF4BC), + UINT32_C(0x6B9FD3F7), UINT32_C(0xB46F1E37), UINT32_C(0xADA548C3) }, + { UINT32_C(0xC7A20270), UINT32_C(0x87C6EAA9), UINT32_C(0xAE78EF99), + UINT32_C(0xEF2245D6), UINT32_C(0x539EAB95), UINT32_C(0x2A121042), + UINT32_C(0x79B8F5CC), UINT32_C(0x29A6D5D7), UINT32_C(0xB77840DC), + UINT32_C(0x33803A10), UINT32_C(0x11A6A30F), UINT32_C(0xFEDD3A70) } }, + { { UINT32_C(0x142403D1), UINT32_C(0xFA070E22), UINT32_C(0x15C6F7F5), + UINT32_C(0x68FF3160), UINT32_C(0x223A0CE8), UINT32_C(0xE09F04E6), + UINT32_C(0x53E14183), UINT32_C(0x22BBD018), UINT32_C(0xCF45B75B), + UINT32_C(0x35D9FAFC), UINT32_C(0x7ECEEC88), UINT32_C(0x3A34819D) }, + { UINT32_C(0xD33262D2), UINT32_C(0xD9CF7568), UINT32_C(0x841D1505), + UINT32_C(0x431036D5), UINT32_C(0x9EB2A79A), UINT32_C(0x0C800565), + UINT32_C(0x5F7EDC6A), UINT32_C(0x8E77D9F0), UINT32_C(0x65E800AA), + UINT32_C(0x19E12D05), UINT32_C(0xB7784E7C), UINT32_C(0x335C8D36) } }, + { { UINT32_C(0x6484FD40), UINT32_C(0x8B2FC4E9), UINT32_C(0xA35D24EA), + UINT32_C(0xEE702764), UINT32_C(0xB871C3F3), UINT32_C(0x15B28AC7), + UINT32_C(0xE097047F), UINT32_C(0x805B4048), UINT32_C(0x647CAD2F), + UINT32_C(0xD6F1B8DF), UINT32_C(0xDC7DD67F), UINT32_C(0xF1D5B458) }, + { UINT32_C(0x25148803), UINT32_C(0x324C529C), UINT32_C(0x21274FAF), + UINT32_C(0xF6185EBE), UINT32_C(0x95148B55), UINT32_C(0xAF14751E), + 
UINT32_C(0x28F284F4), UINT32_C(0x283ED89D), UINT32_C(0x4CBEBF1A), + UINT32_C(0x93AD20E7), UINT32_C(0x882935E1), UINT32_C(0x5F6EC65D) } }, + { { UINT32_C(0xA4DCEFE9), UINT32_C(0xE222EBA4), UINT32_C(0xEC1CEB74), + UINT32_C(0x63AD235F), UINT32_C(0xE05B18E7), UINT32_C(0x2E0BF749), + UINT32_C(0xB48BDD87), UINT32_C(0x547BD050), UINT32_C(0xF5AA2FC4), + UINT32_C(0x0490C970), UINT32_C(0x2B431390), UINT32_C(0xCED5E4CF) }, + { UINT32_C(0x51D2898E), UINT32_C(0x07D82704), UINT32_C(0x083B57D4), + UINT32_C(0x44B72442), UINT32_C(0x5037FCE8), UINT32_C(0xA4ADA230), + UINT32_C(0x50510DA6), UINT32_C(0x55F7905E), UINT32_C(0x8D890A98), + UINT32_C(0xD8EE724F), UINT32_C(0x11B85640), UINT32_C(0x925A8E7C) } }, + { { UINT32_C(0x1CA459ED), UINT32_C(0x5BFA10CD), UINT32_C(0x6DCF56BF), + UINT32_C(0x593F085A), UINT32_C(0xC0579C3E), UINT32_C(0xE6F0AD9B), + UINT32_C(0x2527C1AD), UINT32_C(0xC11C95A2), UINT32_C(0xCF1CB8B3), + UINT32_C(0x7CFA71E1), UINT32_C(0x1D6DC79D), UINT32_C(0xEDCFF833) }, + { UINT32_C(0x432521C9), UINT32_C(0x581C4BBE), UINT32_C(0x144E11A0), + UINT32_C(0xBF620096), UINT32_C(0xBE3A107B), UINT32_C(0x54C38B71), + UINT32_C(0xE2606EC0), UINT32_C(0xED555E37), UINT32_C(0xD721D034), + UINT32_C(0x3FB148B8), UINT32_C(0x0091BC90), UINT32_C(0x79D53DAD) } }, + { { UINT32_C(0xB7082C80), UINT32_C(0xE32068C5), UINT32_C(0x7A144E22), + UINT32_C(0x4140FFD2), UINT32_C(0x9EDD9E86), UINT32_C(0x5811D2F0), + UINT32_C(0xC572C465), UINT32_C(0xCDD79B5F), UINT32_C(0xC97BF450), + UINT32_C(0x3563FED1), UINT32_C(0xF2CE5C9C), UINT32_C(0x985C1444) }, + { UINT32_C(0x99950F1C), UINT32_C(0x260AE797), UINT32_C(0x765E9DED), + UINT32_C(0x659F4F40), UINT32_C(0x2E3BC286), UINT32_C(0x2A412D66), + UINT32_C(0xF87E0C82), UINT32_C(0xE865E62C), UINT32_C(0x6C05E7D7), + UINT32_C(0xD63D3A9A), UINT32_C(0x8686F89A), UINT32_C(0x96725D67) } }, + { { UINT32_C(0xAB7EA0F5), UINT32_C(0xC99A5E4C), UINT32_C(0xC5393FA9), + UINT32_C(0xC9860A1A), UINT32_C(0x8FDEEFC0), UINT32_C(0x9ED83CEE), + UINT32_C(0x5ED6869A), UINT32_C(0xE3EA8B4C), 
UINT32_C(0xD2EED3A9), + UINT32_C(0x89A85463), UINT32_C(0xE421A622), UINT32_C(0x2CD91B6D) }, + { UINT32_C(0x2C91C41D), UINT32_C(0x6FEC1EF3), UINT32_C(0x8171037D), + UINT32_C(0xB1540D1F), UINT32_C(0x1C010E5B), UINT32_C(0x4FE4991A), + UINT32_C(0xFC1C7368), UINT32_C(0x28A3469F), UINT32_C(0xAF118781), + UINT32_C(0xE1EEECD1), UINT32_C(0x99EF3531), UINT32_C(0x1BCCB977) } }, + { { UINT32_C(0xC4DAB7B8), UINT32_C(0x63D3B638), UINT32_C(0x3F7F5BAB), + UINT32_C(0xD92133B6), UINT32_C(0x09FB6069), UINT32_C(0x2573EE20), + UINT32_C(0x890A1686), UINT32_C(0x771FABDF), UINT32_C(0xA77AFFF5), + UINT32_C(0x1D0BA21F), UINT32_C(0xBA3DD2C0), UINT32_C(0x83145FCC) }, + { UINT32_C(0x2D115C20), UINT32_C(0xFA073A81), UINT32_C(0x19176F27), + UINT32_C(0x6AB7A9D3), UINT32_C(0x9AC639EE), UINT32_C(0xAF62CF93), + UINT32_C(0x2CCD1319), UINT32_C(0xF73848B9), UINT32_C(0x3C71659D), + UINT32_C(0x3B613234), UINT32_C(0x10AB3826), UINT32_C(0xF8E0011C) } }, + { { UINT32_C(0x0282FFA5), UINT32_C(0x0501F036), UINT32_C(0xD9E0F15A), + UINT32_C(0xC39A5CF4), UINT32_C(0x9A3D1F3C), UINT32_C(0x48D8C729), + UINT32_C(0x64E18EDA), UINT32_C(0xB5FC136B), UINT32_C(0x7E58FEF0), + UINT32_C(0xE81B53D9), UINT32_C(0xF7B0F28D), UINT32_C(0x0D534055) }, + { UINT32_C(0x7A80619B), UINT32_C(0x47B8DE12), UINT32_C(0x81F9E55D), + UINT32_C(0x60E2A2B3), UINT32_C(0xCF564CC5), UINT32_C(0x6E9624D7), + UINT32_C(0x6BDEDFFF), UINT32_C(0xFDF18A21), UINT32_C(0xC0D5FC82), + UINT32_C(0x3787DE38), UINT32_C(0x497A6B11), UINT32_C(0xCBCAA347) } }, + { { UINT32_C(0xB226465A), UINT32_C(0x6E7EF35E), UINT32_C(0x5F8A2BAF), + UINT32_C(0x4B469919), UINT32_C(0x1120D93F), UINT32_C(0x44B3A3CF), + UINT32_C(0x68F34AD1), UINT32_C(0xB052C8B6), UINT32_C(0xEF7632DD), + UINT32_C(0x27EC574B), UINT32_C(0x685DE26F), UINT32_C(0xAEBEA108) }, + { UINT32_C(0xE39424B6), UINT32_C(0xDA33236B), UINT32_C(0xEBCC22AD), + UINT32_C(0xB1BD94A9), UINT32_C(0x2CDFB5D5), UINT32_C(0x6DDEE6CC), + UINT32_C(0x6F14069A), UINT32_C(0xBDAED927), UINT32_C(0x2A247CB7), + UINT32_C(0x2ADE427C), 
UINT32_C(0xED156A40), UINT32_C(0xCE96B436) } }, + { { UINT32_C(0x81F3F819), UINT32_C(0xDDDCA360), UINT32_C(0xD419B96A), + UINT32_C(0x4AF4A49F), UINT32_C(0x7CB966B9), UINT32_C(0x746C6525), + UINT32_C(0x6F610023), UINT32_C(0x01E39088), UINT32_C(0x98DD33FC), + UINT32_C(0x05ECB38D), UINT32_C(0x8F84EDF4), UINT32_C(0x962B971B) }, + { UINT32_C(0x6A6F2602), UINT32_C(0xEB32C0A5), UINT32_C(0x562D60F2), + UINT32_C(0xF026AF71), UINT32_C(0x84615FAB), UINT32_C(0xA9E246BF), + UINT32_C(0x75DBAE01), UINT32_C(0xAD967092), UINT32_C(0x3ECE5D07), + UINT32_C(0xBF97C79B), UINT32_C(0x74EAA3D3), UINT32_C(0xE06266C7) } }, + { { UINT32_C(0x2E6DBB6E), UINT32_C(0x161A0157), UINT32_C(0x60FA8F47), + UINT32_C(0xB8AF4904), UINT32_C(0x00197F22), UINT32_C(0xE4336C44), + UINT32_C(0x9CEDCE0E), UINT32_C(0xF811AFFA), UINT32_C(0xF94C2EF1), + UINT32_C(0xB1DD7685), UINT32_C(0xCA957BB0), UINT32_C(0xEEDC0F4B) }, + { UINT32_C(0x4AA76BB1), UINT32_C(0xD319FD57), UINT32_C(0x16CD7CCB), + UINT32_C(0xB3525D7C), UINT32_C(0xA97DD072), UINT32_C(0x7B22DA9C), + UINT32_C(0x38A83E71), UINT32_C(0x99DB84BD), UINT32_C(0xC0EDD8BE), + UINT32_C(0x4939BC8D), UINT32_C(0x903A932C), UINT32_C(0x06D524EA) } }, + { { UINT32_C(0x0E31F639), UINT32_C(0x4BC950EC), UINT32_C(0x6016BE30), + UINT32_C(0xB7ABD3DC), UINT32_C(0x6703DAD0), UINT32_C(0x3B0F4473), + UINT32_C(0x0AC1C4EA), UINT32_C(0xCC405F8B), UINT32_C(0x176C3FEE), + UINT32_C(0x9BED5E57), UINT32_C(0x36AE36C2), UINT32_C(0xF4524810) }, + { UINT32_C(0x15D7B503), UINT32_C(0xC1EDBB83), UINT32_C(0xE30F3657), + UINT32_C(0x943B1156), UINT32_C(0x98377805), UINT32_C(0x984E9EEF), + UINT32_C(0x36CF1DEB), UINT32_C(0x291AE7AC), UINT32_C(0xA9F66DF3), + UINT32_C(0xFED8748C), UINT32_C(0xFEA8FA5D), UINT32_C(0xECA758BB) } }, + }, + { + { { UINT32_C(0x2DD1B249), UINT32_C(0xACC787EF), UINT32_C(0xD82976F1), + UINT32_C(0x736E1030), UINT32_C(0xA01B3649), UINT32_C(0x0A6940FA), + UINT32_C(0xC42341E7), UINT32_C(0xE00B926B), UINT32_C(0xDE8FFD6C), + UINT32_C(0x911508D0), UINT32_C(0x5276B0CB), UINT32_C(0x4DCF8D46) 
}, + { UINT32_C(0xCC3CAD8D), UINT32_C(0x23AD0A90), UINT32_C(0xADED962A), + UINT32_C(0x2A92E54C), UINT32_C(0xF231BFAF), UINT32_C(0x93FBEC4D), + UINT32_C(0x4798987A), UINT32_C(0x9544BC77), UINT32_C(0x08E29F60), + UINT32_C(0x48084E25), UINT32_C(0x32DE5869), UINT32_C(0x0C0D2F43) } }, + { { UINT32_C(0x3A9ABC13), UINT32_C(0x6778F970), UINT32_C(0x3D2B166B), + UINT32_C(0xFD014FAC), UINT32_C(0x3C6FED60), UINT32_C(0x1FE4FC78), + UINT32_C(0xAA7C69C5), UINT32_C(0x04295FA8), UINT32_C(0x7C123175), + UINT32_C(0xA01DE56D), UINT32_C(0x3D9A713A), UINT32_C(0x0FA0D3A8) }, + { UINT32_C(0xE3E08ADD), UINT32_C(0xA7A6E5E3), UINT32_C(0x1AC58F85), + UINT32_C(0xBD77E94B), UINT32_C(0xB7321A9C), UINT32_C(0x078F6FD2), + UINT32_C(0x911EF6D9), UINT32_C(0x9564601E), UINT32_C(0x415C6BEF), + UINT32_C(0x31C5C1B2), UINT32_C(0xD3212C62), UINT32_C(0xE6C0C91E) } }, + { { UINT32_C(0x0D16022F), UINT32_C(0xBA7BD23C), UINT32_C(0x198BE288), + UINT32_C(0xE9CF4750), UINT32_C(0x47DEEC65), UINT32_C(0x304E3169), + UINT32_C(0x96EEB288), UINT32_C(0xCF65B41F), UINT32_C(0x927E9E3B), + UINT32_C(0x17E99C17), UINT32_C(0xF6630A80), UINT32_C(0x82225546) }, + { UINT32_C(0xCA067BD9), UINT32_C(0x15122B8A), UINT32_C(0xB77B4E98), + UINT32_C(0xE2673205), UINT32_C(0x9407CA63), UINT32_C(0x13037565), + UINT32_C(0x8B621602), UINT32_C(0x53624F54), UINT32_C(0xEAE4BD06), + UINT32_C(0x96AF2CB1), UINT32_C(0x8FA20829), UINT32_C(0x576ECD1C) } }, + { { UINT32_C(0x7E02D2D0), UINT32_C(0xA551CE10), UINT32_C(0x9D13DBC7), + UINT32_C(0x1584ED24), UINT32_C(0x4DA7B6D8), UINT32_C(0x082017AD), + UINT32_C(0xE054BC48), UINT32_C(0x81918A8F), UINT32_C(0x572DC384), + UINT32_C(0x677DB48E), UINT32_C(0x6155484C), UINT32_C(0x2EF82296) }, + { UINT32_C(0x41B9C231), UINT32_C(0xC3DB14C6), UINT32_C(0x4A766192), + UINT32_C(0x910A87D1), UINT32_C(0x10AB8E0F), UINT32_C(0x93D5CC86), + UINT32_C(0xAE57CA1B), UINT32_C(0x4194D548), UINT32_C(0x267FC37A), + UINT32_C(0xFAF3A1D6), UINT32_C(0x13B87C97), UINT32_C(0x70EC2364) } }, + { { UINT32_C(0x5E12756A), UINT32_C(0x064B565B), 
UINT32_C(0xAE49C98E), + UINT32_C(0x953B7BD1), UINT32_C(0xF7001D91), UINT32_C(0xE0CE8284), + UINT32_C(0xF31108D0), UINT32_C(0x1546060B), UINT32_C(0x6779B6E2), + UINT32_C(0xDBC2C3F4), UINT32_C(0xE0DD07CF), UINT32_C(0x157AA47D) }, + { UINT32_C(0xF23B261E), UINT32_C(0xBF4A1C6F), UINT32_C(0x654F4BE5), + UINT32_C(0x5B8EED30), UINT32_C(0x6B20CCD8), UINT32_C(0xDF5896D3), + UINT32_C(0x559ED23D), UINT32_C(0x56920E2C), UINT32_C(0xFA6E3E27), + UINT32_C(0x901F342E), UINT32_C(0x896CA082), UINT32_C(0x745C747C) } }, + { { UINT32_C(0x2944EC84), UINT32_C(0xDBCCD575), UINT32_C(0xA5FF65FE), + UINT32_C(0x54A2A935), UINT32_C(0x1A1319B6), UINT32_C(0x88C92A5E), + UINT32_C(0x82DA96C1), UINT32_C(0x9537C28F), UINT32_C(0x35F93C46), + UINT32_C(0xB6836474), UINT32_C(0x65B0846C), UINT32_C(0xEC526A1D) }, + { UINT32_C(0xF382C412), UINT32_C(0x6F12AFBD), UINT32_C(0x9E99FA06), + UINT32_C(0x5EBC81D8), UINT32_C(0x869B93BD), UINT32_C(0x97B5D672), + UINT32_C(0x377E12AA), UINT32_C(0x2983C310), UINT32_C(0x24D681EA), + UINT32_C(0x48759681), UINT32_C(0x287FD767), UINT32_C(0x1E0BD106) } }, + { { UINT32_C(0x7231247F), UINT32_C(0x0AC75A3E), UINT32_C(0xEF27AD3A), + UINT32_C(0x65C20DE6), UINT32_C(0xBD02EEE5), UINT32_C(0x87EB6CF1), + UINT32_C(0x00147E03), UINT32_C(0x264ACA7A), UINT32_C(0xAE2A9437), + UINT32_C(0xEBC78581), UINT32_C(0x6316BFA5), UINT32_C(0x9929964E) }, + { UINT32_C(0x9AF207EF), UINT32_C(0xDC09E040), UINT32_C(0x0C9D8658), + UINT32_C(0x3ECFFE2D), UINT32_C(0xDFB43D38), UINT32_C(0x547EA735), + UINT32_C(0xD04B1B20), UINT32_C(0x5485247B), UINT32_C(0xBFD8B609), + UINT32_C(0xB18D3F02), UINT32_C(0xCCE73705), UINT32_C(0xEEB3E805) } }, + { { UINT32_C(0xDB93850F), UINT32_C(0xDAB1A525), UINT32_C(0x8365B7D5), + UINT32_C(0x18ADAA23), UINT32_C(0x113FC8C7), UINT32_C(0x58485C90), + UINT32_C(0x348AD323), UINT32_C(0x80C3DBB9), UINT32_C(0xE16ADCA1), + UINT32_C(0xAF892FB5), UINT32_C(0x979F005A), UINT32_C(0x2183C879) }, + { UINT32_C(0x0643A99E), UINT32_C(0x20FA1A94), UINT32_C(0x1A1609CB), + UINT32_C(0x2741221C), 
UINT32_C(0x3C2FBDDC), UINT32_C(0x1C1687E5), + UINT32_C(0xD420D6CF), UINT32_C(0xDCCF329E), UINT32_C(0x2B7197D1), + UINT32_C(0x75D5577D), UINT32_C(0xC8729D9C), UINT32_C(0x4C3C3875) } }, + { { UINT32_C(0xE5CBDCB9), UINT32_C(0x5E79F995), UINT32_C(0xA742FCC7), + UINT32_C(0x03139824), UINT32_C(0x239EF4A1), UINT32_C(0x6D0C214A), + UINT32_C(0x401A2944), UINT32_C(0x53A27952), UINT32_C(0xC10BCDF0), + UINT32_C(0xF42A1B34), UINT32_C(0x7CF38061), UINT32_C(0x426BAA43) }, + { UINT32_C(0xA96AD0C8), UINT32_C(0x16A53139), UINT32_C(0x6BAD5301), + UINT32_C(0x627F1D31), UINT32_C(0x4ACCD627), UINT32_C(0x5AF74877), + UINT32_C(0xB55B0FB8), UINT32_C(0x3C58A1C5), UINT32_C(0xF4399A6A), + UINT32_C(0xFAA57B91), UINT32_C(0xC28094B8), UINT32_C(0xBAD283FB) } }, + { { UINT32_C(0x83E10A93), UINT32_C(0xBA32AC61), UINT32_C(0xEC06BDB0), + UINT32_C(0x1C91F6B4), UINT32_C(0x65F60C93), UINT32_C(0x42E6CFBC), + UINT32_C(0x2C0CDCBE), UINT32_C(0xEFE33BC8), UINT32_C(0x4D6414F2), + UINT32_C(0xE0FE1D09), UINT32_C(0x76FA5C5B), UINT32_C(0x4C112316) }, + { UINT32_C(0x2E26200A), UINT32_C(0x812C1DC6), UINT32_C(0xEE879D25), + UINT32_C(0xD6C413C5), UINT32_C(0xBCA8BAFE), UINT32_C(0xBEADE255), + UINT32_C(0xCE2BA0E7), UINT32_C(0x0EAF4AE2), UINT32_C(0xC4F4408A), + UINT32_C(0x66E9FFB0), UINT32_C(0x9782C7AD), UINT32_C(0xB36A86D7) } }, + { { UINT32_C(0xBAD8D1C7), UINT32_C(0x10FCD1F4), UINT32_C(0x4502F645), + UINT32_C(0xC903816A), UINT32_C(0xA503B895), UINT32_C(0x7FAC1CC1), + UINT32_C(0x0778900C), UINT32_C(0x8BCD6041), UINT32_C(0x5BCF2784), + UINT32_C(0x5A5F2202), UINT32_C(0x10EDB896), UINT32_C(0x9B157E87) }, + { UINT32_C(0xF602A8B1), UINT32_C(0x4C58DA69), UINT32_C(0x59EC9D7E), + UINT32_C(0xD55132F8), UINT32_C(0xA26D4870), UINT32_C(0x155B719A), + UINT32_C(0x36441746), UINT32_C(0x25AAFCA3), UINT32_C(0xDD3B6B30), + UINT32_C(0x01F83338), UINT32_C(0x551917CC), UINT32_C(0xD52BB5C1) } }, + { { UINT32_C(0x6135066A), UINT32_C(0xA0B6207B), UINT32_C(0x2AEC8CBD), + UINT32_C(0xB3409F84), UINT32_C(0x19D87DF0), UINT32_C(0x5EBFD436), + 
UINT32_C(0xE8526DE2), UINT32_C(0xCB4C209B), UINT32_C(0x21E1A230), + UINT32_C(0xD764085B), UINT32_C(0x0899964A), UINT32_C(0x96F91554) }, + { UINT32_C(0xA57D122A), UINT32_C(0xB0BEC8EF), UINT32_C(0x5D9D0B33), + UINT32_C(0xC572EC56), UINT32_C(0xCFA7C72C), UINT32_C(0xEBE2A780), + UINT32_C(0x9EF3295C), UINT32_C(0x52D40CDB), UINT32_C(0x0DE74DFE), + UINT32_C(0x64004584), UINT32_C(0xC0809716), UINT32_C(0xA6846432) } }, + { { UINT32_C(0x02C979BC), UINT32_C(0x0D09E8CD), UINT32_C(0x409F4F2A), + UINT32_C(0xEC4B21F6), UINT32_C(0x13FB07CA), UINT32_C(0x68125C70), + UINT32_C(0x6FDFA72A), UINT32_C(0x1C4CFC17), UINT32_C(0x04539FCD), + UINT32_C(0xC9E71B9E), UINT32_C(0x8BA70797), UINT32_C(0x94B7103D) }, + { UINT32_C(0xB33FDE83), UINT32_C(0x6B81E82F), UINT32_C(0xEABAFD4B), + UINT32_C(0x7CA9A8CA), UINT32_C(0xEAB819CE), UINT32_C(0xADD85A67), + UINT32_C(0x98E99FFC), UINT32_C(0xAEC25483), UINT32_C(0x274A07B6), + UINT32_C(0x938D6440), UINT32_C(0x564A6AA0), UINT32_C(0x0A5C7097) } }, + { { UINT32_C(0x2F4FCEB6), UINT32_C(0x7284FF50), UINT32_C(0x78D0D5CB), + UINT32_C(0x0A28715A), UINT32_C(0xBFCE187C), UINT32_C(0xE70B7014), + UINT32_C(0x7A17148D), UINT32_C(0xA6B538F5), UINT32_C(0xDD427166), + UINT32_C(0x1DAB07C9), UINT32_C(0x149D23CA), UINT32_C(0x5C5578B0) }, + { UINT32_C(0x875B5EDE), UINT32_C(0x875E2056), UINT32_C(0x02C893B9), + UINT32_C(0xCBF44B6D), UINT32_C(0x5C2993FB), UINT32_C(0x5715A77E), + UINT32_C(0x3410597E), UINT32_C(0xAF328146), UINT32_C(0x42DC49DF), + UINT32_C(0x65DF418F), UINT32_C(0xA9EE52F6), UINT32_C(0x7AC9C720) } }, + { { UINT32_C(0x62955486), UINT32_C(0xB1C9AA07), UINT32_C(0x245061D7), + UINT32_C(0xCBF35BE3), UINT32_C(0x8CF4DDC0), UINT32_C(0x811E1BD3), + UINT32_C(0x948F7C84), UINT32_C(0xD9D4589C), UINT32_C(0xCB0F996D), + UINT32_C(0x30D09A0F), UINT32_C(0x590E7704), UINT32_C(0x1A1B3B7A) }, + { UINT32_C(0x2082768D), UINT32_C(0xA848E349), UINT32_C(0x9A249DF4), + UINT32_C(0x9FEBD492), UINT32_C(0x5F20439A), UINT32_C(0x503420AF), + UINT32_C(0x8E2BFCD4), UINT32_C(0x0CBE52B6), 
UINT32_C(0x118C91B2), + UINT32_C(0xB1D5E261), UINT32_C(0x71D8F2BC), UINT32_C(0x93CFF6DA) } }, + { { UINT32_C(0x8AB58944), UINT32_C(0x5F5BC06B), UINT32_C(0x4979882D), + UINT32_C(0xE4BED538), UINT32_C(0xD79B0EB1), UINT32_C(0x57C30362), + UINT32_C(0xEF7C56D8), UINT32_C(0x391AE2C1), UINT32_C(0xADD98625), + UINT32_C(0x28BC2E97), UINT32_C(0x1B257107), UINT32_C(0xFA8E86B8) }, + { UINT32_C(0x6118C715), UINT32_C(0x5E4859F8), UINT32_C(0x524C71DD), + UINT32_C(0x91C83324), UINT32_C(0x6D2F5E6D), UINT32_C(0xFB209243), + UINT32_C(0x2A900A43), UINT32_C(0x6B4FE21F), UINT32_C(0x32A73C1F), + UINT32_C(0x241F75D6), UINT32_C(0x5AE89613), UINT32_C(0xF5BC4629) } }, + } +}; + +/*- + * Finite field inversion. + * Computed with Bernstein-Yang algorithm. + * https://tches.iacr.org/index.php/TCHES/article/view/8298 + * Based on https://github.com/mit-plv/fiat-crypto/tree/master/inversion/c + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp384r1_inv(fe_t output, const fe_t t1) +{ + int i; + fe_t v1, r1, v2; + limb_t *r2 = output; + limb_t f1[LIMB_CNT + 1], g1[LIMB_CNT + 1], f2[LIMB_CNT + 1], + g2[LIMB_CNT + 1]; + limb_t d2, d1 = 1; + + fe_copy(g1, t1); + g1[LIMB_CNT] = 0; + fe_copy(f1, const_psat); + f1[LIMB_CNT] = 0; + fe_copy(r1, const_one); + fe_set_zero(v1); + + /* 1110 divstep iterations */ + for (i = 0; i < 555; i++) { + fiat_secp384r1_divstep(&d2, f2, g2, v2, r2, d1, f1, g1, v1, r1); + fiat_secp384r1_divstep(&d1, f1, g1, v1, r1, d2, f2, g2, v2, r2); + } + + fiat_secp384r1_opp(output, v1); + fiat_secp384r1_selectznz(output, f1[LIMB_CNT] >> (LIMB_BITS - 1), v1, + output); + fiat_secp384r1_mul(output, output, const_divstep); +} + +/*- + * Q := 2P, both projective, Q and P same pointers OK + * Autogenerated: op3/dbl_proj.op3 + * https://eprint.iacr.org/2015/1060 Alg 6 + * ASSERT: a = -3 + */ +static void +point_double(pt_prj_t *Q, const pt_prj_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4; + /* constants */ + 
const limb_t *b = const_b; + /* set pointers for legacy curve arith: X, Y, Z read from P; X3, Y3, Z3 write straight into Q */ + const limb_t *X = P->X; + const limb_t *Y = P->Y; + const limb_t *Z = P->Z; + limb_t *X3 = Q->X; + limb_t *Y3 = Q->Y; + limb_t *Z3 = Q->Z; + + /* the curve arith formula -- straight-line code, keep the statement order: X3, Y3, Z3 double as scratch, and the last read of X, Y, Z happens no later than the first write into Q (the mul into Z3), which is what makes Q == P safe */ + fiat_secp384r1_square(t0, X); + fiat_secp384r1_square(t1, Y); + fiat_secp384r1_square(t2, Z); + fiat_secp384r1_mul(t3, X, Y); + fiat_secp384r1_add(t3, t3, t3); + fiat_secp384r1_mul(t4, Y, Z); + fiat_secp384r1_mul(Z3, X, Z); + fiat_secp384r1_add(Z3, Z3, Z3); + fiat_secp384r1_mul(Y3, b, t2); + fiat_secp384r1_sub(Y3, Y3, Z3); + fiat_secp384r1_add(X3, Y3, Y3); + fiat_secp384r1_add(Y3, X3, Y3); + fiat_secp384r1_sub(X3, t1, Y3); + fiat_secp384r1_add(Y3, t1, Y3); + fiat_secp384r1_mul(Y3, X3, Y3); + fiat_secp384r1_mul(X3, X3, t3); + fiat_secp384r1_add(t3, t2, t2); + fiat_secp384r1_add(t2, t2, t3); + fiat_secp384r1_mul(Z3, b, Z3); + fiat_secp384r1_sub(Z3, Z3, t2); + fiat_secp384r1_sub(Z3, Z3, t0); + fiat_secp384r1_add(t3, Z3, Z3); + fiat_secp384r1_add(Z3, Z3, t3); + fiat_secp384r1_add(t3, t0, t0); + fiat_secp384r1_add(t0, t3, t0); + fiat_secp384r1_sub(t0, t0, t2); + fiat_secp384r1_mul(t0, t0, Z3); + fiat_secp384r1_add(Y3, Y3, t0); + fiat_secp384r1_add(t0, t4, t4); + fiat_secp384r1_mul(Z3, t0, Z3); + fiat_secp384r1_sub(X3, X3, Z3); + fiat_secp384r1_mul(Z3, t0, t1); + fiat_secp384r1_add(Z3, Z3, Z3); + fiat_secp384r1_add(Z3, Z3, Z3); +} + +/*- + * R := Q + P where R and Q are projective, P affine. 
+ * R and Q same pointers OK + * R and P same pointers not OK + * Autogenerated: op3/add_mixed.op3 + * https://eprint.iacr.org/2015/1060 Alg 5 + * ASSERT: a = -3 + */ +static void +point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith: the sum is built in the local buffers X3, Y3, Z3 and only stored into R by the final selects, so Q stays intact until then */ + const limb_t *X1 = Q->X; + const limb_t *Y1 = Q->Y; + const limb_t *Z1 = Q->Z; + const limb_t *X2 = P->X; + const limb_t *Y2 = P->Y; + fe_t X3; + fe_t Y3; + fe_t Z3; + limb_t nz; + + /* check P for affine inf: nz is computed from P->Y alone, and nz == 0 is the case the final selects treat as P = inf */ + fiat_secp384r1_nonzero(&nz, P->Y); + + /* the curve arith formula -- always executed in full, whatever nz turned out to be */ + fiat_secp384r1_mul(t0, X1, X2); + fiat_secp384r1_mul(t1, Y1, Y2); + fiat_secp384r1_add(t3, X2, Y2); + fiat_secp384r1_add(t4, X1, Y1); + fiat_secp384r1_mul(t3, t3, t4); + fiat_secp384r1_add(t4, t0, t1); + fiat_secp384r1_sub(t3, t3, t4); + fiat_secp384r1_mul(t4, Y2, Z1); + fiat_secp384r1_add(t4, t4, Y1); + fiat_secp384r1_mul(Y3, X2, Z1); + fiat_secp384r1_add(Y3, Y3, X1); + fiat_secp384r1_mul(Z3, b, Z1); + fiat_secp384r1_sub(X3, Y3, Z3); + fiat_secp384r1_add(Z3, X3, X3); + fiat_secp384r1_add(X3, X3, Z3); + fiat_secp384r1_sub(Z3, t1, X3); + fiat_secp384r1_add(X3, t1, X3); + fiat_secp384r1_mul(Y3, b, Y3); + fiat_secp384r1_add(t1, Z1, Z1); + fiat_secp384r1_add(t2, t1, Z1); + fiat_secp384r1_sub(Y3, Y3, t2); + fiat_secp384r1_sub(Y3, Y3, t0); + fiat_secp384r1_add(t1, Y3, Y3); + fiat_secp384r1_add(Y3, t1, Y3); + fiat_secp384r1_add(t1, t0, t0); + fiat_secp384r1_add(t0, t1, t0); + fiat_secp384r1_sub(t0, t0, t2); + fiat_secp384r1_mul(t1, t4, Y3); + fiat_secp384r1_mul(t2, t0, Y3); + fiat_secp384r1_mul(Y3, X3, Z3); + fiat_secp384r1_add(Y3, Y3, t2); + fiat_secp384r1_mul(X3, t3, X3); + fiat_secp384r1_sub(X3, X3, t1); + fiat_secp384r1_mul(Z3, t4, Z3); + fiat_secp384r1_mul(t1, t3, t0); + fiat_secp384r1_add(Z3, Z3, t1); + + /* if P is inf, throw all that away and take Q (each select yields its first source when nz == 0, its second otherwise) */ + fiat_secp384r1_selectznz(R->X, nz, 
Q->X, X3); + fiat_secp384r1_selectznz(R->Y, nz, Q->Y, Y3); + fiat_secp384r1_selectznz(R->Z, nz, Q->Z, Z3); +} + +/*- + * R := Q + P all projective. + * R and Q same pointers OK + * R and P same pointers not OK + * Autogenerated: op3/add_proj.op3 + * https://eprint.iacr.org/2015/1060 Alg 4 + * ASSERT: a = -3 + * X3, Y3, Z3 alias R and are written while P is still being read: the first + * write (into X3) comes before the last read of X2 and Z2, hence the no-alias + * rule for R and P. Q is not read again after that first write, which is why + * R == Q is fine. + */ +static void +point_add_proj(pt_prj_t *R, const pt_prj_t *Q, const pt_prj_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4, t5; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith */ + const limb_t *X1 = Q->X; + const limb_t *Y1 = Q->Y; + const limb_t *Z1 = Q->Z; + const limb_t *X2 = P->X; + const limb_t *Y2 = P->Y; + const limb_t *Z2 = P->Z; + limb_t *X3 = R->X; + limb_t *Y3 = R->Y; + limb_t *Z3 = R->Z; + + /* the curve arith formula -- straight-line code; statement order matters because X3, Y3, Z3 double as scratch */ + fiat_secp384r1_mul(t0, X1, X2); + fiat_secp384r1_mul(t1, Y1, Y2); + fiat_secp384r1_mul(t2, Z1, Z2); + fiat_secp384r1_add(t3, X1, Y1); + fiat_secp384r1_add(t4, X2, Y2); + fiat_secp384r1_mul(t3, t3, t4); + fiat_secp384r1_add(t4, t0, t1); + fiat_secp384r1_sub(t3, t3, t4); + fiat_secp384r1_add(t4, Y1, Z1); + fiat_secp384r1_add(t5, Y2, Z2); + fiat_secp384r1_mul(t4, t4, t5); + fiat_secp384r1_add(t5, t1, t2); + fiat_secp384r1_sub(t4, t4, t5); + fiat_secp384r1_add(X3, X1, Z1); + fiat_secp384r1_add(Y3, X2, Z2); + fiat_secp384r1_mul(X3, X3, Y3); + fiat_secp384r1_add(Y3, t0, t2); + fiat_secp384r1_sub(Y3, X3, Y3); + fiat_secp384r1_mul(Z3, b, t2); + fiat_secp384r1_sub(X3, Y3, Z3); + fiat_secp384r1_add(Z3, X3, X3); + fiat_secp384r1_add(X3, X3, Z3); + fiat_secp384r1_sub(Z3, t1, X3); + fiat_secp384r1_add(X3, t1, X3); + fiat_secp384r1_mul(Y3, b, Y3); + fiat_secp384r1_add(t1, t2, t2); + fiat_secp384r1_add(t2, t1, t2); + fiat_secp384r1_sub(Y3, Y3, t2); + fiat_secp384r1_sub(Y3, Y3, t0); + fiat_secp384r1_add(t1, Y3, Y3); + fiat_secp384r1_add(Y3, t1, Y3); + fiat_secp384r1_add(t1, t0, t0); + fiat_secp384r1_add(t0, t1, t0); + fiat_secp384r1_sub(t0, t0, t2); + fiat_secp384r1_mul(t1, t4, Y3); + 
fiat_secp384r1_mul(t2, t0, Y3); + fiat_secp384r1_mul(Y3, X3, Z3); + fiat_secp384r1_add(Y3, Y3, t2); + fiat_secp384r1_mul(X3, t3, X3); + fiat_secp384r1_sub(X3, X3, t1); + fiat_secp384r1_mul(Z3, t4, Z3); + fiat_secp384r1_mul(t1, t3, t0); + fiat_secp384r1_add(Z3, Z3, t1); +} + +/* constants: RADIX is the digit width in bits used by the scalar recodings below; DRADIX = 2^RADIX = 32; DRADIX_WNAF = 2^(RADIX + 1) = 64 */ +#define RADIX 5 +#define DRADIX (1 << RADIX) +#define DRADIX_WNAF ((DRADIX) << 1) + +/*- + * precomp for wnaf scalar multiplication: + * precomp[0] = 1P + * precomp[1] = 3P + * precomp[2] = 5P + * precomp[3] = 7P + * precomp[4] = 9P + * ... + * i.e. precomp[i] = (2i + 1)P for every i below DRADIX / 2. + * The last slot temporarily holds 2P (from point_double) and serves as the + * step added in each loop pass; the final pass overwrites it with its own + * table value. + */ +static void +precomp_wnaf(pt_prj_t precomp[DRADIX / 2], const pt_aff_t *P) +{ + int i; + + fe_copy(precomp[0].X, P->X); + fe_copy(precomp[0].Y, P->Y); + fe_copy(precomp[0].Z, const_one); + point_double(&precomp[DRADIX / 2 - 1], &precomp[0]); + + for (i = 1; i < DRADIX / 2; i++) + point_add_proj(&precomp[i], &precomp[DRADIX / 2 - 1], &precomp[i - 1]); +} + +/* fetch a scalar bit: bit idx of the 48-byte scalar in, where bit 0 is the low bit of in[0] (little-endian); returns 0 for any idx outside [0, 384) */ +static int +scalar_get_bit(const unsigned char in[48], int idx) +{ + int widx, rshift; + + widx = idx >> 3; + rshift = idx & 0x7; + + if (idx < 0 || widx >= 48) + return 0; + + return (in[widx] >> rshift) & 0x1; +} + +/*- + * Compute "regular" wnaf representation of a scalar. + * See "Exponent Recoding and Regular Exponentiation Algorithms", + * Tunstall et al., AfricaCrypt 2009, Alg 6. + * It forces an odd scalar and outputs digits in + * {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...} + * i.e. signed odd digits with _no zeroes_ -- that makes it "regular". 
+ */ +static void +scalar_rwnaf(int8_t out[77], const unsigned char in[48]) +{ + int i; + int8_t window, d; + + window = (in[0] & (DRADIX_WNAF - 1)) | 1; /* low 6 bits of the scalar, low bit forced to 1 */ + for (i = 0; i < 76; i++) { + d = (window & (DRADIX_WNAF - 1)) - DRADIX; /* odd digit in [-31, 31] */ + out[i] = d; + window = (window - d) >> RADIX; /* drop the digit; the remainder is odd again */ + window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1; + window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2; + window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3; + window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4; + window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5; + } + out[i] = window; /* digit 76: whatever is left in the window */ +} + +/*- + * Compute "textbook" wnaf representation of a scalar. + * NB: not constant time + * Emits one digit per bit position (385 of them); every digit is either + * zero or odd and lies in [-31, 31]. + */ +static void +scalar_wnaf(int8_t out[385], const unsigned char in[48]) +{ + int i; + int8_t window, d; + + window = in[0] & (DRADIX_WNAF - 1); + for (i = 0; i < 385; i++) { + d = 0; /* stays 0 for an even window */ + if ((window & 1) && ((d = window & (DRADIX_WNAF - 1)) & DRADIX)) + d -= DRADIX_WNAF; /* low 6 bits at or above DRADIX become negative digits */ + out[i] = d; + window = (window - d) >> 1; + window += scalar_get_bit(in, i + 1 + RADIX) << RADIX; + } +} + +/*- + * Simultaneous scalar multiplication: interleaved "textbook" wnaf. + * NB: not constant time + * The digits of b index the precomp table built from P; the digits of a + * index lut_cmb[0] (a table defined outside this function -- presumably odd + * multiples of the generator, TODO confirm). + * flipped records that Q currently holds the negation of the running sum, so + * a negative digit is handled by negating Q.Y rather than the table point. + */ +static void +var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48], + const unsigned char b[48], const pt_aff_t *P) +{ + int i, d, is_neg, is_inf = 1, flipped = 0; + int8_t anaf[385] = { 0 }; + int8_t bnaf[385] = { 0 }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }; + pt_prj_t precomp[DRADIX / 2]; + + precomp_wnaf(precomp, P); + scalar_wnaf(anaf, a); + scalar_wnaf(bnaf, b); + + for (i = 384; i >= 0; i--) { + if (!is_inf) + point_double(&Q, &Q); + if ((d = bnaf[i])) { + if ((is_neg = d < 0) != flipped) { + fiat_secp384r1_opp(Q.Y, Q.Y); + flipped ^= 1; + } + d = (is_neg) ? 
(-d - 1) >> 1 : (d - 1) >> 1; /* table index of the digit magnitude: (|d| - 1) / 2 */ + if (is_inf) { + /* initialize accumulator: first nonzero digit, so copy the table point instead of adding */ + fe_copy(Q.X, &precomp[d].X); + fe_copy(Q.Y, &precomp[d].Y); + fe_copy(Q.Z, &precomp[d].Z); + is_inf = 0; + } else + point_add_proj(&Q, &Q, &precomp[d]); + } + if ((d = anaf[i])) { + if ((is_neg = d < 0) != flipped) { + fiat_secp384r1_opp(Q.Y, Q.Y); + flipped ^= 1; + } + d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; + if (is_inf) { + /* initialize accumulator: lut_cmb entries are affine, so Z is set to const_one */ + fe_copy(Q.X, &lut_cmb[0][d].X); + fe_copy(Q.Y, &lut_cmb[0][d].Y); + fe_copy(Q.Z, const_one); + is_inf = 0; + } else + point_add_mixed(&Q, &Q, &lut_cmb[0][d]); + } + } + + if (is_inf) { + /* initialize accumulator to inf: all-zero scalars; inf is stored as (X, Y, Z) = (0, const_one, 0) */ + fe_set_zero(Q.X); + fe_copy(Q.Y, const_one); + fe_set_zero(Q.Z); + } + + if (flipped) { + /* correct sign: Q holds the negated sum while flipped is set */ + fiat_secp384r1_opp(Q.Y, Q.Y); + } + + /* convert to affine -- NB depends on coordinate system: one inversion of Z, then x = X * Z^-1 and y = Y * Z^-1 */ + fiat_secp384r1_inv(Q.Z, Q.Z); + fiat_secp384r1_mul(out->X, Q.X, Q.Z); + fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Variable point scalar multiplication with "regular" wnaf. + * Here "regular" means _no zeroes_, so the sequence of + * EC arithmetic ops is fixed. 
+ */ +static void +var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48], + const pt_aff_t *P) +{ + int i, j, d, diff, is_neg; + int8_t rnaf[77] = { 0 }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } }; + pt_prj_t precomp[DRADIX / 2]; + + precomp_wnaf(precomp, P); + scalar_rwnaf(rnaf, scalar); + +#if defined(_MSC_VER) + /* result still unsigned: yes we know */ +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + + /* initialize accumulator to high digit: the whole table is scanned and entry d is picked with masked selects, so d is never used as an array index; diff is 1 exactly when j == d and 0 otherwise */ + d = (rnaf[76] - 1) >> 1; + for (j = 0; j < DRADIX / 2; j++) { + diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; + fiat_secp384r1_selectznz(Q.X, diff, Q.X, precomp[j].X); + fiat_secp384r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y); + fiat_secp384r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z); + } + + for (i = 75; i >= 0; i--) { + for (j = 0; j < RADIX; j++) + point_double(&Q, &Q); + d = rnaf[i]; + /* is_neg = (d < 0) ? 1 : 0 */ + is_neg = (d >> (8 * sizeof(int) - 1)) & 1; + /* d = abs(d) */ + d = (d ^ -is_neg) + is_neg; + d = (d - 1) >> 1; /* table index: precomp[k] holds (2k + 1)P */ + for (j = 0; j < DRADIX / 2; j++) { + diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; + fiat_secp384r1_selectznz(lut.X, diff, lut.X, precomp[j].X); + fiat_secp384r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y); + fiat_secp384r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z); + } + /* negate lut point if digit is negative; out->Y is borrowed as scratch for -lut.Y, which is harmless even when out == P because P is only read by precomp_wnaf above */ + fiat_secp384r1_opp(out->Y, lut.Y); + fiat_secp384r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); + point_add_proj(&Q, &Q, &lut); + } + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + /* conditionally subtract P if the scalar was even: scalar_rwnaf forced the low bit to 1, so lut = Q - P is always computed and then kept only when bit 0 of scalar is clear */ + fe_copy(lut.X, precomp[0].X); + fiat_secp384r1_opp(lut.Y, precomp[0].Y); + fe_copy(lut.Z, precomp[0].Z); + point_add_proj(&lut, &lut, &Q); + fiat_secp384r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X); + fiat_secp384r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y); + fiat_secp384r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z); + + /* convert to affine -- NB depends on coordinate 
system */ + fiat_secp384r1_inv(Q.Z, Q.Z); + fiat_secp384r1_mul(out->X, Q.X, Q.Z); + fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Fixed scalar multiplication: comb with interleaving. + * Uses the regular recoding from scalar_rwnaf: digit rnaf[4 * j + i] is looked + * up in table row lut_cmb[j] by a full scan with masked selects. + * The four values of i are processed from 3 down to 0, with RADIX doublings + * of the accumulator before each one except the first. + * Digit positions above 76 do not exist and are skipped. + */ +static void +fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48]) +{ + int i, j, k, d, diff, is_neg = 0; + int8_t rnaf[77] = { 0 }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } }; + pt_aff_t lut = { { 0 }, { 0 } }; + + scalar_rwnaf(rnaf, scalar); + + /* initialize accumulator to inf, stored as (X, Y, Z) = (0, const_one, 0) */ + fe_set_zero(Q.X); + fe_copy(Q.Y, const_one); + fe_set_zero(Q.Z); + +#if defined(_MSC_VER) + /* result still unsigned: yes we know */ +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + + for (i = 3; i >= 0; i--) { + for (j = 0; i != 3 && j < RADIX; j++) + point_double(&Q, &Q); + for (j = 0; j < 21; j++) { + if (j * 4 + i > 76) + continue; + d = rnaf[j * 4 + i]; + /* is_neg = (d < 0) ? 1 : 0 */ + is_neg = (d >> (8 * sizeof(int) - 1)) & 1; + /* d = abs(d) */ + d = (d ^ -is_neg) + is_neg; + d = (d - 1) >> 1; /* column index within row j */ + for (k = 0; k < DRADIX / 2; k++) { + diff = (1 - (-(d ^ k) >> (8 * sizeof(int) - 1))) & 1; /* 1 exactly when k == d */ + fiat_secp384r1_selectznz(lut.X, diff, lut.X, lut_cmb[j][k].X); + fiat_secp384r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y); + } + /* negate lut point if digit is negative; out->Y is borrowed as scratch for -lut.Y */ + fiat_secp384r1_opp(out->Y, lut.Y); + fiat_secp384r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); + point_add_mixed(&Q, &Q, &lut); + } + } + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + /* conditionally subtract P (here the table entry lut_cmb[0][0]) if the scalar was even: R = Q - P is always computed and then kept only when bit 0 of scalar is clear */ + fe_copy(lut.X, lut_cmb[0][0].X); + fiat_secp384r1_opp(lut.Y, lut_cmb[0][0].Y); + point_add_mixed(&R, &Q, &lut); + fiat_secp384r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X); + fiat_secp384r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y); + fiat_secp384r1_selectznz(Q.Z, scalar[0] & 1, R.Z, Q.Z); + + /* convert to affine -- NB depends on coordinate system */ + fiat_secp384r1_inv(Q.Z, Q.Z); + fiat_secp384r1_mul(out->X, Q.X, Q.Z); + 
fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Wrapper: simultaneous scalar multiplication. + * outx, outy := a * G + b * P + * where P = (inx, iny). + * Everything is LE byte ordering. + * The input point is parsed and moved to the Montgomery domain, multiplied in + * place by var_smul_wnaf_two (documented there as not constant time), then + * moved back and serialized into outx, outy. + */ +static void +point_mul_two_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char a[48], + const unsigned char b[48], + const unsigned char inx[48], + const unsigned char iny[48]) +{ + pt_aff_t P; + + fiat_secp384r1_from_bytes(P.X, inx); + fiat_secp384r1_from_bytes(P.Y, iny); + fiat_secp384r1_to_montgomery(P.X, P.X); + fiat_secp384r1_to_montgomery(P.Y, P.Y); + /* simultaneous scalar multiplication; P is both the input point and the output */ + var_smul_wnaf_two(&P, a, b, &P); + + fiat_secp384r1_from_montgomery(P.X, P.X); + fiat_secp384r1_from_montgomery(P.Y, P.Y); + fiat_secp384r1_to_bytes(outx, P.X); + fiat_secp384r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: fixed scalar multiplication. + * outx, outy := scalar * G + * Everything is LE byte ordering. + * The result of fixed_smul_cmb is taken out of the Montgomery domain and + * serialized into outx, outy. + */ +static void +point_mul_g_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48]) +{ + pt_aff_t P; + + /* fixed scmul function */ + fixed_smul_cmb(&P, scalar); + fiat_secp384r1_from_montgomery(P.X, P.X); + fiat_secp384r1_from_montgomery(P.Y, P.Y); + fiat_secp384r1_to_bytes(outx, P.X); + fiat_secp384r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: variable point scalar mutiplication. + * outx, outy := scalar * P + * where P = (inx, iny). + * Everything is LE byte ordering. 
+ */ +static void +point_mul_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48], + const unsigned char inx[48], + const unsigned char iny[48]) +{ + pt_aff_t P; + + fiat_secp384r1_from_bytes(P.X, inx); + fiat_secp384r1_from_bytes(P.Y, iny); + fiat_secp384r1_to_montgomery(P.X, P.X); + fiat_secp384r1_to_montgomery(P.Y, P.Y); + /* var scmul function; P is both the input point and the output */ + var_smul_rwnaf(&P, scalar, &P); + fiat_secp384r1_from_montgomery(P.X, P.X); + fiat_secp384r1_from_montgomery(P.Y, P.Y); + fiat_secp384r1_to_bytes(outx, P.X); + fiat_secp384r1_to_bytes(outy, P.Y); +} + +#undef RADIX /* NOTE(review): presumably avoids a clash with a RADIX macro from the MPI headers included next -- confirm */ +#include "ecp.h" +#include "mpi-priv.h" +#include "mplogic.h" + +/*- + * reverse bytes -- total hack + * Swaps a[k] with a[47 - k] for k = 0 .. 23, i.e. reverses a 48-byte buffer + * in place. The argument is expanded many times, so pass a plain array name + * with no side effects. + */ +#define MP_BE2LE(a) \ + do { \ + unsigned char z_bswap; \ + z_bswap = a[0]; \ + a[0] = a[47]; \ + a[47] = z_bswap; \ + z_bswap = a[1]; \ + a[1] = a[46]; \ + a[46] = z_bswap; \ + z_bswap = a[2]; \ + a[2] = a[45]; \ + a[45] = z_bswap; \ + z_bswap = a[3]; \ + a[3] = a[44]; \ + a[44] = z_bswap; \ + z_bswap = a[4]; \ + a[4] = a[43]; \ + a[43] = z_bswap; \ + z_bswap = a[5]; \ + a[5] = a[42]; \ + a[42] = z_bswap; \ + z_bswap = a[6]; \ + a[6] = a[41]; \ + a[41] = z_bswap; \ + z_bswap = a[7]; \ + a[7] = a[40]; \ + a[40] = z_bswap; \ + z_bswap = a[8]; \ + a[8] = a[39]; \ + a[39] = z_bswap; \ + z_bswap = a[9]; \ + a[9] = a[38]; \ + a[38] = z_bswap; \ + z_bswap = a[10]; \ + a[10] = a[37]; \ + a[37] = z_bswap; \ + z_bswap = a[11]; \ + a[11] = a[36]; \ + a[36] = z_bswap; \ + z_bswap = a[12]; \ + a[12] = a[35]; \ + a[35] = z_bswap; \ + z_bswap = a[13]; \ + a[13] = a[34]; \ + a[34] = z_bswap; \ + z_bswap = a[14]; \ + a[14] = a[33]; \ + a[33] = z_bswap; \ + z_bswap = a[15]; \ + a[15] = a[32]; \ + a[32] = z_bswap; \ + z_bswap = a[16]; \ + a[16] = a[31]; \ + a[31] = z_bswap; \ + z_bswap = a[17]; \ + a[17] = a[30]; \ + a[30] = z_bswap; \ + z_bswap = a[18]; \ + a[18] = a[29]; \ + a[29] = z_bswap; \ + z_bswap = a[19]; \ + a[19] = a[28]; \ + a[28] = z_bswap; \ + z_bswap = a[20]; \ + 
a[20] = a[27]; \ + a[27] = z_bswap; \ + z_bswap = a[21]; \ + a[21] = a[26]; \ + a[26] = z_bswap; \ + z_bswap = a[22]; \ + a[22] = a[25]; \ + a[25] = z_bswap; \ + z_bswap = a[23]; \ + a[23] = a[24]; \ + a[24] = z_bswap; \ + } while (0) + +static mp_err +point_mul_g_secp384r1_wrap(const mp_int *n, mp_int *out_x, + mp_int *out_y, const ECGroup *group) +{ /* mp_int front end for point_mul_g_secp384r1: out := n * G. The scalar is exported to 48 octets, byte-reversed for the little-endian core, and the result is byte-reversed back before being read into out_x, out_y. group is not used. */ + unsigned char b_x[48]; + unsigned char b_y[48]; + unsigned char b_n[48]; + mp_err res; + + ARGCHK(n != NULL && out_x != NULL && out_y != NULL, MP_BADARG); + + /* fail on out of range scalars: n must be strictly positive and at most 384 bits */ + if (mpl_significant_bits(n) > 384 || mp_cmp_z(n) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); + MP_BE2LE(b_n); + point_mul_g_secp384r1(b_x, b_y, b_n); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48)); + +CLEANUP: + return res; +} + +static mp_err +point_mul_secp384r1_wrap(const mp_int *n, const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, const ECGroup *group) +{ /* mp_int front end for point_mul_secp384r1: out := n * (in_x, in_y). Same octet export and byte reversal as the base-point wrapper; b_x, b_y carry the input point in and the result out. group is not used. */ + unsigned char b_x[48]; + unsigned char b_y[48]; + unsigned char b_n[48]; + mp_err res; + + ARGCHK(n != NULL && in_x != NULL && in_y != NULL && out_x != NULL && + out_y != NULL, + MP_BADARG); + + /* fail on out of range scalars: n must be strictly positive and at most 384 bits */ + if (mpl_significant_bits(n) > 384 || mp_cmp_z(n) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 48)); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_BE2LE(b_n); + point_mul_secp384r1(b_x, b_y, b_n, b_x, b_y); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48)); + +CLEANUP: + return res; +} + +static mp_err +point_mul_two_secp384r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) +{ + unsigned 
char b_x[48]; + unsigned char b_y[48]; + unsigned char b_n1[48]; + unsigned char b_n2[48]; + mp_err res; + + /* If n2 == NULL or 0, this is just a base-point multiplication. */ + if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) + return point_mul_g_secp384r1_wrap(n1, out_x, out_y, group); + + /* If n1 == NULL or 0, this is just an arbitrary-point multiplication. */ + if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) + return point_mul_secp384r1_wrap(n2, in_x, in_y, out_x, out_y, group); + + ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, + MP_BADARG); + + /* fail on out of range scalars: both must be strictly positive and at most 384 bits */ + if (mpl_significant_bits(n1) > 384 || mp_cmp_z(n1) != MP_GT || + mpl_significant_bits(n2) > 384 || mp_cmp_z(n2) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n1, b_n1, 48)); + MP_CHECKOK(mp_to_fixlen_octets(n2, b_n2, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 48)); + MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 48)); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_BE2LE(b_n1); + MP_BE2LE(b_n2); + point_mul_two_secp384r1(b_x, b_y, b_n1, b_n2, b_x, b_y); /* n1 multiplies G, n2 multiplies the input point; b_x, b_y are reused for the result */ + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48)); + +CLEANUP: + return res; +} + +mp_err +ec_group_set_secp384r1(ECGroup *group, ECCurveName name) +{ /* installs the three secp384r1 entry points into group when name is ECCurve_NIST_P384; any other name leaves group untouched; always returns MP_OKAY */ + if (name == ECCurve_NIST_P384) { + group->base_point_mul = &point_mul_g_secp384r1_wrap; + group->point_mul = &point_mul_secp384r1_wrap; + group->points_mul = &point_mul_two_secp384r1_wrap; + } + return MP_OKAY; +} + +#endif /* __SIZEOF_INT128__ */ diff --git a/security/nss/lib/freebl/ecl/ecp_secp521r1.c b/security/nss/lib/freebl/ecl/ecp_secp521r1.c new file mode 100644 index 0000000000..d99a0bde45 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_secp521r1.c @@ -0,0 +1,12082 @@ +/* Autogenerated: ECCKiila https://gitlab.com/nisec/ecckiila */ +/*- + * MIT License + * - + * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley + * - + * 
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * - + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * - + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__SIZEOF_INT128__) && !defined(PEDANTIC) + +#include <stdint.h> +#include <string.h> +#define LIMB_BITS 64 +#define LIMB_CNT 9 +/* Field elements: LIMB_CNT limbs, each stored in a LIMB_BITS-bit word */ +typedef uint64_t fe_t[LIMB_CNT]; +typedef uint64_t limb_t; + +#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) +#define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) + +/* Projective points: coordinates (X, Y, Z) */ +typedef struct { + fe_t X; + fe_t Y; + fe_t Z; +} pt_prj_t; + +/* Affine points: coordinates (X, Y) */ +typedef struct { + fe_t X; + fe_t Y; +} pt_aff_t; + +/* BEGIN verbatim fiat code https://github.com/mit-plv/fiat-crypto */ +/*- + * MIT License + * + * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file). 
+ * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* Autogenerated: unsaturated_solinas --static --use-value-barrier secp521r1 64 9 '2^521 - 1' */ +/* curve description: secp521r1 */ +/* machine_wordsize = 64 (from "64") */ +/* requested operations: (all) */ +/* n = 9 (from "9") */ +/* s-c = 2^521 - [(1, 1)] (from "2^521 - 1") */ +/* tight_bounds_multiplier = 1 (from "") */ +/* */ +/* Computed values: */ +/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1] */ +/* eval z = z[0] + (z[1] << 58) + (z[2] << 116) + (z[3] << 174) + (z[4] << 232) + (z[5] << 0x122) + (z[6] << 0x15c) + (z[7] << 0x196) + (z[8] << 0x1d0) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */ +/* balance = [0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x3fffffffffffffe] */ + +#include <stdint.h> /* uint64_t, int64_t and UINT64_C used by the generated code below */ +typedef unsigned char fiat_secp521r1_uint1; 
+typedef signed char fiat_secp521r1_int1; +#ifdef __GNUC__ +#define FIAT_SECP521R1_FIAT_EXTENSION __extension__ +#define FIAT_SECP521R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP521R1_FIAT_EXTENSION +#define FIAT_SECP521R1_FIAT_INLINE +#endif + +FIAT_SECP521R1_FIAT_EXTENSION typedef signed __int128 fiat_secp521r1_int128; +FIAT_SECP521R1_FIAT_EXTENSION typedef unsigned __int128 fiat_secp521r1_uint128; + +/* The type fiat_secp521r1_loose_field_element is a field element with loose bounds. */ +/* Bounds: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] */ +/* (each loose bound is three times the matching tight bound below) */ +typedef uint64_t fiat_secp521r1_loose_field_element[9]; + +/* The type fiat_secp521r1_tight_field_element is a field element with tight bounds. */ +/* Bounds: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] */ +/* (that is 2^58 for limbs 0 to 7 and 2^57 for limb 8) */ +typedef uint64_t fiat_secp521r1_tight_field_element[9]; + +#if (-1 & 3) != 3 +#error "This code only works on a two's complement system" +#endif + +#if !defined(FIAT_SECP521R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +static __inline__ uint64_t +fiat_secp521r1_value_barrier_u64(uint64_t a) +{ /* returns a unchanged; the empty asm statement lists a as a read-write register operand, so the compiler has to treat the value as possibly modified */ + __asm__("" + : "+r"(a) + : /* no inputs */); + return a; +} +#else +#define fiat_secp521r1_value_barrier_u64(x) (x) /* fallback without inline asm: plain identity, no barrier */ +#endif + +/* + * The function fiat_secp521r1_addcarryx_u58 is an addition with carry. 
+ * + * Postconditions: + * out1 = (arg1 + arg2 + arg3) mod 2^58 + * out2 = ⌊(arg1 + arg2 + arg3) / 2^58⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x3ffffffffffffff] + * arg3: [0x0 ~> 0x3ffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0x3ffffffffffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_secp521r1_addcarryx_u58(uint64_t *out1, + fiat_secp521r1_uint1 *out2, + fiat_secp521r1_uint1 arg1, + uint64_t arg2, uint64_t arg3) +{ + uint64_t x1; + uint64_t x2; + fiat_secp521r1_uint1 x3; + x1 = ((arg1 + arg2) + arg3); /* below 2^59 under the stated input bounds, so no 64-bit wrap */ + x2 = (x1 & UINT64_C(0x3ffffffffffffff)); /* low 58 bits: the sum limb */ + x3 = (fiat_secp521r1_uint1)(x1 >> 58); /* bit 58: the carry out */ + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_secp521r1_subborrowx_u58 is a subtraction with borrow. + * + * Postconditions: + * out1 = (-arg1 + arg2 + -arg3) mod 2^58 + * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^58⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x3ffffffffffffff] + * arg3: [0x0 ~> 0x3ffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0x3ffffffffffffff] + * out2: [0x0 ~> 0x1] + * + * NB: the difference is held in a signed 64-bit value and the borrow is + * recovered by shifting that value right, so this relies on right shifts of + * negative values being arithmetic. + */ +static void +fiat_secp521r1_subborrowx_u58(uint64_t *out1, + fiat_secp521r1_uint1 *out2, + fiat_secp521r1_uint1 arg1, + uint64_t arg2, uint64_t arg3) +{ + int64_t x1; + fiat_secp521r1_int1 x2; + uint64_t x3; + x1 = ((int64_t)(arg2 - (int64_t)arg1) - (int64_t)arg3); /* signed difference, negative when a borrow is needed */ + x2 = (fiat_secp521r1_int1)(x1 >> 58); /* 0, or -1 on borrow */ + x3 = (x1 & UINT64_C(0x3ffffffffffffff)); /* low 58 bits: the difference limb */ + *out1 = x3; + *out2 = (fiat_secp521r1_uint1)(0x0 - x2); /* borrow out as 0 or 1 */ +} + +/* + * The function fiat_secp521r1_addcarryx_u57 is an addition with carry. 
+ * + * Postconditions: + * out1 = (arg1 + arg2 + arg3) mod 2^57 + * out2 = ⌊(arg1 + arg2 + arg3) / 2^57⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x1ffffffffffffff] + * arg3: [0x0 ~> 0x1ffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0x1ffffffffffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_secp521r1_addcarryx_u57(uint64_t *out1, + fiat_secp521r1_uint1 *out2, + fiat_secp521r1_uint1 arg1, + uint64_t arg2, uint64_t arg3) +{ + uint64_t x1; + uint64_t x2; + fiat_secp521r1_uint1 x3; + x1 = ((arg1 + arg2) + arg3); /* below 2^58 under the stated input bounds, so no 64-bit wrap */ + x2 = (x1 & UINT64_C(0x1ffffffffffffff)); /* low 57 bits: the sum limb */ + x3 = (fiat_secp521r1_uint1)(x1 >> 57); /* bit 57: the carry out */ + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_secp521r1_subborrowx_u57 is a subtraction with borrow. + * + * Postconditions: + * out1 = (-arg1 + arg2 + -arg3) mod 2^57 + * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^57⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0x1ffffffffffffff] + * arg3: [0x0 ~> 0x1ffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0x1ffffffffffffff] + * out2: [0x0 ~> 0x1] + * + * NB: the difference is held in a signed 64-bit value and the borrow is + * recovered by shifting that value right, so this relies on right shifts of + * negative values being arithmetic. + */ +static void +fiat_secp521r1_subborrowx_u57(uint64_t *out1, + fiat_secp521r1_uint1 *out2, + fiat_secp521r1_uint1 arg1, + uint64_t arg2, uint64_t arg3) +{ + int64_t x1; + fiat_secp521r1_int1 x2; + uint64_t x3; + x1 = ((int64_t)(arg2 - (int64_t)arg1) - (int64_t)arg3); /* signed difference, negative when a borrow is needed */ + x2 = (fiat_secp521r1_int1)(x1 >> 57); /* 0, or -1 on borrow */ + x3 = (x1 & UINT64_C(0x1ffffffffffffff)); /* low 57 bits: the difference limb */ + *out1 = x3; + *out2 = (fiat_secp521r1_uint1)(0x0 - x2); /* borrow out as 0 or 1 */ +} + +/* + * The function fiat_secp521r1_cmovznz_u64 is a single-word conditional move. 
+ * + * Postconditions: + * out1 = (if arg1 = 0 then arg2 else arg3) + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xffffffffffffffff] + * arg3: [0x0 ~> 0xffffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + */ +static void +fiat_secp521r1_cmovznz_u64(uint64_t *out1, + fiat_secp521r1_uint1 arg1, uint64_t arg2, + uint64_t arg3) +{ + fiat_secp521r1_uint1 x1; + uint64_t x2; + uint64_t x3; + x1 = (!(!arg1)); /* normalize the condition to 0 or 1 */ + x2 = ((fiat_secp521r1_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); /* mask: all ones when x1 is 1, zero when x1 is 0 */ + x3 = ((fiat_secp521r1_value_barrier_u64(x2) & arg3) | + (fiat_secp521r1_value_barrier_u64((~x2)) & arg2)); /* masked select with no branch; the mask is passed through the value barrier first -- presumably so the compiler cannot turn the select back into a branch */ + *out1 = x3; +} + +/* + * The function fiat_secp521r1_carry_mul multiplies two field elements and reduces the result. + * + * Postconditions: + * eval out1 mod m = (eval arg1 * eval arg2) mod m + * + */ +static void +fiat_secp521r1_carry_mul( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1, + const fiat_secp521r1_loose_field_element arg2) +{ + fiat_secp521r1_uint128 x1; + fiat_secp521r1_uint128 x2; + fiat_secp521r1_uint128 x3; + fiat_secp521r1_uint128 x4; + fiat_secp521r1_uint128 x5; + fiat_secp521r1_uint128 x6; + fiat_secp521r1_uint128 x7; + fiat_secp521r1_uint128 x8; + fiat_secp521r1_uint128 x9; + fiat_secp521r1_uint128 x10; + fiat_secp521r1_uint128 x11; + fiat_secp521r1_uint128 x12; + fiat_secp521r1_uint128 x13; + fiat_secp521r1_uint128 x14; + fiat_secp521r1_uint128 x15; + fiat_secp521r1_uint128 x16; + fiat_secp521r1_uint128 x17; + fiat_secp521r1_uint128 x18; + fiat_secp521r1_uint128 x19; + fiat_secp521r1_uint128 x20; + fiat_secp521r1_uint128 x21; + fiat_secp521r1_uint128 x22; + fiat_secp521r1_uint128 x23; + fiat_secp521r1_uint128 x24; + fiat_secp521r1_uint128 x25; + fiat_secp521r1_uint128 x26; + fiat_secp521r1_uint128 x27; + fiat_secp521r1_uint128 x28; + fiat_secp521r1_uint128 x29; + fiat_secp521r1_uint128 x30; + fiat_secp521r1_uint128 x31; + fiat_secp521r1_uint128 x32; + fiat_secp521r1_uint128 x33; 
+ fiat_secp521r1_uint128 x34; + fiat_secp521r1_uint128 x35; + fiat_secp521r1_uint128 x36; + fiat_secp521r1_uint128 x37; + fiat_secp521r1_uint128 x38; + fiat_secp521r1_uint128 x39; + fiat_secp521r1_uint128 x40; + fiat_secp521r1_uint128 x41; + fiat_secp521r1_uint128 x42; + fiat_secp521r1_uint128 x43; + fiat_secp521r1_uint128 x44; + fiat_secp521r1_uint128 x45; + fiat_secp521r1_uint128 x46; + fiat_secp521r1_uint128 x47; + fiat_secp521r1_uint128 x48; + fiat_secp521r1_uint128 x49; + fiat_secp521r1_uint128 x50; + fiat_secp521r1_uint128 x51; + fiat_secp521r1_uint128 x52; + fiat_secp521r1_uint128 x53; + fiat_secp521r1_uint128 x54; + fiat_secp521r1_uint128 x55; + fiat_secp521r1_uint128 x56; + fiat_secp521r1_uint128 x57; + fiat_secp521r1_uint128 x58; + fiat_secp521r1_uint128 x59; + fiat_secp521r1_uint128 x60; + fiat_secp521r1_uint128 x61; + fiat_secp521r1_uint128 x62; + fiat_secp521r1_uint128 x63; + fiat_secp521r1_uint128 x64; + fiat_secp521r1_uint128 x65; + fiat_secp521r1_uint128 x66; + fiat_secp521r1_uint128 x67; + fiat_secp521r1_uint128 x68; + fiat_secp521r1_uint128 x69; + fiat_secp521r1_uint128 x70; + fiat_secp521r1_uint128 x71; + fiat_secp521r1_uint128 x72; + fiat_secp521r1_uint128 x73; + fiat_secp521r1_uint128 x74; + fiat_secp521r1_uint128 x75; + fiat_secp521r1_uint128 x76; + fiat_secp521r1_uint128 x77; + fiat_secp521r1_uint128 x78; + fiat_secp521r1_uint128 x79; + fiat_secp521r1_uint128 x80; + fiat_secp521r1_uint128 x81; + fiat_secp521r1_uint128 x82; + fiat_secp521r1_uint128 x83; + uint64_t x84; + fiat_secp521r1_uint128 x85; + fiat_secp521r1_uint128 x86; + fiat_secp521r1_uint128 x87; + fiat_secp521r1_uint128 x88; + fiat_secp521r1_uint128 x89; + fiat_secp521r1_uint128 x90; + fiat_secp521r1_uint128 x91; + fiat_secp521r1_uint128 x92; + fiat_secp521r1_uint128 x93; + fiat_secp521r1_uint128 x94; + uint64_t x95; + fiat_secp521r1_uint128 x96; + fiat_secp521r1_uint128 x97; + uint64_t x98; + fiat_secp521r1_uint128 x99; + fiat_secp521r1_uint128 x100; + uint64_t x101; + 
fiat_secp521r1_uint128 x102; + fiat_secp521r1_uint128 x103; + uint64_t x104; + fiat_secp521r1_uint128 x105; + fiat_secp521r1_uint128 x106; + uint64_t x107; + fiat_secp521r1_uint128 x108; + fiat_secp521r1_uint128 x109; + uint64_t x110; + fiat_secp521r1_uint128 x111; + fiat_secp521r1_uint128 x112; + uint64_t x113; + fiat_secp521r1_uint128 x114; + fiat_secp521r1_uint128 x115; + uint64_t x116; + fiat_secp521r1_uint128 x117; + uint64_t x118; + uint64_t x119; + uint64_t x120; + fiat_secp521r1_uint1 x121; + uint64_t x122; + uint64_t x123; + x1 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[8]) * 0x2)); + x2 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[7]) * 0x2)); + x3 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[6]) * 0x2)); + x4 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[5]) * 0x2)); + x5 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[4]) * 0x2)); + x6 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[3]) * 0x2)); + x7 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[2]) * 0x2)); + x8 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[1]) * 0x2)); + x9 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[8]) * 0x2)); + x10 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[7]) * 0x2)); + x11 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[6]) * 0x2)); + x12 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[5]) * 0x2)); + x13 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[4]) * 0x2)); + x14 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[3]) * 0x2)); + x15 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[2]) * 0x2)); + x16 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[8]) * 0x2)); + x17 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[7]) * 0x2)); + x18 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[6]) * 0x2)); + x19 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[5]) * 0x2)); + x20 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[4]) * 0x2)); + x21 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[3]) * 0x2)); + x22 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[8]) * 0x2)); + x23 = 
((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[7]) * 0x2)); + x24 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[6]) * 0x2)); + x25 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[5]) * 0x2)); + x26 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[4]) * 0x2)); + x27 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[8]) * 0x2)); + x28 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[7]) * 0x2)); + x29 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[6]) * 0x2)); + x30 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[5]) * 0x2)); + x31 = ((fiat_secp521r1_uint128)(arg1[3]) * ((arg2[8]) * 0x2)); + x32 = ((fiat_secp521r1_uint128)(arg1[3]) * ((arg2[7]) * 0x2)); + x33 = ((fiat_secp521r1_uint128)(arg1[3]) * ((arg2[6]) * 0x2)); + x34 = ((fiat_secp521r1_uint128)(arg1[2]) * ((arg2[8]) * 0x2)); + x35 = ((fiat_secp521r1_uint128)(arg1[2]) * ((arg2[7]) * 0x2)); + x36 = ((fiat_secp521r1_uint128)(arg1[1]) * ((arg2[8]) * 0x2)); + x37 = ((fiat_secp521r1_uint128)(arg1[8]) * (arg2[0])); + x38 = ((fiat_secp521r1_uint128)(arg1[7]) * (arg2[1])); + x39 = ((fiat_secp521r1_uint128)(arg1[7]) * (arg2[0])); + x40 = ((fiat_secp521r1_uint128)(arg1[6]) * (arg2[2])); + x41 = ((fiat_secp521r1_uint128)(arg1[6]) * (arg2[1])); + x42 = ((fiat_secp521r1_uint128)(arg1[6]) * (arg2[0])); + x43 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[3])); + x44 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[2])); + x45 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[1])); + x46 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[0])); + x47 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[4])); + x48 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[3])); + x49 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[2])); + x50 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[1])); + x51 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[0])); + x52 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[5])); + x53 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[4])); + x54 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[3])); + x55 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[2])); + x56 = 
((fiat_secp521r1_uint128)(arg1[3]) * (arg2[1])); + x57 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[0])); + x58 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[6])); + x59 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[5])); + x60 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[4])); + x61 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[3])); + x62 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[2])); + x63 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[1])); + x64 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[0])); + x65 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[7])); + x66 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[6])); + x67 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[5])); + x68 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[4])); + x69 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[3])); + x70 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[2])); + x71 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[1])); + x72 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[0])); + x73 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[8])); + x74 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[7])); + x75 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[6])); + x76 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[5])); + x77 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[4])); + x78 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[3])); + x79 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[2])); + x80 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[1])); + x81 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[0])); + x82 = (x81 + (x36 + (x35 + (x33 + (x30 + (x26 + (x21 + (x15 + x8)))))))); + x83 = (x82 >> 58); + x84 = (uint64_t)(x82 & UINT64_C(0x3ffffffffffffff)); + x85 = (x73 + (x65 + (x58 + (x52 + (x47 + (x43 + (x40 + (x38 + x37)))))))); + x86 = (x74 + (x66 + (x59 + (x53 + (x48 + (x44 + (x41 + (x39 + x1)))))))); + x87 = (x75 + (x67 + (x60 + (x54 + (x49 + (x45 + (x42 + (x9 + x2)))))))); + x88 = (x76 + (x68 + (x61 + (x55 + (x50 + (x46 + (x16 + (x10 + x3)))))))); + x89 = (x77 + (x69 + (x62 + (x56 + (x51 + (x22 + (x17 + (x11 + 
x4)))))))); + x90 = (x78 + (x70 + (x63 + (x57 + (x27 + (x23 + (x18 + (x12 + x5)))))))); + x91 = (x79 + (x71 + (x64 + (x31 + (x28 + (x24 + (x19 + (x13 + x6)))))))); + x92 = (x80 + (x72 + (x34 + (x32 + (x29 + (x25 + (x20 + (x14 + x7)))))))); + x93 = (x83 + x92); + x94 = (x93 >> 58); + x95 = (uint64_t)(x93 & UINT64_C(0x3ffffffffffffff)); + x96 = (x94 + x91); + x97 = (x96 >> 58); + x98 = (uint64_t)(x96 & UINT64_C(0x3ffffffffffffff)); + x99 = (x97 + x90); + x100 = (x99 >> 58); + x101 = (uint64_t)(x99 & UINT64_C(0x3ffffffffffffff)); + x102 = (x100 + x89); + x103 = (x102 >> 58); + x104 = (uint64_t)(x102 & UINT64_C(0x3ffffffffffffff)); + x105 = (x103 + x88); + x106 = (x105 >> 58); + x107 = (uint64_t)(x105 & UINT64_C(0x3ffffffffffffff)); + x108 = (x106 + x87); + x109 = (x108 >> 58); + x110 = (uint64_t)(x108 & UINT64_C(0x3ffffffffffffff)); + x111 = (x109 + x86); + x112 = (x111 >> 58); + x113 = (uint64_t)(x111 & UINT64_C(0x3ffffffffffffff)); + x114 = (x112 + x85); + x115 = (x114 >> 57); + x116 = (uint64_t)(x114 & UINT64_C(0x1ffffffffffffff)); + x117 = (x84 + x115); + x118 = (uint64_t)(x117 >> 58); + x119 = (uint64_t)(x117 & UINT64_C(0x3ffffffffffffff)); + x120 = (x118 + x95); + x121 = (fiat_secp521r1_uint1)(x120 >> 58); + x122 = (x120 & UINT64_C(0x3ffffffffffffff)); + x123 = (x121 + x98); + out1[0] = x119; + out1[1] = x122; + out1[2] = x123; + out1[3] = x101; + out1[4] = x104; + out1[5] = x107; + out1[6] = x110; + out1[7] = x113; + out1[8] = x116; +} + +/* + * The function fiat_secp521r1_carry_square squares a field element and reduces the result. 
+ * + * Postconditions: + * eval out1 mod m = (eval arg1 * eval arg1) mod m + * + */ +static void +fiat_secp521r1_carry_square( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + fiat_secp521r1_uint128 x17; + fiat_secp521r1_uint128 x18; + fiat_secp521r1_uint128 x19; + fiat_secp521r1_uint128 x20; + fiat_secp521r1_uint128 x21; + fiat_secp521r1_uint128 x22; + fiat_secp521r1_uint128 x23; + fiat_secp521r1_uint128 x24; + fiat_secp521r1_uint128 x25; + fiat_secp521r1_uint128 x26; + fiat_secp521r1_uint128 x27; + fiat_secp521r1_uint128 x28; + fiat_secp521r1_uint128 x29; + fiat_secp521r1_uint128 x30; + fiat_secp521r1_uint128 x31; + fiat_secp521r1_uint128 x32; + fiat_secp521r1_uint128 x33; + fiat_secp521r1_uint128 x34; + fiat_secp521r1_uint128 x35; + fiat_secp521r1_uint128 x36; + fiat_secp521r1_uint128 x37; + fiat_secp521r1_uint128 x38; + fiat_secp521r1_uint128 x39; + fiat_secp521r1_uint128 x40; + fiat_secp521r1_uint128 x41; + fiat_secp521r1_uint128 x42; + fiat_secp521r1_uint128 x43; + fiat_secp521r1_uint128 x44; + fiat_secp521r1_uint128 x45; + fiat_secp521r1_uint128 x46; + fiat_secp521r1_uint128 x47; + fiat_secp521r1_uint128 x48; + fiat_secp521r1_uint128 x49; + fiat_secp521r1_uint128 x50; + fiat_secp521r1_uint128 x51; + fiat_secp521r1_uint128 x52; + fiat_secp521r1_uint128 x53; + fiat_secp521r1_uint128 x54; + fiat_secp521r1_uint128 x55; + fiat_secp521r1_uint128 x56; + fiat_secp521r1_uint128 x57; + fiat_secp521r1_uint128 x58; + fiat_secp521r1_uint128 x59; + fiat_secp521r1_uint128 x60; + fiat_secp521r1_uint128 x61; + fiat_secp521r1_uint128 x62; + fiat_secp521r1_uint128 x63; + uint64_t x64; + fiat_secp521r1_uint128 x65; + fiat_secp521r1_uint128 x66; + fiat_secp521r1_uint128 x67; + 
fiat_secp521r1_uint128 x68; + fiat_secp521r1_uint128 x69; + fiat_secp521r1_uint128 x70; + fiat_secp521r1_uint128 x71; + fiat_secp521r1_uint128 x72; + fiat_secp521r1_uint128 x73; + fiat_secp521r1_uint128 x74; + uint64_t x75; + fiat_secp521r1_uint128 x76; + fiat_secp521r1_uint128 x77; + uint64_t x78; + fiat_secp521r1_uint128 x79; + fiat_secp521r1_uint128 x80; + uint64_t x81; + fiat_secp521r1_uint128 x82; + fiat_secp521r1_uint128 x83; + uint64_t x84; + fiat_secp521r1_uint128 x85; + fiat_secp521r1_uint128 x86; + uint64_t x87; + fiat_secp521r1_uint128 x88; + fiat_secp521r1_uint128 x89; + uint64_t x90; + fiat_secp521r1_uint128 x91; + fiat_secp521r1_uint128 x92; + uint64_t x93; + fiat_secp521r1_uint128 x94; + fiat_secp521r1_uint128 x95; + uint64_t x96; + fiat_secp521r1_uint128 x97; + uint64_t x98; + uint64_t x99; + uint64_t x100; + fiat_secp521r1_uint1 x101; + uint64_t x102; + uint64_t x103; + x1 = (arg1[8]); + x2 = (x1 * 0x2); + x3 = ((arg1[8]) * 0x2); + x4 = (arg1[7]); + x5 = (x4 * 0x2); + x6 = ((arg1[7]) * 0x2); + x7 = (arg1[6]); + x8 = (x7 * 0x2); + x9 = ((arg1[6]) * 0x2); + x10 = (arg1[5]); + x11 = (x10 * 0x2); + x12 = ((arg1[5]) * 0x2); + x13 = ((arg1[4]) * 0x2); + x14 = ((arg1[3]) * 0x2); + x15 = ((arg1[2]) * 0x2); + x16 = ((arg1[1]) * 0x2); + x17 = ((fiat_secp521r1_uint128)(arg1[8]) * (x1 * 0x2)); + x18 = ((fiat_secp521r1_uint128)(arg1[7]) * (x2 * 0x2)); + x19 = ((fiat_secp521r1_uint128)(arg1[7]) * (x4 * 0x2)); + x20 = ((fiat_secp521r1_uint128)(arg1[6]) * (x2 * 0x2)); + x21 = ((fiat_secp521r1_uint128)(arg1[6]) * (x5 * 0x2)); + x22 = ((fiat_secp521r1_uint128)(arg1[6]) * (x7 * 0x2)); + x23 = ((fiat_secp521r1_uint128)(arg1[5]) * (x2 * 0x2)); + x24 = ((fiat_secp521r1_uint128)(arg1[5]) * (x5 * 0x2)); + x25 = ((fiat_secp521r1_uint128)(arg1[5]) * (x8 * 0x2)); + x26 = ((fiat_secp521r1_uint128)(arg1[5]) * (x10 * 0x2)); + x27 = ((fiat_secp521r1_uint128)(arg1[4]) * (x2 * 0x2)); + x28 = ((fiat_secp521r1_uint128)(arg1[4]) * (x5 * 0x2)); + x29 = 
((fiat_secp521r1_uint128)(arg1[4]) * (x8 * 0x2)); + x30 = ((fiat_secp521r1_uint128)(arg1[4]) * (x11 * 0x2)); + x31 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg1[4])); + x32 = ((fiat_secp521r1_uint128)(arg1[3]) * (x2 * 0x2)); + x33 = ((fiat_secp521r1_uint128)(arg1[3]) * (x5 * 0x2)); + x34 = ((fiat_secp521r1_uint128)(arg1[3]) * (x8 * 0x2)); + x35 = ((fiat_secp521r1_uint128)(arg1[3]) * x12); + x36 = ((fiat_secp521r1_uint128)(arg1[3]) * x13); + x37 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg1[3])); + x38 = ((fiat_secp521r1_uint128)(arg1[2]) * (x2 * 0x2)); + x39 = ((fiat_secp521r1_uint128)(arg1[2]) * (x5 * 0x2)); + x40 = ((fiat_secp521r1_uint128)(arg1[2]) * x9); + x41 = ((fiat_secp521r1_uint128)(arg1[2]) * x12); + x42 = ((fiat_secp521r1_uint128)(arg1[2]) * x13); + x43 = ((fiat_secp521r1_uint128)(arg1[2]) * x14); + x44 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg1[2])); + x45 = ((fiat_secp521r1_uint128)(arg1[1]) * (x2 * 0x2)); + x46 = ((fiat_secp521r1_uint128)(arg1[1]) * x6); + x47 = ((fiat_secp521r1_uint128)(arg1[1]) * x9); + x48 = ((fiat_secp521r1_uint128)(arg1[1]) * x12); + x49 = ((fiat_secp521r1_uint128)(arg1[1]) * x13); + x50 = ((fiat_secp521r1_uint128)(arg1[1]) * x14); + x51 = ((fiat_secp521r1_uint128)(arg1[1]) * x15); + x52 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg1[1])); + x53 = ((fiat_secp521r1_uint128)(arg1[0]) * x3); + x54 = ((fiat_secp521r1_uint128)(arg1[0]) * x6); + x55 = ((fiat_secp521r1_uint128)(arg1[0]) * x9); + x56 = ((fiat_secp521r1_uint128)(arg1[0]) * x12); + x57 = ((fiat_secp521r1_uint128)(arg1[0]) * x13); + x58 = ((fiat_secp521r1_uint128)(arg1[0]) * x14); + x59 = ((fiat_secp521r1_uint128)(arg1[0]) * x15); + x60 = ((fiat_secp521r1_uint128)(arg1[0]) * x16); + x61 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg1[0])); + x62 = (x61 + (x45 + (x39 + (x34 + x30)))); + x63 = (x62 >> 58); + x64 = (uint64_t)(x62 & UINT64_C(0x3ffffffffffffff)); + x65 = (x53 + (x46 + (x40 + (x35 + x31)))); + x66 = (x54 + (x47 + (x41 + (x36 + x17)))); + x67 = (x55 + (x48 + (x42 + 
(x37 + x18)))); + x68 = (x56 + (x49 + (x43 + (x20 + x19)))); + x69 = (x57 + (x50 + (x44 + (x23 + x21)))); + x70 = (x58 + (x51 + (x27 + (x24 + x22)))); + x71 = (x59 + (x52 + (x32 + (x28 + x25)))); + x72 = (x60 + (x38 + (x33 + (x29 + x26)))); + x73 = (x63 + x72); + x74 = (x73 >> 58); + x75 = (uint64_t)(x73 & UINT64_C(0x3ffffffffffffff)); + x76 = (x74 + x71); + x77 = (x76 >> 58); + x78 = (uint64_t)(x76 & UINT64_C(0x3ffffffffffffff)); + x79 = (x77 + x70); + x80 = (x79 >> 58); + x81 = (uint64_t)(x79 & UINT64_C(0x3ffffffffffffff)); + x82 = (x80 + x69); + x83 = (x82 >> 58); + x84 = (uint64_t)(x82 & UINT64_C(0x3ffffffffffffff)); + x85 = (x83 + x68); + x86 = (x85 >> 58); + x87 = (uint64_t)(x85 & UINT64_C(0x3ffffffffffffff)); + x88 = (x86 + x67); + x89 = (x88 >> 58); + x90 = (uint64_t)(x88 & UINT64_C(0x3ffffffffffffff)); + x91 = (x89 + x66); + x92 = (x91 >> 58); + x93 = (uint64_t)(x91 & UINT64_C(0x3ffffffffffffff)); + x94 = (x92 + x65); + x95 = (x94 >> 57); + x96 = (uint64_t)(x94 & UINT64_C(0x1ffffffffffffff)); + x97 = (x64 + x95); + x98 = (uint64_t)(x97 >> 58); + x99 = (uint64_t)(x97 & UINT64_C(0x3ffffffffffffff)); + x100 = (x98 + x75); + x101 = (fiat_secp521r1_uint1)(x100 >> 58); + x102 = (x100 & UINT64_C(0x3ffffffffffffff)); + x103 = (x101 + x78); + out1[0] = x99; + out1[1] = x102; + out1[2] = x103; + out1[3] = x81; + out1[4] = x84; + out1[5] = x87; + out1[6] = x90; + out1[7] = x93; + out1[8] = x96; +} + +/* + * The function fiat_secp521r1_carry_add adds two field elements. 
+ * + * Postconditions: + * eval out1 mod m = (eval arg1 + eval arg2) mod m + * + */ +static void +fiat_secp521r1_carry_add( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1, + const fiat_secp521r1_tight_field_element arg2) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; + x1 = ((arg1[0]) + (arg2[0])); + x2 = ((x1 >> 58) + ((arg1[1]) + (arg2[1]))); + x3 = ((x2 >> 58) + ((arg1[2]) + (arg2[2]))); + x4 = ((x3 >> 58) + ((arg1[3]) + (arg2[3]))); + x5 = ((x4 >> 58) + ((arg1[4]) + (arg2[4]))); + x6 = ((x5 >> 58) + ((arg1[5]) + (arg2[5]))); + x7 = ((x6 >> 58) + ((arg1[6]) + (arg2[6]))); + x8 = ((x7 >> 58) + ((arg1[7]) + (arg2[7]))); + x9 = ((x8 >> 58) + ((arg1[8]) + (arg2[8]))); + x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + (x9 >> 57)); + x11 = ((fiat_secp521r1_uint1)(x10 >> 58) + + (x2 & UINT64_C(0x3ffffffffffffff))); + x12 = (x10 & UINT64_C(0x3ffffffffffffff)); + x13 = (x11 & UINT64_C(0x3ffffffffffffff)); + x14 = ((fiat_secp521r1_uint1)(x11 >> 58) + + (x3 & UINT64_C(0x3ffffffffffffff))); + x15 = (x4 & UINT64_C(0x3ffffffffffffff)); + x16 = (x5 & UINT64_C(0x3ffffffffffffff)); + x17 = (x6 & UINT64_C(0x3ffffffffffffff)); + x18 = (x7 & UINT64_C(0x3ffffffffffffff)); + x19 = (x8 & UINT64_C(0x3ffffffffffffff)); + x20 = (x9 & UINT64_C(0x1ffffffffffffff)); + out1[0] = x12; + out1[1] = x13; + out1[2] = x14; + out1[3] = x15; + out1[4] = x16; + out1[5] = x17; + out1[6] = x18; + out1[7] = x19; + out1[8] = x20; +} + +/* + * The function fiat_secp521r1_carry_sub subtracts two field elements. 
+ * + * Postconditions: + * eval out1 mod m = (eval arg1 - eval arg2) mod m + * + */ +static void +fiat_secp521r1_carry_sub( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1, + const fiat_secp521r1_tight_field_element arg2) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; + x1 = ((UINT64_C(0x7fffffffffffffe) + (arg1[0])) - (arg2[0])); + x2 = ((x1 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[1])) - (arg2[1]))); + x3 = ((x2 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[2])) - (arg2[2]))); + x4 = ((x3 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[3])) - (arg2[3]))); + x5 = ((x4 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[4])) - (arg2[4]))); + x6 = ((x5 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[5])) - (arg2[5]))); + x7 = ((x6 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[6])) - (arg2[6]))); + x8 = ((x7 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[7])) - (arg2[7]))); + x9 = ((x8 >> 58) + ((UINT64_C(0x3fffffffffffffe) + (arg1[8])) - (arg2[8]))); + x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + (x9 >> 57)); + x11 = ((fiat_secp521r1_uint1)(x10 >> 58) + + (x2 & UINT64_C(0x3ffffffffffffff))); + x12 = (x10 & UINT64_C(0x3ffffffffffffff)); + x13 = (x11 & UINT64_C(0x3ffffffffffffff)); + x14 = ((fiat_secp521r1_uint1)(x11 >> 58) + + (x3 & UINT64_C(0x3ffffffffffffff))); + x15 = (x4 & UINT64_C(0x3ffffffffffffff)); + x16 = (x5 & UINT64_C(0x3ffffffffffffff)); + x17 = (x6 & UINT64_C(0x3ffffffffffffff)); + x18 = (x7 & UINT64_C(0x3ffffffffffffff)); + x19 = (x8 & UINT64_C(0x3ffffffffffffff)); + x20 = (x9 & UINT64_C(0x1ffffffffffffff)); + out1[0] = x12; + out1[1] = x13; + out1[2] = x14; + out1[3] = x15; + out1[4] = x16; + out1[5] = x17; + out1[6] = x18; + out1[7] = x19; + 
out1[8] = x20; +} + +/* + * The function fiat_secp521r1_carry_opp negates a field element. + * + * Postconditions: + * eval out1 mod m = -eval arg1 mod m + * + */ +static void +fiat_secp521r1_carry_opp( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; + x1 = (UINT64_C(0x7fffffffffffffe) - (arg1[0])); + x2 = ((fiat_secp521r1_uint1)(x1 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[1]))); + x3 = ((fiat_secp521r1_uint1)(x2 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[2]))); + x4 = ((fiat_secp521r1_uint1)(x3 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[3]))); + x5 = ((fiat_secp521r1_uint1)(x4 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[4]))); + x6 = ((fiat_secp521r1_uint1)(x5 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[5]))); + x7 = ((fiat_secp521r1_uint1)(x6 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[6]))); + x8 = ((fiat_secp521r1_uint1)(x7 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[7]))); + x9 = ((fiat_secp521r1_uint1)(x8 >> 58) + + (UINT64_C(0x3fffffffffffffe) - (arg1[8]))); + x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + + (uint64_t)(fiat_secp521r1_uint1)(x9 >> 57)); + x11 = ((fiat_secp521r1_uint1)(x10 >> 58) + + (x2 & UINT64_C(0x3ffffffffffffff))); + x12 = (x10 & UINT64_C(0x3ffffffffffffff)); + x13 = (x11 & UINT64_C(0x3ffffffffffffff)); + x14 = ((fiat_secp521r1_uint1)(x11 >> 58) + + (x3 & UINT64_C(0x3ffffffffffffff))); + x15 = (x4 & UINT64_C(0x3ffffffffffffff)); + x16 = (x5 & UINT64_C(0x3ffffffffffffff)); + x17 = (x6 & UINT64_C(0x3ffffffffffffff)); + x18 = (x7 & UINT64_C(0x3ffffffffffffff)); + x19 = (x8 & UINT64_C(0x3ffffffffffffff)); + x20 = (x9 & UINT64_C(0x1ffffffffffffff)); + out1[0] = 
x12; + out1[1] = x13; + out1[2] = x14; + out1[3] = x15; + out1[4] = x16; + out1[5] = x17; + out1[6] = x18; + out1[7] = x19; + out1[8] = x20; +} + +/* + * The function fiat_secp521r1_selectznz is a multi-limb conditional select. + * + * Postconditions: + * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +static void +fiat_secp521r1_selectznz(uint64_t out1[9], + fiat_secp521r1_uint1 arg1, + const uint64_t arg2[9], + const uint64_t arg3[9]) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + fiat_secp521r1_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0])); + fiat_secp521r1_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1])); + fiat_secp521r1_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2])); + fiat_secp521r1_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3])); + fiat_secp521r1_cmovznz_u64(&x5, arg1, (arg2[4]), (arg3[4])); + fiat_secp521r1_cmovznz_u64(&x6, arg1, (arg2[5]), (arg3[5])); + fiat_secp521r1_cmovznz_u64(&x7, arg1, (arg2[6]), (arg3[6])); + fiat_secp521r1_cmovznz_u64(&x8, arg1, (arg2[7]), (arg3[7])); + 
fiat_secp521r1_cmovznz_u64(&x9, arg1, (arg2[8]), (arg3[8])); + out1[0] = x1; + out1[1] = x2; + out1[2] = x3; + out1[3] = x4; + out1[4] = x5; + out1[5] = x6; + out1[6] = x7; + out1[7] = x8; + out1[8] = x9; +} + +/* + * The function fiat_secp521r1_to_bytes serializes a field element to bytes in little-endian order. + * + * Postconditions: + * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65] + * + * Output Bounds: + * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] + */ +static void +fiat_secp521r1_to_bytes( + uint8_t out1[66], const fiat_secp521r1_tight_field_element arg1) +{ + uint64_t x1; + fiat_secp521r1_uint1 x2; + uint64_t x3; + fiat_secp521r1_uint1 x4; + uint64_t x5; + fiat_secp521r1_uint1 x6; + uint64_t x7; + fiat_secp521r1_uint1 x8; + uint64_t x9; + fiat_secp521r1_uint1 x10; + uint64_t x11; + fiat_secp521r1_uint1 x12; + uint64_t x13; + fiat_secp521r1_uint1 x14; + uint64_t x15; + fiat_secp521r1_uint1 x16; + uint64_t x17; + fiat_secp521r1_uint1 x18; + uint64_t x19; + uint64_t x20; + 
fiat_secp521r1_uint1 x21; + uint64_t x22; + fiat_secp521r1_uint1 x23; + uint64_t x24; + fiat_secp521r1_uint1 x25; + uint64_t x26; + fiat_secp521r1_uint1 x27; + uint64_t x28; + fiat_secp521r1_uint1 x29; + uint64_t x30; + fiat_secp521r1_uint1 x31; + uint64_t x32; + fiat_secp521r1_uint1 x33; + uint64_t x34; + fiat_secp521r1_uint1 x35; + uint64_t x36; + fiat_secp521r1_uint1 x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint8_t x44; + uint64_t x45; + uint8_t x46; + uint64_t x47; + uint8_t x48; + uint64_t x49; + uint8_t x50; + uint64_t x51; + uint8_t x52; + uint64_t x53; + uint8_t x54; + uint64_t x55; + uint8_t x56; + uint8_t x57; + uint64_t x58; + uint8_t x59; + uint64_t x60; + uint8_t x61; + uint64_t x62; + uint8_t x63; + uint64_t x64; + uint8_t x65; + uint64_t x66; + uint8_t x67; + uint64_t x68; + uint8_t x69; + uint64_t x70; + uint8_t x71; + uint8_t x72; + uint64_t x73; + uint8_t x74; + uint64_t x75; + uint8_t x76; + uint64_t x77; + uint8_t x78; + uint64_t x79; + uint8_t x80; + uint64_t x81; + uint8_t x82; + uint64_t x83; + uint8_t x84; + uint64_t x85; + uint8_t x86; + uint8_t x87; + uint64_t x88; + uint8_t x89; + uint64_t x90; + uint8_t x91; + uint64_t x92; + uint8_t x93; + uint64_t x94; + uint8_t x95; + uint64_t x96; + uint8_t x97; + uint64_t x98; + uint8_t x99; + uint64_t x100; + uint8_t x101; + uint8_t x102; + uint8_t x103; + uint64_t x104; + uint8_t x105; + uint64_t x106; + uint8_t x107; + uint64_t x108; + uint8_t x109; + uint64_t x110; + uint8_t x111; + uint64_t x112; + uint8_t x113; + uint64_t x114; + uint8_t x115; + uint8_t x116; + uint64_t x117; + uint8_t x118; + uint64_t x119; + uint8_t x120; + uint64_t x121; + uint8_t x122; + uint64_t x123; + uint8_t x124; + uint64_t x125; + uint8_t x126; + uint64_t x127; + uint8_t x128; + uint64_t x129; + uint8_t x130; + uint8_t x131; + uint64_t x132; + uint8_t x133; + uint64_t x134; + uint8_t x135; + uint64_t x136; + uint8_t x137; + uint64_t x138; + uint8_t x139; + 
uint64_t x140; + uint8_t x141; + uint64_t x142; + uint8_t x143; + uint64_t x144; + uint8_t x145; + uint8_t x146; + uint64_t x147; + uint8_t x148; + uint64_t x149; + uint8_t x150; + uint64_t x151; + uint8_t x152; + uint64_t x153; + uint8_t x154; + uint64_t x155; + uint8_t x156; + uint64_t x157; + uint8_t x158; + uint64_t x159; + uint8_t x160; + uint8_t x161; + uint8_t x162; + uint64_t x163; + uint8_t x164; + uint64_t x165; + uint8_t x166; + uint64_t x167; + uint8_t x168; + uint64_t x169; + uint8_t x170; + uint64_t x171; + uint8_t x172; + uint64_t x173; + uint8_t x174; + fiat_secp521r1_uint1 x175; + fiat_secp521r1_subborrowx_u58(&x1, &x2, 0x0, (arg1[0]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u58(&x3, &x4, x2, (arg1[1]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u58(&x5, &x6, x4, (arg1[2]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u58(&x7, &x8, x6, (arg1[3]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u58(&x9, &x10, x8, (arg1[4]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u58(&x11, &x12, x10, (arg1[5]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u58(&x13, &x14, x12, (arg1[6]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u58(&x15, &x16, x14, (arg1[7]), + UINT64_C(0x3ffffffffffffff)); + fiat_secp521r1_subborrowx_u57(&x17, &x18, x16, (arg1[8]), + UINT64_C(0x1ffffffffffffff)); + fiat_secp521r1_cmovznz_u64(&x19, x18, 0x0, UINT64_C(0xffffffffffffffff)); + fiat_secp521r1_addcarryx_u58(&x20, &x21, 0x0, x1, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u58(&x22, &x23, x21, x3, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u58(&x24, &x25, x23, x5, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u58(&x26, &x27, x25, x7, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u58(&x28, &x29, x27, x9, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u58(&x30, &x31, 
x29, x11, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u58(&x32, &x33, x31, x13, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u58(&x34, &x35, x33, x15, + (x19 & UINT64_C(0x3ffffffffffffff))); + fiat_secp521r1_addcarryx_u57(&x36, &x37, x35, x17, + (x19 & UINT64_C(0x1ffffffffffffff))); + x38 = (x34 << 6); + x39 = (x32 << 4); + x40 = (x30 << 2); + x41 = (x26 << 6); + x42 = (x24 << 4); + x43 = (x22 << 2); + x44 = (uint8_t)(x20 & UINT8_C(0xff)); + x45 = (x20 >> 8); + x46 = (uint8_t)(x45 & UINT8_C(0xff)); + x47 = (x45 >> 8); + x48 = (uint8_t)(x47 & UINT8_C(0xff)); + x49 = (x47 >> 8); + x50 = (uint8_t)(x49 & UINT8_C(0xff)); + x51 = (x49 >> 8); + x52 = (uint8_t)(x51 & UINT8_C(0xff)); + x53 = (x51 >> 8); + x54 = (uint8_t)(x53 & UINT8_C(0xff)); + x55 = (x53 >> 8); + x56 = (uint8_t)(x55 & UINT8_C(0xff)); + x57 = (uint8_t)(x55 >> 8); + x58 = (x43 + (uint64_t)x57); + x59 = (uint8_t)(x58 & UINT8_C(0xff)); + x60 = (x58 >> 8); + x61 = (uint8_t)(x60 & UINT8_C(0xff)); + x62 = (x60 >> 8); + x63 = (uint8_t)(x62 & UINT8_C(0xff)); + x64 = (x62 >> 8); + x65 = (uint8_t)(x64 & UINT8_C(0xff)); + x66 = (x64 >> 8); + x67 = (uint8_t)(x66 & UINT8_C(0xff)); + x68 = (x66 >> 8); + x69 = (uint8_t)(x68 & UINT8_C(0xff)); + x70 = (x68 >> 8); + x71 = (uint8_t)(x70 & UINT8_C(0xff)); + x72 = (uint8_t)(x70 >> 8); + x73 = (x42 + (uint64_t)x72); + x74 = (uint8_t)(x73 & UINT8_C(0xff)); + x75 = (x73 >> 8); + x76 = (uint8_t)(x75 & UINT8_C(0xff)); + x77 = (x75 >> 8); + x78 = (uint8_t)(x77 & UINT8_C(0xff)); + x79 = (x77 >> 8); + x80 = (uint8_t)(x79 & UINT8_C(0xff)); + x81 = (x79 >> 8); + x82 = (uint8_t)(x81 & UINT8_C(0xff)); + x83 = (x81 >> 8); + x84 = (uint8_t)(x83 & UINT8_C(0xff)); + x85 = (x83 >> 8); + x86 = (uint8_t)(x85 & UINT8_C(0xff)); + x87 = (uint8_t)(x85 >> 8); + x88 = (x41 + (uint64_t)x87); + x89 = (uint8_t)(x88 & UINT8_C(0xff)); + x90 = (x88 >> 8); + x91 = (uint8_t)(x90 & UINT8_C(0xff)); + x92 = (x90 >> 8); + x93 = (uint8_t)(x92 & UINT8_C(0xff)); + x94 = 
(x92 >> 8); + x95 = (uint8_t)(x94 & UINT8_C(0xff)); + x96 = (x94 >> 8); + x97 = (uint8_t)(x96 & UINT8_C(0xff)); + x98 = (x96 >> 8); + x99 = (uint8_t)(x98 & UINT8_C(0xff)); + x100 = (x98 >> 8); + x101 = (uint8_t)(x100 & UINT8_C(0xff)); + x102 = (uint8_t)(x100 >> 8); + x103 = (uint8_t)(x28 & UINT8_C(0xff)); + x104 = (x28 >> 8); + x105 = (uint8_t)(x104 & UINT8_C(0xff)); + x106 = (x104 >> 8); + x107 = (uint8_t)(x106 & UINT8_C(0xff)); + x108 = (x106 >> 8); + x109 = (uint8_t)(x108 & UINT8_C(0xff)); + x110 = (x108 >> 8); + x111 = (uint8_t)(x110 & UINT8_C(0xff)); + x112 = (x110 >> 8); + x113 = (uint8_t)(x112 & UINT8_C(0xff)); + x114 = (x112 >> 8); + x115 = (uint8_t)(x114 & UINT8_C(0xff)); + x116 = (uint8_t)(x114 >> 8); + x117 = (x40 + (uint64_t)x116); + x118 = (uint8_t)(x117 & UINT8_C(0xff)); + x119 = (x117 >> 8); + x120 = (uint8_t)(x119 & UINT8_C(0xff)); + x121 = (x119 >> 8); + x122 = (uint8_t)(x121 & UINT8_C(0xff)); + x123 = (x121 >> 8); + x124 = (uint8_t)(x123 & UINT8_C(0xff)); + x125 = (x123 >> 8); + x126 = (uint8_t)(x125 & UINT8_C(0xff)); + x127 = (x125 >> 8); + x128 = (uint8_t)(x127 & UINT8_C(0xff)); + x129 = (x127 >> 8); + x130 = (uint8_t)(x129 & UINT8_C(0xff)); + x131 = (uint8_t)(x129 >> 8); + x132 = (x39 + (uint64_t)x131); + x133 = (uint8_t)(x132 & UINT8_C(0xff)); + x134 = (x132 >> 8); + x135 = (uint8_t)(x134 & UINT8_C(0xff)); + x136 = (x134 >> 8); + x137 = (uint8_t)(x136 & UINT8_C(0xff)); + x138 = (x136 >> 8); + x139 = (uint8_t)(x138 & UINT8_C(0xff)); + x140 = (x138 >> 8); + x141 = (uint8_t)(x140 & UINT8_C(0xff)); + x142 = (x140 >> 8); + x143 = (uint8_t)(x142 & UINT8_C(0xff)); + x144 = (x142 >> 8); + x145 = (uint8_t)(x144 & UINT8_C(0xff)); + x146 = (uint8_t)(x144 >> 8); + x147 = (x38 + (uint64_t)x146); + x148 = (uint8_t)(x147 & UINT8_C(0xff)); + x149 = (x147 >> 8); + x150 = (uint8_t)(x149 & UINT8_C(0xff)); + x151 = (x149 >> 8); + x152 = (uint8_t)(x151 & UINT8_C(0xff)); + x153 = (x151 >> 8); + x154 = (uint8_t)(x153 & UINT8_C(0xff)); + x155 = (x153 >> 8); + x156 = 
(uint8_t)(x155 & UINT8_C(0xff)); + x157 = (x155 >> 8); + x158 = (uint8_t)(x157 & UINT8_C(0xff)); + x159 = (x157 >> 8); + x160 = (uint8_t)(x159 & UINT8_C(0xff)); + x161 = (uint8_t)(x159 >> 8); + x162 = (uint8_t)(x36 & UINT8_C(0xff)); + x163 = (x36 >> 8); + x164 = (uint8_t)(x163 & UINT8_C(0xff)); + x165 = (x163 >> 8); + x166 = (uint8_t)(x165 & UINT8_C(0xff)); + x167 = (x165 >> 8); + x168 = (uint8_t)(x167 & UINT8_C(0xff)); + x169 = (x167 >> 8); + x170 = (uint8_t)(x169 & UINT8_C(0xff)); + x171 = (x169 >> 8); + x172 = (uint8_t)(x171 & UINT8_C(0xff)); + x173 = (x171 >> 8); + x174 = (uint8_t)(x173 & UINT8_C(0xff)); + x175 = (fiat_secp521r1_uint1)(x173 >> 8); + out1[0] = x44; + out1[1] = x46; + out1[2] = x48; + out1[3] = x50; + out1[4] = x52; + out1[5] = x54; + out1[6] = x56; + out1[7] = x59; + out1[8] = x61; + out1[9] = x63; + out1[10] = x65; + out1[11] = x67; + out1[12] = x69; + out1[13] = x71; + out1[14] = x74; + out1[15] = x76; + out1[16] = x78; + out1[17] = x80; + out1[18] = x82; + out1[19] = x84; + out1[20] = x86; + out1[21] = x89; + out1[22] = x91; + out1[23] = x93; + out1[24] = x95; + out1[25] = x97; + out1[26] = x99; + out1[27] = x101; + out1[28] = x102; + out1[29] = x103; + out1[30] = x105; + out1[31] = x107; + out1[32] = x109; + out1[33] = x111; + out1[34] = x113; + out1[35] = x115; + out1[36] = x118; + out1[37] = x120; + out1[38] = x122; + out1[39] = x124; + out1[40] = x126; + out1[41] = x128; + out1[42] = x130; + out1[43] = x133; + out1[44] = x135; + out1[45] = x137; + out1[46] = x139; + out1[47] = x141; + out1[48] = x143; + out1[49] = x145; + out1[50] = x148; + out1[51] = x150; + out1[52] = x152; + out1[53] = x154; + out1[54] = x156; + out1[55] = x158; + out1[56] = x160; + out1[57] = x161; + out1[58] = x162; + out1[59] = x164; + out1[60] = x166; + out1[61] = x168; + out1[62] = x170; + out1[63] = x172; + out1[64] = x174; + out1[65] = x175; +} + +/* + * The function fiat_secp521r1_from_bytes deserializes a field element from bytes in little-endian order. 
+ * + * Postconditions: + * eval out1 mod m = bytes_eval arg1 mod m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] + */ +static void +fiat_secp521r1_from_bytes(fiat_secp521r1_tight_field_element out1, + const uint8_t arg1[66]) +{ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint8_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; + uint64_t x21; + uint64_t x22; + uint64_t x23; + uint64_t x24; + uint64_t x25; + uint64_t x26; + uint64_t x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint64_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + uint64_t x36; + uint8_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + uint64_t x45; + uint64_t x46; + uint64_t x47; + uint64_t x48; + uint64_t x49; + uint64_t x50; + 
uint64_t x51; + uint64_t x52; + uint64_t x53; + uint64_t x54; + uint64_t x55; + uint64_t x56; + uint64_t x57; + uint64_t x58; + uint64_t x59; + uint64_t x60; + uint64_t x61; + uint64_t x62; + uint64_t x63; + uint64_t x64; + uint64_t x65; + uint8_t x66; + uint64_t x67; + uint64_t x68; + uint64_t x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + uint64_t x74; + uint8_t x75; + uint64_t x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + uint64_t x81; + uint64_t x82; + uint64_t x83; + uint8_t x84; + uint64_t x85; + uint64_t x86; + uint64_t x87; + uint64_t x88; + uint64_t x89; + uint64_t x90; + uint64_t x91; + uint64_t x92; + uint8_t x93; + uint64_t x94; + uint64_t x95; + uint64_t x96; + uint64_t x97; + uint64_t x98; + uint64_t x99; + uint64_t x100; + uint64_t x101; + uint64_t x102; + uint64_t x103; + uint64_t x104; + uint64_t x105; + uint64_t x106; + uint64_t x107; + uint64_t x108; + uint8_t x109; + uint64_t x110; + uint64_t x111; + uint64_t x112; + uint64_t x113; + uint64_t x114; + uint64_t x115; + uint64_t x116; + uint64_t x117; + uint8_t x118; + uint64_t x119; + uint64_t x120; + uint64_t x121; + uint64_t x122; + uint64_t x123; + uint64_t x124; + uint64_t x125; + uint64_t x126; + uint8_t x127; + uint64_t x128; + uint64_t x129; + uint64_t x130; + uint64_t x131; + uint64_t x132; + uint64_t x133; + uint64_t x134; + uint64_t x135; + uint64_t x136; + uint64_t x137; + uint64_t x138; + uint64_t x139; + uint64_t x140; + uint64_t x141; + x1 = ((uint64_t)(fiat_secp521r1_uint1)(arg1[65]) << 56); + x2 = ((uint64_t)(arg1[64]) << 48); + x3 = ((uint64_t)(arg1[63]) << 40); + x4 = ((uint64_t)(arg1[62]) << 32); + x5 = ((uint64_t)(arg1[61]) << 24); + x6 = ((uint64_t)(arg1[60]) << 16); + x7 = ((uint64_t)(arg1[59]) << 8); + x8 = (arg1[58]); + x9 = ((uint64_t)(arg1[57]) << 50); + x10 = ((uint64_t)(arg1[56]) << 42); + x11 = ((uint64_t)(arg1[55]) << 34); + x12 = ((uint64_t)(arg1[54]) << 26); + x13 = ((uint64_t)(arg1[53]) << 18); + x14 = 
((uint64_t)(arg1[52]) << 10); + x15 = ((uint64_t)(arg1[51]) << 2); + x16 = ((uint64_t)(arg1[50]) << 52); + x17 = ((uint64_t)(arg1[49]) << 44); + x18 = ((uint64_t)(arg1[48]) << 36); + x19 = ((uint64_t)(arg1[47]) << 28); + x20 = ((uint64_t)(arg1[46]) << 20); + x21 = ((uint64_t)(arg1[45]) << 12); + x22 = ((uint64_t)(arg1[44]) << 4); + x23 = ((uint64_t)(arg1[43]) << 54); + x24 = ((uint64_t)(arg1[42]) << 46); + x25 = ((uint64_t)(arg1[41]) << 38); + x26 = ((uint64_t)(arg1[40]) << 30); + x27 = ((uint64_t)(arg1[39]) << 22); + x28 = ((uint64_t)(arg1[38]) << 14); + x29 = ((uint64_t)(arg1[37]) << 6); + x30 = ((uint64_t)(arg1[36]) << 56); + x31 = ((uint64_t)(arg1[35]) << 48); + x32 = ((uint64_t)(arg1[34]) << 40); + x33 = ((uint64_t)(arg1[33]) << 32); + x34 = ((uint64_t)(arg1[32]) << 24); + x35 = ((uint64_t)(arg1[31]) << 16); + x36 = ((uint64_t)(arg1[30]) << 8); + x37 = (arg1[29]); + x38 = ((uint64_t)(arg1[28]) << 50); + x39 = ((uint64_t)(arg1[27]) << 42); + x40 = ((uint64_t)(arg1[26]) << 34); + x41 = ((uint64_t)(arg1[25]) << 26); + x42 = ((uint64_t)(arg1[24]) << 18); + x43 = ((uint64_t)(arg1[23]) << 10); + x44 = ((uint64_t)(arg1[22]) << 2); + x45 = ((uint64_t)(arg1[21]) << 52); + x46 = ((uint64_t)(arg1[20]) << 44); + x47 = ((uint64_t)(arg1[19]) << 36); + x48 = ((uint64_t)(arg1[18]) << 28); + x49 = ((uint64_t)(arg1[17]) << 20); + x50 = ((uint64_t)(arg1[16]) << 12); + x51 = ((uint64_t)(arg1[15]) << 4); + x52 = ((uint64_t)(arg1[14]) << 54); + x53 = ((uint64_t)(arg1[13]) << 46); + x54 = ((uint64_t)(arg1[12]) << 38); + x55 = ((uint64_t)(arg1[11]) << 30); + x56 = ((uint64_t)(arg1[10]) << 22); + x57 = ((uint64_t)(arg1[9]) << 14); + x58 = ((uint64_t)(arg1[8]) << 6); + x59 = ((uint64_t)(arg1[7]) << 56); + x60 = ((uint64_t)(arg1[6]) << 48); + x61 = ((uint64_t)(arg1[5]) << 40); + x62 = ((uint64_t)(arg1[4]) << 32); + x63 = ((uint64_t)(arg1[3]) << 24); + x64 = ((uint64_t)(arg1[2]) << 16); + x65 = ((uint64_t)(arg1[1]) << 8); + x66 = (arg1[0]); + x67 = (x65 + (uint64_t)x66); + x68 = (x64 + 
x67); + x69 = (x63 + x68); + x70 = (x62 + x69); + x71 = (x61 + x70); + x72 = (x60 + x71); + x73 = (x59 + x72); + x74 = (x73 & UINT64_C(0x3ffffffffffffff)); + x75 = (uint8_t)(x73 >> 58); + x76 = (x58 + (uint64_t)x75); + x77 = (x57 + x76); + x78 = (x56 + x77); + x79 = (x55 + x78); + x80 = (x54 + x79); + x81 = (x53 + x80); + x82 = (x52 + x81); + x83 = (x82 & UINT64_C(0x3ffffffffffffff)); + x84 = (uint8_t)(x82 >> 58); + x85 = (x51 + (uint64_t)x84); + x86 = (x50 + x85); + x87 = (x49 + x86); + x88 = (x48 + x87); + x89 = (x47 + x88); + x90 = (x46 + x89); + x91 = (x45 + x90); + x92 = (x91 & UINT64_C(0x3ffffffffffffff)); + x93 = (uint8_t)(x91 >> 58); + x94 = (x44 + (uint64_t)x93); + x95 = (x43 + x94); + x96 = (x42 + x95); + x97 = (x41 + x96); + x98 = (x40 + x97); + x99 = (x39 + x98); + x100 = (x38 + x99); + x101 = (x36 + (uint64_t)x37); + x102 = (x35 + x101); + x103 = (x34 + x102); + x104 = (x33 + x103); + x105 = (x32 + x104); + x106 = (x31 + x105); + x107 = (x30 + x106); + x108 = (x107 & UINT64_C(0x3ffffffffffffff)); + x109 = (uint8_t)(x107 >> 58); + x110 = (x29 + (uint64_t)x109); + x111 = (x28 + x110); + x112 = (x27 + x111); + x113 = (x26 + x112); + x114 = (x25 + x113); + x115 = (x24 + x114); + x116 = (x23 + x115); + x117 = (x116 & UINT64_C(0x3ffffffffffffff)); + x118 = (uint8_t)(x116 >> 58); + x119 = (x22 + (uint64_t)x118); + x120 = (x21 + x119); + x121 = (x20 + x120); + x122 = (x19 + x121); + x123 = (x18 + x122); + x124 = (x17 + x123); + x125 = (x16 + x124); + x126 = (x125 & UINT64_C(0x3ffffffffffffff)); + x127 = (uint8_t)(x125 >> 58); + x128 = (x15 + (uint64_t)x127); + x129 = (x14 + x128); + x130 = (x13 + x129); + x131 = (x12 + x130); + x132 = (x11 + x131); + x133 = (x10 + x132); + x134 = (x9 + x133); + x135 = (x7 + (uint64_t)x8); + x136 = (x6 + x135); + x137 = (x5 + x136); + x138 = (x4 + x137); + x139 = (x3 + x138); + x140 = (x2 + x139); + x141 = (x1 + x140); + out1[0] = x74; + out1[1] = x83; + out1[2] = x92; + out1[3] = x100; + out1[4] = x108; + out1[5] = x117; + 
out1[6] = x126; + out1[7] = x134; + out1[8] = x141; +} + +/* END verbatim fiat code */ + +/* curve-related constants */ + +static const limb_t const_one[9] = { + UINT64_C(0x0000000000000001), UINT64_C(0x0000000000000000), + UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), + UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), + UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), + UINT64_C(0x0000000000000000) +}; + +static const limb_t const_b[9] = { + UINT64_C(0x03451FD46B503F00), UINT64_C(0x00F7E20F4B0D3C7B), + UINT64_C(0x000BD3BB1BF07357), UINT64_C(0x0147B1FA4DEC594B), + UINT64_C(0x018EF109E1561939), UINT64_C(0x026CC57CEE2D2264), + UINT64_C(0x00540EEA2DA725B9), UINT64_C(0x02687E4A688682DA), + UINT64_C(0x0051953EB9618E1C) +}; + +/* LUT for scalar multiplication by comb interleaving */ +static const pt_aff_t lut_cmb[13][16] = { + { + { { UINT64_C(0x017E7E31C2E5BD66), UINT64_C(0x022CF0615A90A6FE), + UINT64_C(0x00127A2FFA8DE334), UINT64_C(0x01DFBF9D64A3F877), + UINT64_C(0x006B4D3DBAA14B5E), UINT64_C(0x014FED487E0A2BD8), + UINT64_C(0x015B4429C6481390), UINT64_C(0x03A73678FB2D988E), + UINT64_C(0x00C6858E06B70404) }, + { UINT64_C(0x00BE94769FD16650), UINT64_C(0x031C21A89CB09022), + UINT64_C(0x039013FAD0761353), UINT64_C(0x02657BD099031542), + UINT64_C(0x03273E662C97EE72), UINT64_C(0x01E6D11A05EBEF45), + UINT64_C(0x03D1BD998F544495), UINT64_C(0x03001172297ED0B1), + UINT64_C(0x011839296A789A3B) } }, + { { UINT64_C(0x01919D2EDE37AD7D), UINT64_C(0x0124218B0CBA8169), + UINT64_C(0x03D16B59FE21BAEB), UINT64_C(0x0128E920C814769A), + UINT64_C(0x012D7A8DD1AD3F16), UINT64_C(0x008F66AE796B5E84), + UINT64_C(0x0159479B52A6E5B1), UINT64_C(0x0065776475A992D6), + UINT64_C(0x01A73D352443DE29) }, + { UINT64_C(0x03588CA1EE86C0E5), UINT64_C(0x01726F24E9641097), + UINT64_C(0x00ED1DEC3C70CF10), UINT64_C(0x033E3715D6C0B56B), + UINT64_C(0x03A355CEEC2E2DD4), UINT64_C(0x02A740C5F4BE2AC7), + UINT64_C(0x03814F2F1557FA82), UINT64_C(0x0377665E7E1B1B2A), + 
UINT64_C(0x013E9B03B97DFA62) } }, + { { UINT64_C(0x01AB5096EC8F3078), UINT64_C(0x01F879B624C5CE35), + UINT64_C(0x03EAF137E79A329D), UINT64_C(0x01B578C0508DC44B), + UINT64_C(0x00F177ACE4383C0C), UINT64_C(0x014FC34933C0F6AE), + UINT64_C(0x00EB0BF7A596EFDB), UINT64_C(0x00CB1CF6F0CE4701), + UINT64_C(0x00652BF3C52927A4) }, + { UINT64_C(0x033CC3E8DEB090CB), UINT64_C(0x0001C95CD53DFE05), + UINT64_C(0x000211CF5FF79D1F), UINT64_C(0x03241CB3CDD0C455), + UINT64_C(0x01A0347087BB6897), UINT64_C(0x01CB80147B7605F2), + UINT64_C(0x00112911CD8FE8E8), UINT64_C(0x035BB228ADCC452A), + UINT64_C(0x015BE6EF1BDD6601) } }, + { { UINT64_C(0x01CEAD882816ECD4), UINT64_C(0x014FD43F70986680), + UINT64_C(0x01F30DCE3BBC46F9), UINT64_C(0x002AFF1A6363269B), + UINT64_C(0x02F7114C5D8C308D), UINT64_C(0x01520C8A3C0634B0), + UINT64_C(0x0073A0C5F22E0E8F), UINT64_C(0x018D1BBAD97F682C), + UINT64_C(0x0056D5D1D99D5B7F) }, + { UINT64_C(0x006B8BC90525251B), UINT64_C(0x019C4A9777BF1ED7), + UINT64_C(0x0234591CE1A5F9E7), UINT64_C(0x024F37B278AE548E), + UINT64_C(0x0226CBDE556BD0F2), UINT64_C(0x02093C375C76F662), + UINT64_C(0x0168478B5C582D02), UINT64_C(0x0284434760C5E8E7), + UINT64_C(0x003D2D1B7D9BAAA2) } }, + { { UINT64_C(0x0345627967CBE207), UINT64_C(0x002EAF61734A1987), + UINT64_C(0x016DF725A318F4F5), UINT64_C(0x00E584D368D7CF15), + UINT64_C(0x01B8C6B6657429E1), UINT64_C(0x0221D1A64B12AC51), + UINT64_C(0x016D488ED34541B9), UINT64_C(0x00609A8BD6FC55C5), + UINT64_C(0x01585389E359E1E2) }, + { UINT64_C(0x02A0EA86B9AD2A4E), UINT64_C(0x030ABA4A2203CD0E), + UINT64_C(0x02ECF4ABFD87D736), UINT64_C(0x01D5815EB2103FD5), + UINT64_C(0x023DDB446E0D69E5), UINT64_C(0x03873AEDB2096E89), + UINT64_C(0x02E938E3088A654E), UINT64_C(0x03CE7C2D5555E89E), + UINT64_C(0x002A2E618C9A8AED) } }, + { { UINT64_C(0x00C0E02DDA0CDB9A), UINT64_C(0x030093E9326A40BB), + UINT64_C(0x01AEBE3191085015), UINT64_C(0x00CC998F686F466C), + UINT64_C(0x00F2991652F3DBC5), UINT64_C(0x0305E12550FBCB15), + UINT64_C(0x00315CFED5DC7ED7), 
UINT64_C(0x03FD51BC68E55CED), + UINT64_C(0x008A75841259FDED) }, + { UINT64_C(0x00874F92CE48C808), UINT64_C(0x032038FD2066D756), + UINT64_C(0x0331914A95336DCA), UINT64_C(0x003A2D0A92ACE248), + UINT64_C(0x00E0B9B82B1BC8A9), UINT64_C(0x002F4124FB4BA575), + UINT64_C(0x00FB2293ADD56621), UINT64_C(0x00A6127432A1DC15), + UINT64_C(0x0096FB303FCBBA21) } }, + { { UINT64_C(0x0087848D32FBCDA7), UINT64_C(0x030EC02ACE3BFE06), + UINT64_C(0x025E79AB88EE94BE), UINT64_C(0x002380F265A8D542), + UINT64_C(0x02AF5B866132C459), UINT64_C(0x006D308E13BB74AF), + UINT64_C(0x024861A93F736CDE), UINT64_C(0x02B6735E1974AD24), + UINT64_C(0x007E3E98F984C396) }, + { UINT64_C(0x011A01FB022A71C9), UINT64_C(0x027AABE445FA7DCA), + UINT64_C(0x01D351CBFBBC3619), UINT64_C(0x0160E2F1D8FC9B7F), + UINT64_C(0x025C1E212AC1BD5D), UINT64_C(0x03550871A71E99EB), + UINT64_C(0x02D5A08CED50A386), UINT64_C(0x03B6A468649B6A8F), + UINT64_C(0x0108EE58EB6D781F) } }, + { { UINT64_C(0x01AFE337BCB8DB55), UINT64_C(0x0365A6078FE4AF7A), + UINT64_C(0x03D1C8FC0331D9B8), UINT64_C(0x009F6F403FF9E1D6), + UINT64_C(0x02DF128E11B91CCE), UINT64_C(0x01028214B5A5ED4C), + UINT64_C(0x014300FB8FBCC30B), UINT64_C(0x0197C105563F151B), + UINT64_C(0x006B6AD89ABCB924) }, + { UINT64_C(0x02343480A1475465), UINT64_C(0x036433111AAF7655), + UINT64_C(0x022232C96C99246F), UINT64_C(0x0322651C2A008523), + UINT64_C(0x0197485ED57E9062), UINT64_C(0x02B4832E92D8841A), + UINT64_C(0x02DBF63DF0496A9B), UINT64_C(0x0075A9F399348CCF), + UINT64_C(0x01B468DA27157139) } }, + { { UINT64_C(0x02F817A853110AE0), UINT64_C(0x00C10ABC3469041D), + UINT64_C(0x0399B5681380FF8C), UINT64_C(0x0399D3F80A1F7D39), + UINT64_C(0x0269250858760A69), UINT64_C(0x03E8ACED3599493C), + UINT64_C(0x023906A99EE9E269), UINT64_C(0x03684E82E1D19164), + UINT64_C(0x01B00DDB707F130E) }, + { UINT64_C(0x01B9CB7C70E64647), UINT64_C(0x00156530ADD57D4D), + UINT64_C(0x0357F16ADF420E69), UINT64_C(0x013BDB742FC34BD9), + UINT64_C(0x0322A1323DF9DA56), UINT64_C(0x01A6442A635A2B0A), + UINT64_C(0x01DD106B799534CF), 
UINT64_C(0x01DB6F04475392BB), + UINT64_C(0x0085683F1D7DB165) } }, + { { UINT64_C(0x00FF0B2418D6A19B), UINT64_C(0x03D0C79C96EF791E), + UINT64_C(0x0157D7A45970DFEC), UINT64_C(0x0258D899A59E48C9), + UINT64_C(0x033790E7F1FA3B30), UINT64_C(0x0177D51FBFFC2B36), + UINT64_C(0x021A07245B77E075), UINT64_C(0x00D21F03E5230B56), + UINT64_C(0x00998DCCE486419C) }, + { UINT64_C(0x01091A695BFD0575), UINT64_C(0x013627AA7EFF912A), + UINT64_C(0x039991631C377F5A), UINT64_C(0x00FFCBAE33E6C3B0), + UINT64_C(0x036545772773AD96), UINT64_C(0x02DEF3D2B3143BB8), + UINT64_C(0x01B245D67D28AEE2), UINT64_C(0x03B5730E50925D4D), + UINT64_C(0x0137D5DA0626A021) } }, + { { UINT64_C(0x02EF399693C8C9ED), UINT64_C(0x032480E4E91B4B50), + UINT64_C(0x03EAED827D75B37A), UINT64_C(0x02B9358A8C276525), + UINT64_C(0x019C467FA946257E), UINT64_C(0x03B457A606548F9D), + UINT64_C(0x02D3B10268BB98C2), UINT64_C(0x034BECF321542167), + UINT64_C(0x01A1CBB2C11A742B) }, + { UINT64_C(0x020BC43C9CBA4DF5), UINT64_C(0x02C3C5D92732D879), + UINT64_C(0x03A372C63EEC57C9), UINT64_C(0x014F6920CA56FAD0), + UINT64_C(0x036BAFA7F7DF741A), UINT64_C(0x01464F9B06028A5B), + UINT64_C(0x000CE62E83C0059C), UINT64_C(0x00F520B04B69F179), + UINT64_C(0x011A209D7D4F8EEB) } }, + { { UINT64_C(0x01C6A5ECE2AF535C), UINT64_C(0x007C6B09AB9601A8), + UINT64_C(0x038E9A5EC53E207E), UINT64_C(0x03F26BD6C2BFA78F), + UINT64_C(0x010CDD45101F6F83), UINT64_C(0x0217ECA0924348D3), + UINT64_C(0x0147B8EEE7A39BA7), UINT64_C(0x024DDB6C72B3B17D), + UINT64_C(0x01AE0B275D729015) }, + { UINT64_C(0x0015C3536FA0D000), UINT64_C(0x02D1142A348E15B6), + UINT64_C(0x0327BB07DD0C2213), UINT64_C(0x0187BA5FF3D0F09E), + UINT64_C(0x0044C2DC0E108433), UINT64_C(0x0034160CAD0C591E), + UINT64_C(0x028471C7D759FF89), UINT64_C(0x00E019A28A163F01), + UINT64_C(0x00F2C97A825E5385) } }, + { { UINT64_C(0x038C2460BF70ACE0), UINT64_C(0x0383AC70974FEC4F), + UINT64_C(0x03E2AA648FF27E41), UINT64_C(0x0245F0DBB9355BA1), + UINT64_C(0x005499994AA91856), UINT64_C(0x006C41EC471DCB23), + 
UINT64_C(0x01FF9D2007310265), UINT64_C(0x0060D28D61D29BD7), + UINT64_C(0x0154E84C6D5C5A9A) }, + { UINT64_C(0x0325BCE404C78230), UINT64_C(0x038A9519CB9ADB50), + UINT64_C(0x0370A6A5972F5EED), UINT64_C(0x00D5CBEF06834788), + UINT64_C(0x00151666A6DEE354), UINT64_C(0x0008A831FD9B0A22), + UINT64_C(0x0360D3F15A923EB0), UINT64_C(0x011CEB88A8A3E02E), + UINT64_C(0x00CD0FDCE9171910) } }, + { { UINT64_C(0x017643017002D68B), UINT64_C(0x01581124BB115A0D), + UINT64_C(0x03AEDA0D3163CB21), UINT64_C(0x00F69C67520D44D4), + UINT64_C(0x03E135854D80B212), UINT64_C(0x0393E18B0CFCD461), + UINT64_C(0x01E646F8739535D0), UINT64_C(0x02DA9D8A9353AE22), + UINT64_C(0x0160373EDF8218F9) }, + { UINT64_C(0x03E6AECA5D90B740), UINT64_C(0x03FF9C27516B2CFC), + UINT64_C(0x034F4A8BB572E463), UINT64_C(0x007B64BAF1504EE1), + UINT64_C(0x021A1B22011EFA49), UINT64_C(0x03D4B0EED295BDE3), + UINT64_C(0x006A3FA9FD193C5C), UINT64_C(0x038717960A1006B0), + UINT64_C(0x00F1597050014DCF) } }, + { { UINT64_C(0x003927618EDA25DC), UINT64_C(0x0361657547DB658B), + UINT64_C(0x02B8E847FFB9EF33), UINT64_C(0x001A1DB5CA45000E), + UINT64_C(0x037664A1305CA9BC), UINT64_C(0x0218997B0A2FBCE3), + UINT64_C(0x01A085FF9F45131E), UINT64_C(0x00A1F6CF07EFF2D9), + UINT64_C(0x0174C644D6C94B68) }, + { UINT64_C(0x007BBBC4821A0C30), UINT64_C(0x02649F09BAEFEF46), + UINT64_C(0x0332D706D303F067), UINT64_C(0x0254B383642D4309), + UINT64_C(0x0395AD34B7BE0E21), UINT64_C(0x02D9107F2D73D7AD), + UINT64_C(0x037B7820233EF8FC), UINT64_C(0x0279A016B3256D06), + UINT64_C(0x011AF3A7C2F87F41) } }, + { { UINT64_C(0x0257D0E0C16A8803), UINT64_C(0x03ED792238920488), + UINT64_C(0x001AC09CD6B220DC), UINT64_C(0x02A4132750A7F053), + UINT64_C(0x00A5E7726CD65543), UINT64_C(0x01F0A9985C982A0F), + UINT64_C(0x0307B7DB57458965), UINT64_C(0x01985401A96336DC), + UINT64_C(0x00D8E9920CF30F0C) }, + { UINT64_C(0x024677C739792D19), UINT64_C(0x02F65F1ED50C62B2), + UINT64_C(0x0068CAE4CC263AA1), UINT64_C(0x00C913451E404E6A), + UINT64_C(0x00BED1AA30F76B8C), UINT64_C(0x03C4320182BBEDCB), + 
UINT64_C(0x00A30EC8B5406328), UINT64_C(0x00E61F7C2704E885), + UINT64_C(0x0127B023B5454A66) } }, + }, + { + { { UINT64_C(0x00E9E114E43C6A8B), UINT64_C(0x027E2C20105A2E23), + UINT64_C(0x03D5B5FA745094EE), UINT64_C(0x01337080223BD7FF), + UINT64_C(0x00D8CCA5AD4589D8), UINT64_C(0x0132DCA140336E19), + UINT64_C(0x0302098FAB8EE167), UINT64_C(0x00625B5791BF1AAD), + UINT64_C(0x01ECCAEB2EF79CDB) }, + { UINT64_C(0x01886BBC26B04438), UINT64_C(0x004F43B6559C663D), + UINT64_C(0x035D8CA99B91E616), UINT64_C(0x01354ED06659D27A), + UINT64_C(0x0054DF4765586194), UINT64_C(0x021052BBF70881C7), + UINT64_C(0x031C5EA1F6288A8B), UINT64_C(0x018AC1ACD36CBDFF), + UINT64_C(0x002E5EDF2873FF52) } }, + { { UINT64_C(0x0192DA26804ED5E3), UINT64_C(0x019DEC17F31925DE), + UINT64_C(0x01585208EBD95AC4), UINT64_C(0x039C6674D066C682), + UINT64_C(0x000715A11D1C0CFA), UINT64_C(0x032AD56C1F218BD5), + UINT64_C(0x0310FABD23E934F9), UINT64_C(0x009AF7F574942B50), + UINT64_C(0x005E0976782CAEF4) }, + { UINT64_C(0x038B0A7A2A7D5B37), UINT64_C(0x0315653FB7DA77BD), + UINT64_C(0x023F157F76616F31), UINT64_C(0x03C8C103329ACAE7), + UINT64_C(0x005A72502EE9CFA2), UINT64_C(0x03204345A2A46FC3), + UINT64_C(0x03666DC71F8A5B63), UINT64_C(0x01671725C07390A9), + UINT64_C(0x01E82DA80D6C216A) } }, + { { UINT64_C(0x02F28395A29D2024), UINT64_C(0x031A09859C9B6A2D), + UINT64_C(0x0047073FD20F177A), UINT64_C(0x03D961594C7CA571), + UINT64_C(0x019555237A9B2EC3), UINT64_C(0x029EFFFB7289E9D9), + UINT64_C(0x008D541E497546F7), UINT64_C(0x0270E93D46DCEE94), + UINT64_C(0x00396B23A204BEFD) }, + { UINT64_C(0x024295052DDD93A9), UINT64_C(0x0081670F33C07709), + UINT64_C(0x00B1D851D4CDFDA9), UINT64_C(0x014DF8329142BB29), + UINT64_C(0x00CDDB9A15F7FCFB), UINT64_C(0x0225454F3A1F5B86), + UINT64_C(0x01A46C8B126C191D), UINT64_C(0x03D3D3229D104DF9), + UINT64_C(0x018B36AD8A91DE12) } }, + { { UINT64_C(0x008FA75A590E92D6), UINT64_C(0x02223AFBB681AD2D), + UINT64_C(0x000DD9E71FEC0AB1), UINT64_C(0x03B4A988F4ABFEC5), + UINT64_C(0x02BDD3FD9A8FB4C8), 
UINT64_C(0x037A5B9AD9171110), + UINT64_C(0x0225D2934ADB68F2), UINT64_C(0x008BA6F5E067B164), + UINT64_C(0x014EA0A8B0C5768B) }, + { UINT64_C(0x000AB8407662F537), UINT64_C(0x02F781E22DFF31BF), + UINT64_C(0x03E22656A1F21F75), UINT64_C(0x01054C62C579B73D), + UINT64_C(0x0177A8529E6C1116), UINT64_C(0x03211865DCC5D46F), + UINT64_C(0x012706123E7C2723), UINT64_C(0x0396C31AADED99AB), + UINT64_C(0x01637E315762AAD0) } }, + { { UINT64_C(0x03847D336B9839DA), UINT64_C(0x02200E98133D266E), + UINT64_C(0x0039A8261B62D7DC), UINT64_C(0x033295F072A9D5EA), + UINT64_C(0x000C3FE4DCCB2B2A), UINT64_C(0x03907B7861011A91), + UINT64_C(0x023BC0EFEDB5EE58), UINT64_C(0x0288D6CD63BC03CD), + UINT64_C(0x01280E54E8A553CA) }, + { UINT64_C(0x036493BB1C1962CE), UINT64_C(0x0361F9CAD30DAC24), + UINT64_C(0x023856E058F7248C), UINT64_C(0x01EBC4CE9BBA1951), + UINT64_C(0x00FE14205169D78D), UINT64_C(0x01237D85837C8C98), + UINT64_C(0x017C4E2A95E40B90), UINT64_C(0x004E446F2E2C7819), + UINT64_C(0x0007FA80EDA9F2C8) } }, + { { UINT64_C(0x009A65815D2BF9A7), UINT64_C(0x027CB047E8DF8668), + UINT64_C(0x0391C32A60456677), UINT64_C(0x01CBC26A69AB3F09), + UINT64_C(0x0334D4D8DE25229B), UINT64_C(0x0383C0FA69B0DD79), + UINT64_C(0x01D206CDCC54B9E2), UINT64_C(0x02E51DE738338588), + UINT64_C(0x006112D5229EA977) }, + { UINT64_C(0x03CE85BEE20C30CB), UINT64_C(0x02FEBC02D12BC9D5), + UINT64_C(0x02AEDC3A968E7052), UINT64_C(0x02090B846E5AD878), + UINT64_C(0x00E4B6AEE2DDC2E3), UINT64_C(0x00269BE91139208A), + UINT64_C(0x02FEA688006D25C9), UINT64_C(0x002F5EFACF2F785D), + UINT64_C(0x009FE82D05CAC96A) } }, + { { UINT64_C(0x02EE8F69AB2E6D92), UINT64_C(0x0213F64F73B9A354), + UINT64_C(0x000A9DDA2E925D3C), UINT64_C(0x0192E31297313B4F), + UINT64_C(0x02B3145C4DD947AF), UINT64_C(0x03401B39394615DA), + UINT64_C(0x01C98D9DFBE6AE7D), UINT64_C(0x02BB8069EC7A7746), + UINT64_C(0x00A8BDC9CF002A7B) }, + { UINT64_C(0x00A3BF702EB71C5F), UINT64_C(0x00A25EDAE6446CE2), + UINT64_C(0x00108D65D5F288B8), UINT64_C(0x02FF972C1494ABED), + UINT64_C(0x0398342A5B4A102C), 
UINT64_C(0x00CD83A6E3855FF3), + UINT64_C(0x02D6848441981C93), UINT64_C(0x0335A209E0E8D9AA), + UINT64_C(0x01ED6F04D42258A5) } }, + { { UINT64_C(0x01FC3B47C1490429), UINT64_C(0x01B9A21B27B6F4B1), + UINT64_C(0x0193FF421EE32901), UINT64_C(0x03CC9F551147E445), + UINT64_C(0x01773B6B14BB7010), UINT64_C(0x005040A2326FD6EA), + UINT64_C(0x01949206C0BB7211), UINT64_C(0x02643DEA7E3C37CC), + UINT64_C(0x01725F6694BF623F) }, + { UINT64_C(0x014D9BD8587CA374), UINT64_C(0x020B8D6C1F3C983C), + UINT64_C(0x0395B0E3A7CCCE2F), UINT64_C(0x0071FCA214298293), + UINT64_C(0x038CF96F2462B942), UINT64_C(0x00DD1C97E2E6BCA4), + UINT64_C(0x00DEC4ACF114C9D6), UINT64_C(0x005DCE68C5288587), + UINT64_C(0x017B1DC591DEA2A9) } }, + { { UINT64_C(0x01A03D95A3ACF0F9), UINT64_C(0x0123031B8850C86B), + UINT64_C(0x0269AB94408A086E), UINT64_C(0x0181DEF245438334), + UINT64_C(0x00AB4F62CC0E7BA5), UINT64_C(0x0294A03CC0C2A98D), + UINT64_C(0x02234FBFCCAA23F6), UINT64_C(0x0304B9AF501D1961), + UINT64_C(0x0037258E9F9B8667) }, + { UINT64_C(0x0344657939436D81), UINT64_C(0x010709812083B7CE), + UINT64_C(0x00DBCA5B5A81714D), UINT64_C(0x00396E25D33E3896), + UINT64_C(0x00C0A65FA9547A23), UINT64_C(0x03F6796EDC3F72D8), + UINT64_C(0x022AA55EA0053589), UINT64_C(0x031E838C917FDA1B), + UINT64_C(0x014AF707C515D93F) } }, + { { UINT64_C(0x00E48C0436C8D427), UINT64_C(0x02A85992128BD380), + UINT64_C(0x03861C4538E26A42), UINT64_C(0x027A6E7784D042DB), + UINT64_C(0x0129555575E66B0A), UINT64_C(0x017362D6E2713125), + UINT64_C(0x00A08F82306ED961), UINT64_C(0x007FCDDA0F78CBC0), + UINT64_C(0x010F4598B67DA097) }, + { UINT64_C(0x03448C05AD400463), UINT64_C(0x03CB26D3975CCFCD), + UINT64_C(0x0067B9FD99A88F1D), UINT64_C(0x001F257A56DADDC1), + UINT64_C(0x03AEAFB6384BA84C), UINT64_C(0x0010C9301FE7F887), + UINT64_C(0x03D65C213A46C68C), UINT64_C(0x029BB4A1F8A5E81E), + UINT64_C(0x00C1838AFD6E3F39) } }, + { { UINT64_C(0x03CE07505924C15F), UINT64_C(0x0043A08ED31A1B99), + UINT64_C(0x0339C4C25E8B8B88), UINT64_C(0x0380DFF73DEBF4DA), + 
UINT64_C(0x031FBA11E366BE60), UINT64_C(0x001D2B7C0FA8BD42), + UINT64_C(0x009DE3ACE8B8A24D), UINT64_C(0x02B5F07FB5B5BD4F), + UINT64_C(0x018247CA534C6F7F) }, + { UINT64_C(0x01E0A02B3DBEEE78), UINT64_C(0x001E200666AB15CD), + UINT64_C(0x0186BEA684E8C48E), UINT64_C(0x00F3F1894CDB68E0), + UINT64_C(0x032ECC59DF1BBB85), UINT64_C(0x02D06C53B9B53209), + UINT64_C(0x004A86739B90C8A8), UINT64_C(0x03AD8A97D98C89BC), + UINT64_C(0x00F01344204A1E2F) } }, + { { UINT64_C(0x03582A68690F8C80), UINT64_C(0x012E151E3D7485DA), + UINT64_C(0x02527AD70F6AC0B4), UINT64_C(0x018B935CB107A3CD), + UINT64_C(0x036AA37D7A7E3625), UINT64_C(0x034CFB229578C67F), + UINT64_C(0x00A3FBC7740B7E16), UINT64_C(0x03D0C73BF6F5756D), + UINT64_C(0x009FFA51FEAC33FA) }, + { UINT64_C(0x0208A8D791982847), UINT64_C(0x03EDDBD997642B6C), + UINT64_C(0x025D551977914C26), UINT64_C(0x02DD352759CA1376), + UINT64_C(0x00654090371E1000), UINT64_C(0x004AC720BEC03C34), + UINT64_C(0x03C06BE7F6C95884), UINT64_C(0x01FA475777DF0765), + UINT64_C(0x00A99275E15E46C7) } }, + { { UINT64_C(0x016A50E0A643409F), UINT64_C(0x0122617180184D38), + UINT64_C(0x0105E92945AC97AB), UINT64_C(0x01A1B865FE31BAD8), + UINT64_C(0x033E0DC143E2D46B), UINT64_C(0x03DD157DF58A1946), + UINT64_C(0x02DF8E8C2EC7FB6D), UINT64_C(0x00E031916AFF1478), + UINT64_C(0x017A7BE92C9A8A1C) }, + { UINT64_C(0x02063F9B1AF2F29D), UINT64_C(0x0275AF845DF62346), + UINT64_C(0x010016B05B22BD9F), UINT64_C(0x03772DD9DE8A3F70), + UINT64_C(0x011B489BE6C04500), UINT64_C(0x0122DEDE177B839E), + UINT64_C(0x008B9ED1DBF81860), UINT64_C(0x00CDA67D0D8CEDC1), + UINT64_C(0x01984030C18BF083) } }, + { { UINT64_C(0x02791762137B93A2), UINT64_C(0x01F9DE3C5491E823), + UINT64_C(0x01E50243877F23E4), UINT64_C(0x0144F0B0081F37BC), + UINT64_C(0x00D7A781DD6DE5E2), UINT64_C(0x036A5EFE959E26D1), + UINT64_C(0x03A51922038AEEA2), UINT64_C(0x0054D452C10BD4F0), + UINT64_C(0x01B8A51151884AEF) }, + { UINT64_C(0x0241D85F77A00331), UINT64_C(0x023528AF19A313C4), + UINT64_C(0x0176DFC98292A79E), UINT64_C(0x03AADEBB4F7B06B1), + 
UINT64_C(0x00DAB141E4CE727F), UINT64_C(0x0388E18953348B42), + UINT64_C(0x03FD5A751265E468), UINT64_C(0x024673750B3DB1AB), + UINT64_C(0x00E57DD5F1A23923) } }, + { { UINT64_C(0x019D69A891328CE4), UINT64_C(0x008F01053E7A765C), + UINT64_C(0x030B5EE16F612292), UINT64_C(0x020A99C1AB590289), + UINT64_C(0x01D62D438BE82D64), UINT64_C(0x037D8D3250B87A70), + UINT64_C(0x03ACF90A3316DB71), UINT64_C(0x011F2D638816284F), + UINT64_C(0x000D63B1CF94E578) }, + { UINT64_C(0x026288694B620A88), UINT64_C(0x01D7EC9688B643F5), + UINT64_C(0x0329AC344C36F494), UINT64_C(0x01F7C91E725E18A1), + UINT64_C(0x02FEB98C58EA0341), UINT64_C(0x00A508DDA6BF1EC5), + UINT64_C(0x00733B2463BD7A85), UINT64_C(0x0384EBC8AB299B36), + UINT64_C(0x00074909BD45312A) } }, + { { UINT64_C(0x03E08C2C5C95FF29), UINT64_C(0x00C670644C808211), + UINT64_C(0x012D8021671FE338), UINT64_C(0x039F033363AA44CD), + UINT64_C(0x0337E7DB83662796), UINT64_C(0x03DDF327E2706223), + UINT64_C(0x005FBC050700CAFA), UINT64_C(0x020FC3C9D5CBB556), + UINT64_C(0x0105E1BC0BF33DC4) }, + { UINT64_C(0x03F3D06894519732), UINT64_C(0x029248D001BE65FE), + UINT64_C(0x011EC77A8F1A11E3), UINT64_C(0x0365A31B2279F38F), + UINT64_C(0x014E2577747A12CD), UINT64_C(0x0160E01F73DAA243), + UINT64_C(0x01E3B9CC567EDCCC), UINT64_C(0x03E1B7F6A7B42960), + UINT64_C(0x01809B863B2F3F5A) } }, + }, + { + { { UINT64_C(0x0373B24CDED2EB86), UINT64_C(0x02402CCFAA26116F), + UINT64_C(0x005073857CDB6102), UINT64_C(0x01AE6F89575C7623), + UINT64_C(0x022FF653B3A939A7), UINT64_C(0x0190B7CB0A3545D4), + UINT64_C(0x02353B26D8170467), UINT64_C(0x003C64522D17855F), + UINT64_C(0x01E5D565F776B34F) }, + { UINT64_C(0x025185A2C4B5DE1E), UINT64_C(0x02B3AFFAB7E382B2), + UINT64_C(0x0194B86479736527), UINT64_C(0x026B4BE5E81594AE), + UINT64_C(0x01D6960578E25220), UINT64_C(0x00993E60F26C1FF2), + UINT64_C(0x019B938479BA949D), UINT64_C(0x01FCA32034CAD7A3), + UINT64_C(0x017759280D580A6A) } }, + { { UINT64_C(0x02346AE90C2CA70B), UINT64_C(0x013757CC55F070F5), + UINT64_C(0x017E107D86CA7681), 
UINT64_C(0x005AD490EBA565E0), + UINT64_C(0x02C9C614514CB60C), UINT64_C(0x03BEAF2AC475AF2B), + UINT64_C(0x008C591B4CE3CC44), UINT64_C(0x014A9DDFA491CE57), + UINT64_C(0x001268735793A719) }, + { UINT64_C(0x007F97B31426994D), UINT64_C(0x01A96DF191B418F1), + UINT64_C(0x027DF055755518F4), UINT64_C(0x025DAAC2254C5D3C), + UINT64_C(0x0262D34E340FC2C3), UINT64_C(0x01F14824C8F72557), + UINT64_C(0x02A4819301BACB9F), UINT64_C(0x0268E03E6BEAB510), + UINT64_C(0x00EA805018D6E199) } }, + { { UINT64_C(0x00FEA5E6ABEE1F7B), UINT64_C(0x00538DB9B2D8E2D4), + UINT64_C(0x0305BA64218318A0), UINT64_C(0x022BD39A67AA3F20), + UINT64_C(0x01157632723B17F9), UINT64_C(0x00C8DAAF646E78C9), + UINT64_C(0x0158EFBD367A27CD), UINT64_C(0x011375E95CB4F12F), + UINT64_C(0x005E40D5A4D44054) }, + { UINT64_C(0x0297475C1D71A4FA), UINT64_C(0x03C1DABD876A7908), + UINT64_C(0x0038CB20D99CAE76), UINT64_C(0x03D63A3A005959E9), + UINT64_C(0x02AF78B93B764B6F), UINT64_C(0x0109A0342CFC2D30), + UINT64_C(0x01C301BEC294E434), UINT64_C(0x01972384DAD5FD67), + UINT64_C(0x01C3F5C9DF46F8D3) } }, + { { UINT64_C(0x03C115A0432574BE), UINT64_C(0x01495DBDA1F302E5), + UINT64_C(0x010568069CC94673), UINT64_C(0x000A2EEAB0E37751), + UINT64_C(0x033EE9D566902CC4), UINT64_C(0x006B34AFED584340), + UINT64_C(0x02B50803E9B165A1), UINT64_C(0x03E38D1CBBEC3EC2), + UINT64_C(0x0023CF19CC14F82C) }, + { UINT64_C(0x01CCAAFE462EC0F0), UINT64_C(0x02E714845D028EE6), + UINT64_C(0x02DCB47FF5021595), UINT64_C(0x030908AA9B079880), + UINT64_C(0x00371B5A69854385), UINT64_C(0x0185FE540E9AE9FF), + UINT64_C(0x02EE86F4F1A83CE4), UINT64_C(0x03AB730574E67F57), + UINT64_C(0x01F85953DB252C4B) } }, + { { UINT64_C(0x02EC254BFD8CB3CC), UINT64_C(0x01DFEE8DE5F7858B), + UINT64_C(0x019C8AD2711F9096), UINT64_C(0x00B1E57CC4C26707), + UINT64_C(0x03511BB53983E402), UINT64_C(0x02A4019CDD626E9F), + UINT64_C(0x03BA2E0AC5C44D84), UINT64_C(0x00A965FE7663AD49), + UINT64_C(0x01739420DA2DD7E5) }, + { UINT64_C(0x001E59C7B82FB619), UINT64_C(0x007B29CCEEF8AD83), + UINT64_C(0x02907C71BFFAE931), 
UINT64_C(0x003F110EC15CB5CF), + UINT64_C(0x02A76ECA58531793), UINT64_C(0x02D8D0EB5EA2FA03), + UINT64_C(0x0302231943B524FC), UINT64_C(0x01EBC24F8F0A0C29), + UINT64_C(0x019802CBF5F3CE73) } }, + { { UINT64_C(0x01852168BF26ECDA), UINT64_C(0x03BA5FFA1597B73C), + UINT64_C(0x00E55E47A88BF735), UINT64_C(0x03EF5511C575EFAA), + UINT64_C(0x03BEAAED274CB2F4), UINT64_C(0x01A2B7AEE5E82012), + UINT64_C(0x00161524928CEDED), UINT64_C(0x0243FB8CEB1DB1ED), + UINT64_C(0x00A939AAE7662875) }, + { UINT64_C(0x035FC996431E0BB4), UINT64_C(0x03871F05A029588C), + UINT64_C(0x024685D44F302D5A), UINT64_C(0x03D65DBBB0A24C64), + UINT64_C(0x031CCDBD89C13824), UINT64_C(0x03EEC80794841ADF), + UINT64_C(0x02BDD19433E827DB), UINT64_C(0x025D0DEF338BCA12), + UINT64_C(0x019DD1E057A3957F) } }, + { { UINT64_C(0x028221686CEBC7BE), UINT64_C(0x00550CAC829C5C56), + UINT64_C(0x024473DA711003E5), UINT64_C(0x01D2D356A63016BD), + UINT64_C(0x016B5C937B93F5AA), UINT64_C(0x016BA509AE911631), + UINT64_C(0x03BB387F2983AA08), UINT64_C(0x0087050F624145D1), + UINT64_C(0x00430D39E6B578E6) }, + { UINT64_C(0x02E690EFE2E3859D), UINT64_C(0x021D189217E0C7B9), + UINT64_C(0x03BC89797B1B794C), UINT64_C(0x01D6B16B566AB9D7), + UINT64_C(0x02935CEB8993E4D1), UINT64_C(0x03C0BF4C7D6967AE), + UINT64_C(0x00EA7B0862929371), UINT64_C(0x014624F22194B5D9), + UINT64_C(0x00D68221B3478C47) } }, + { { UINT64_C(0x03BEC558C2EB8133), UINT64_C(0x031106A5F911659D), + UINT64_C(0x00D07C39AEFB3CBE), UINT64_C(0x02F06E730A651F25), + UINT64_C(0x0183C527F019A937), UINT64_C(0x0153E778C8608775), + UINT64_C(0x0214C61DB43A7203), UINT64_C(0x00CD284ED5892F97), + UINT64_C(0x0198EB083CFD5B2B) }, + { UINT64_C(0x0393B136D6835A15), UINT64_C(0x03ED1013491B6647), + UINT64_C(0x00702068040A8E55), UINT64_C(0x0136DD3C55BF5BE4), + UINT64_C(0x03D053D6F8B28F3A), UINT64_C(0x00FAF9585D310B40), + UINT64_C(0x002690874B88A2A9), UINT64_C(0x02651384F1D8C181), + UINT64_C(0x00E5D3BFA7EC53DE) } }, + { { UINT64_C(0x033F039A91D85118), UINT64_C(0x03A170E9A74E89EC), + 
UINT64_C(0x03EBE8F17E2B4C68), UINT64_C(0x032E08DD52962FFF), + UINT64_C(0x01F682C887362E38), UINT64_C(0x02848A835A72A2EE), + UINT64_C(0x00AFA36F7A88966F), UINT64_C(0x02D505E8ED473B2D), + UINT64_C(0x007B6EF0E4DAA123) }, + { UINT64_C(0x03F322E8CD472029), UINT64_C(0x009B31F349123C63), + UINT64_C(0x024396A463AE29B2), UINT64_C(0x035A559411C8D9B7), + UINT64_C(0x0302AAF84FEF53A7), UINT64_C(0x00322717487DC79C), + UINT64_C(0x02CA6AE27A92266C), UINT64_C(0x03E6B6580391B525), + UINT64_C(0x00647CC677EE4353) } }, + { { UINT64_C(0x0015F4FB3CE12393), UINT64_C(0x013D9CD65B87D1CA), + UINT64_C(0x03ED1458BDACF05A), UINT64_C(0x011BC2A44D7A03F7), + UINT64_C(0x00D1E2748EE247CF), UINT64_C(0x025C05134193D6D7), + UINT64_C(0x03D8D4701057B20F), UINT64_C(0x03CD86409D914C19), + UINT64_C(0x0123EE9725146150) }, + { UINT64_C(0x03B85772CCE5DBF5), UINT64_C(0x024E60E34E33C627), + UINT64_C(0x00CEB58FBCFD7F20), UINT64_C(0x0213A9AF85D15B81), + UINT64_C(0x00879FD075FE76EA), UINT64_C(0x01883D1962AC7DA6), + UINT64_C(0x0041CDD770D92E82), UINT64_C(0x024CF83E19940701), + UINT64_C(0x0001A7D69F562E49) } }, + { { UINT64_C(0x03F06D3661D1EEDB), UINT64_C(0x01062600B09B6B3E), + UINT64_C(0x01A0A640D07EFC7A), UINT64_C(0x0317F67E20F296A1), + UINT64_C(0x034843017C701C3C), UINT64_C(0x033891152A103E33), + UINT64_C(0x01C00AE12BC93968), UINT64_C(0x0280A3403412AA1F), + UINT64_C(0x0111DA6A8E2C4EE1) }, + { UINT64_C(0x0138BBADC5A4238D), UINT64_C(0x02BB1A5504498DAF), + UINT64_C(0x03D55FD7A02F99F7), UINT64_C(0x030B36D2716AAE98), + UINT64_C(0x00846799916170BE), UINT64_C(0x021843A1130EBD86), + UINT64_C(0x01602A0048ED7277), UINT64_C(0x010F628883F5C170), + UINT64_C(0x00A879F20138FE97) } }, + { { UINT64_C(0x010B697E6BB71E17), UINT64_C(0x00A5FF1EE44F8A1A), + UINT64_C(0x02F0A65F0594ADDF), UINT64_C(0x01B97DFF3B989E00), + UINT64_C(0x02EBB1D34E1BC0B6), UINT64_C(0x0318AB0F908D45CA), + UINT64_C(0x006D84E0ECA51F49), UINT64_C(0x022CBEFDFAF29F0C), + UINT64_C(0x019FF3250EDA2D48) }, + { UINT64_C(0x0247BD9A1791633D), UINT64_C(0x001017CA6D44DB39), + 
UINT64_C(0x001392DBCF3C08AE), UINT64_C(0x00BBFD8C9245DBED), + UINT64_C(0x03C6094D363A2A9B), UINT64_C(0x0026C46C1B980722), + UINT64_C(0x014C00915831C495), UINT64_C(0x03480A51EA642A61), + UINT64_C(0x018A2CD0EE26C545) } }, + { { UINT64_C(0x00179F4F97812A25), UINT64_C(0x02A5E9E3F33BC581), + UINT64_C(0x000BD5248493D239), UINT64_C(0x02B7DE8E94D0B6E5), + UINT64_C(0x01D8674B49C2359A), UINT64_C(0x020163E368BE3C3B), + UINT64_C(0x0332717F9505C7C1), UINT64_C(0x035A143000B7EC9C), + UINT64_C(0x00C999A3E0BCCAF1) }, + { UINT64_C(0x007B047729EF75E3), UINT64_C(0x02CC12EE110A5B9B), + UINT64_C(0x0330E2E6286E55F0), UINT64_C(0x00C6FC4CB1CD5C12), + UINT64_C(0x014B93EA65F0CCE4), UINT64_C(0x01E5A20D3788D937), + UINT64_C(0x039AB1AC6BF17BFB), UINT64_C(0x0397FE82B1886D3A), + UINT64_C(0x000C112A21CE8FCD) } }, + { { UINT64_C(0x02B7C1C48CF8D334), UINT64_C(0x0078EAF1E0B9AA5A), + UINT64_C(0x0397B9A209EF9EF0), UINT64_C(0x001CFFAFD847B222), + UINT64_C(0x0321A14F818F0142), UINT64_C(0x0214D3F98F9D0ED8), + UINT64_C(0x011305B71C04D0D3), UINT64_C(0x03DE98EACA808006), + UINT64_C(0x01360AA21413198A) }, + { UINT64_C(0x028D3F07FD51E170), UINT64_C(0x023F03474306CBA2), + UINT64_C(0x034205D496752F99), UINT64_C(0x02D4BC03F380060F), + UINT64_C(0x01E2CE3EBF008299), UINT64_C(0x03EE2B7C9CF44A54), + UINT64_C(0x022CB7C6BCE06379), UINT64_C(0x03934E9100F4AD3F), + UINT64_C(0x001B8D6D7EA30D7F) } }, + { { UINT64_C(0x0175E6F14594D02E), UINT64_C(0x0107CFBBB666C104), + UINT64_C(0x0043C920F3FC7184), UINT64_C(0x01D3F596321DF679), + UINT64_C(0x034FBFA8E62660AC), UINT64_C(0x02F07B7B2F64B7D6), + UINT64_C(0x020B7A4B1CB30890), UINT64_C(0x0027370AF3A01ACE), + UINT64_C(0x004C3DF94ED57F1B) }, + { UINT64_C(0x02F7E28D420891BB), UINT64_C(0x00A165AF3355D551), + UINT64_C(0x03E2077F4C7840E2), UINT64_C(0x010A42F1F956CFC2), + UINT64_C(0x01586FF6FC545309), UINT64_C(0x00E2A2E3F8A44D6A), + UINT64_C(0x01BCD7CFAB0CD9EA), UINT64_C(0x02CD7B5AA257EF8B), + UINT64_C(0x01E161EB6461E56F) } }, + { { UINT64_C(0x03AA1E440B1B7656), 
UINT64_C(0x02DB3F4D449DEBD4), + UINT64_C(0x025617A010F1A335), UINT64_C(0x010C03757E20D72C), + UINT64_C(0x01EA95F9EFACD59B), UINT64_C(0x0126D8DDDE17B239), + UINT64_C(0x02DBF2D291F6AEC7), UINT64_C(0x02F6100FC8834353), + UINT64_C(0x00C18C83BB58FB77) }, + { UINT64_C(0x03754C15A7EEE80E), UINT64_C(0x00247AB9412690FE), + UINT64_C(0x016E9C7BD742F5DF), UINT64_C(0x02361FAE95827D75), + UINT64_C(0x029E41CC30EA15A1), UINT64_C(0x005F53D5863CB83F), + UINT64_C(0x0025C9FC701A2B9B), UINT64_C(0x0389C7702E9DAFBA), + UINT64_C(0x00ED3C35310B5895) } }, + }, + { + { { UINT64_C(0x0373C85A8201C48B), UINT64_C(0x000BE293272BB8C3), + UINT64_C(0x0299641D84048EF5), UINT64_C(0x012EE83CEE0A37DD), + UINT64_C(0x00D6A81ED893F8A3), UINT64_C(0x01988A5103EE9A5B), + UINT64_C(0x01495F90BE6C8319), UINT64_C(0x00954437A6A3C821), + UINT64_C(0x010E12D843E6580B) }, + { UINT64_C(0x007820FBE51DE678), UINT64_C(0x013364C5E0C684D4), + UINT64_C(0x009D1721196C2E40), UINT64_C(0x01933769A5FD2063), + UINT64_C(0x00BAB8B58BEFA01A), UINT64_C(0x012866F6B7334CBC), + UINT64_C(0x025340A51AC6E1FB), UINT64_C(0x03B1135009A4FD38), + UINT64_C(0x018AD6567590AFBB) } }, + { { UINT64_C(0x03F7CC1DCD9C3B89), UINT64_C(0x03F2238DF027BB54), + UINT64_C(0x014C7FD4BA95DD01), UINT64_C(0x01DBD8CC489F6AB6), + UINT64_C(0x03A6066BFEA7BAB5), UINT64_C(0x0065E8AD52465D5E), + UINT64_C(0x03E8F9DA8D525106), UINT64_C(0x001A6869F0B37603), + UINT64_C(0x016D47A0587C292E) }, + { UINT64_C(0x0374FC0618A5170B), UINT64_C(0x0152FB1A3C0C1CC0), + UINT64_C(0x01710A373C6A380E), UINT64_C(0x00845789535E37A3), + UINT64_C(0x035D0DA356C25D05), UINT64_C(0x00C2670CA5FED688), + UINT64_C(0x010367DAE1D930AA), UINT64_C(0x0109B528D8B5E2DD), + UINT64_C(0x0160EAA2FD7C6C7E) } }, + { { UINT64_C(0x02EB058989126FAC), UINT64_C(0x03391866A50E5BF0), + UINT64_C(0x0249D99C7ECCC796), UINT64_C(0x031F124A928D03B2), + UINT64_C(0x0106FA952E20ED57), UINT64_C(0x001BC6E7D0224A59), + UINT64_C(0x00CE05E4690915C9), UINT64_C(0x020A90266CA1AD52), + UINT64_C(0x0094293617B76FE5) }, + { 
UINT64_C(0x034B04313831CD9D), UINT64_C(0x03B7732D91E90928), + UINT64_C(0x014A1E82A9C3D51E), UINT64_C(0x02AEC53126F32DDD), + UINT64_C(0x028AC8F7A359BD6C), UINT64_C(0x01B3A0EDE3DB4B4B), + UINT64_C(0x028EB875F2FBF434), UINT64_C(0x01AE764FB3A07035), + UINT64_C(0x006701271A1304D0) } }, + { { UINT64_C(0x0015B0C258BC45E5), UINT64_C(0x00500CF779654876), + UINT64_C(0x00D61185031EC91A), UINT64_C(0x0237D26B8AB4ABC0), + UINT64_C(0x0303DB5DD0B1113F), UINT64_C(0x02C21386988E1A69), + UINT64_C(0x002A78FA27F52A38), UINT64_C(0x02373FFEB8A111FB), + UINT64_C(0x01ED316A4A837D78) }, + { UINT64_C(0x02151FA30AE71753), UINT64_C(0x018559984522D236), + UINT64_C(0x02AA1CED8D6E9D2C), UINT64_C(0x0336B3277D457875), + UINT64_C(0x01FEB5FD684C784F), UINT64_C(0x0312F506AD5C57EB), + UINT64_C(0x026506BE8AA4F453), UINT64_C(0x0334630A573CB20E), + UINT64_C(0x00AA6EBCFBE68959) } }, + { { UINT64_C(0x0339D37CD0D9229F), UINT64_C(0x0170E57961291D98), + UINT64_C(0x029AE28566E91600), UINT64_C(0x02402C0C57E9B401), + UINT64_C(0x01EC520A49429756), UINT64_C(0x02A2CF079E7747FF), + UINT64_C(0x03751BAC838751C0), UINT64_C(0x021ED034A3B7C53C), + UINT64_C(0x0118500D09678BBC) }, + { UINT64_C(0x007E207E14E4C072), UINT64_C(0x039277F4D05B1F1F), + UINT64_C(0x02A052EAB5B31E63), UINT64_C(0x02B6A467E3451DEA), + UINT64_C(0x001613AC11B73C00), UINT64_C(0x00C5A6FA0FE24B0C), + UINT64_C(0x034F01404D69886A), UINT64_C(0x00324E28B3CA9FD4), + UINT64_C(0x005A3181E5A8A0B8) } }, + { { UINT64_C(0x02CE6BA9219403A6), UINT64_C(0x030DFB5CBE0CA405), + UINT64_C(0x039D700EFB6B4704), UINT64_C(0x0365CAD8F9D06BE7), + UINT64_C(0x00FE6873B0456CD8), UINT64_C(0x0090EC1026095A01), + UINT64_C(0x016F3A2CC5EC6B62), UINT64_C(0x001AD035AE2286FC), + UINT64_C(0x018819632B44D890) }, + { UINT64_C(0x039574FA6B48EFBA), UINT64_C(0x029D9BE545F8EFA2), + UINT64_C(0x00F42C7789B73AA2), UINT64_C(0x03CB90D731504D3E), + UINT64_C(0x0202ACD7E2DE6E8A), UINT64_C(0x02C8AD45BF6E2A24), + UINT64_C(0x0067A40E7FC99B4D), UINT64_C(0x03E0738CFADACE29), + UINT64_C(0x01177C98831102AA) } }, + 
{ { UINT64_C(0x030A8610AC5E165D), UINT64_C(0x014AA32172C55EC2), + UINT64_C(0x027CE551CABE6211), UINT64_C(0x02477F69861DB6E6), + UINT64_C(0x01E8FF337E7E36EC), UINT64_C(0x0054ACDF3E1C9EF7), + UINT64_C(0x03DED626009E6F01), UINT64_C(0x02E49BFEF7555C32), + UINT64_C(0x002E4F1C3DB00152) }, + { UINT64_C(0x0332D8B606C8A9BC), UINT64_C(0x03AD929E6D810A1A), + UINT64_C(0x02C0030394592734), UINT64_C(0x02442FE9824BDA03), + UINT64_C(0x03CBAC9513FF99FB), UINT64_C(0x03B3D4E910EDA5AD), + UINT64_C(0x005A6F83029FFE7F), UINT64_C(0x02F6FF8D9E1F29A6), + UINT64_C(0x0188A1C08A99132D) } }, + { { UINT64_C(0x001F1A68F391B195), UINT64_C(0x00F016D21D573BA5), + UINT64_C(0x00EB4A4B11B13F56), UINT64_C(0x0390443801100BE8), + UINT64_C(0x00CDF1786689F09F), UINT64_C(0x008708E6F68D807B), + UINT64_C(0x00CFC70B63E2B318), UINT64_C(0x02DA65CABECA51A9), + UINT64_C(0x01BB4CC16417876B) }, + { UINT64_C(0x002270E155C4416F), UINT64_C(0x0275E82A3EE6287C), + UINT64_C(0x019550DEBAE641A6), UINT64_C(0x0189E9D792313D48), + UINT64_C(0x022E11801B0D93FC), UINT64_C(0x006308C9DD555E4E), + UINT64_C(0x02F9EBC6E275E976), UINT64_C(0x00011D5E55FC63C6), + UINT64_C(0x01D3E16AA048085F) } }, + { { UINT64_C(0x01C6845EE45C5FF5), UINT64_C(0x03B6D8ADC4E97112), + UINT64_C(0x0068C305E2731ED0), UINT64_C(0x037AFCABEDF2C8B5), + UINT64_C(0x016C0203DF9F154E), UINT64_C(0x03FF6DCCA97B1A6C), + UINT64_C(0x019D691BB5C8CD06), UINT64_C(0x022C5EA48F6FE25F), + UINT64_C(0x00553B7F4065FABA) }, + { UINT64_C(0x006009B918BF712A), UINT64_C(0x0087FAC6655FF7A7), + UINT64_C(0x039DB19E2FDB3477), UINT64_C(0x014389D0D15C2072), + UINT64_C(0x02B3AB48E4A3E0DF), UINT64_C(0x00D55CD68B325E8D), + UINT64_C(0x020332F2B62898A4), UINT64_C(0x019DB12158F6D4D6), + UINT64_C(0x010E1F4D65633E42) } }, + { { UINT64_C(0x035FDBF97A66FBB8), UINT64_C(0x0397FDA15F48E249), + UINT64_C(0x0314912B73A0AD12), UINT64_C(0x018B5A1F5856CC06), + UINT64_C(0x026DB1F90C057E46), UINT64_C(0x02BC203FE8141974), + UINT64_C(0x032698D0DBE8152C), UINT64_C(0x01BC802ED9745CEA), + UINT64_C(0x00B1E80CFCF35D14) }, 
+ { UINT64_C(0x026A4890175570A1), UINT64_C(0x03DEFA508892558E), + UINT64_C(0x00D274862CB6E1EF), UINT64_C(0x02F12D3DF3D2916D), + UINT64_C(0x01D9AF2100AA8841), UINT64_C(0x024123BB5E94517B), + UINT64_C(0x00CEA1686B604BBF), UINT64_C(0x007E9A1A2F8E072B), + UINT64_C(0x012919949C3170DE) } }, + { { UINT64_C(0x028CFBD7509B3F23), UINT64_C(0x0341392CF0D37CE2), + UINT64_C(0x03BB3B849E04FCBA), UINT64_C(0x004BCCA7E7C71C3F), + UINT64_C(0x007EAF927839C8E2), UINT64_C(0x0061602F3DAFE11E), + UINT64_C(0x01D0F1831E9A3AE7), UINT64_C(0x032630A59BC245BA), + UINT64_C(0x00C9122EE0775F54) }, + { UINT64_C(0x027706840C226E2C), UINT64_C(0x021FC974C3A78386), + UINT64_C(0x0254E3803EE94792), UINT64_C(0x02763098FB07712F), + UINT64_C(0x03085BE39396F8D2), UINT64_C(0x039CDBB83C0DCAE5), + UINT64_C(0x0275170CD909C685), UINT64_C(0x02A48EFA2F7CBD9D), + UINT64_C(0x0151800A47F18A8F) } }, + { { UINT64_C(0x0266B421EDA35EBF), UINT64_C(0x016EE661AEE22D67), + UINT64_C(0x02189CC63A33934C), UINT64_C(0x02035BBEEF2E6505), + UINT64_C(0x03A21BDAB12827FF), UINT64_C(0x010837E5E86E37F7), + UINT64_C(0x000889F4FF18C641), UINT64_C(0x00B83D668CF5F701), + UINT64_C(0x00A90A0E4C84A45C) }, + { UINT64_C(0x014A9DB7546020F0), UINT64_C(0x026B8123F183E007), + UINT64_C(0x014172F8A29A74BC), UINT64_C(0x03ECB113DDF05CC6), + UINT64_C(0x0056019B554AE591), UINT64_C(0x01C3E5A8AC670B45), + UINT64_C(0x0328112932236FCD), UINT64_C(0x0147D09F4CAD8D13), + UINT64_C(0x007CA80EB751C2E8) } }, + { { UINT64_C(0x03260C3CA6A09384), UINT64_C(0x01A2DAEF9F24A534), + UINT64_C(0x01FA415780AE38B6), UINT64_C(0x02FE728B02BEADE2), + UINT64_C(0x031F71486AA63A4A), UINT64_C(0x021F907074346F6D), + UINT64_C(0x00225A4DA564511F), UINT64_C(0x02CC4C97BC497C99), + UINT64_C(0x01C2DD5CCD878296) }, + { UINT64_C(0x03CD4A619B2264B8), UINT64_C(0x03093FC7F1583EA2), + UINT64_C(0x02B47AD7D9A2FB6F), UINT64_C(0x00C0D0B440BCA2A9), + UINT64_C(0x00B22B3DB051C447), UINT64_C(0x01CEC4D502303875), + UINT64_C(0x0340F66A4D33C79A), UINT64_C(0x00C02F44477E4379), + UINT64_C(0x01A54038DE4CD448) } 
}, + { { UINT64_C(0x036F26FDD184B415), UINT64_C(0x0077144A843CA00F), + UINT64_C(0x012DE3D50936A2A0), UINT64_C(0x00F1A915BEF669FD), + UINT64_C(0x02A728B908D36285), UINT64_C(0x023009A8F3585930), + UINT64_C(0x01AFE37F5F6903E6), UINT64_C(0x015BE42AC69043A0), + UINT64_C(0x0029A3961324FE67) }, + { UINT64_C(0x03744629EA87B468), UINT64_C(0x01B1B421D820F115), + UINT64_C(0x009DEF11D39EF564), UINT64_C(0x002A1D3B4419573F), + UINT64_C(0x00558617DEFBD955), UINT64_C(0x03E4BE19D9F46F14), + UINT64_C(0x012A38F1BF3ED4C3), UINT64_C(0x00B5C5CD4AC51A53), + UINT64_C(0x00A0E10EBF360168) } }, + { { UINT64_C(0x011616DEF784F95B), UINT64_C(0x02677312C6AD8D2D), + UINT64_C(0x03F3EF6B22617C90), UINT64_C(0x029E26932332F57D), + UINT64_C(0x0285AE820DE6D58A), UINT64_C(0x014C9337216D597B), + UINT64_C(0x00A6F170854E55AF), UINT64_C(0x010EA56E5DFB91ED), + UINT64_C(0x012F8DBABA868C11) }, + { UINT64_C(0x015249FC91DCCF70), UINT64_C(0x0306C5CB46C7DD02), + UINT64_C(0x021954201045F6CB), UINT64_C(0x00E2B058688BC602), + UINT64_C(0x002D5DDCF79B78E3), UINT64_C(0x03AF429058EAD023), + UINT64_C(0x016A3FA5F7DB5234), UINT64_C(0x01EAFE34B82E4D26), + UINT64_C(0x0095115BD2F5AE74) } }, + { { UINT64_C(0x01C1741308F9B528), UINT64_C(0x011456D2FA27C256), + UINT64_C(0x029EE8BA38AC33BC), UINT64_C(0x0162AD2DF7E46CB7), + UINT64_C(0x01239C1DD2198564), UINT64_C(0x00D634D586B52D14), + UINT64_C(0x00362033A3D5AE2B), UINT64_C(0x00F403720300250C), + UINT64_C(0x0134664850978D32) }, + { UINT64_C(0x032ECC2C4837554E), UINT64_C(0x008F4BC077701F7F), + UINT64_C(0x002D0F7435107071), UINT64_C(0x015A21A6D90E61B2), + UINT64_C(0x03E1B78AD2E928DC), UINT64_C(0x02A2214D7306E1AF), + UINT64_C(0x01C4FCA92A1694C1), UINT64_C(0x00656FBD23561E1B), + UINT64_C(0x013FF3454072CB98) } }, + }, + { + { { UINT64_C(0x003C182D851368EE), UINT64_C(0x0128CF55F2467CB0), + UINT64_C(0x00767E333ACE3BB9), UINT64_C(0x011F65D379FE73C3), + UINT64_C(0x038B18FA5C037C7D), UINT64_C(0x01B3CD7DFA5B80B3), + UINT64_C(0x0086C596F1A3E912), UINT64_C(0x00A8AD1EBFF700CD), + 
UINT64_C(0x00E12C370BFEEC8C) }, + { UINT64_C(0x00E5DE2C18A3F84B), UINT64_C(0x02D9CB8AB50B28B7), + UINT64_C(0x01D7EDD0731B2C4B), UINT64_C(0x0328A026B1FAD960), + UINT64_C(0x02189B0FF8B6CA46), UINT64_C(0x03FD18C777A3B6E8), + UINT64_C(0x0004BCBA72EE3E81), UINT64_C(0x0214C7D12A3F1BC4), + UINT64_C(0x01CA103DD1B9C887) } }, + { { UINT64_C(0x00A781D5DE024391), UINT64_C(0x01D4AC6B9AA04C66), + UINT64_C(0x0298088919924A4E), UINT64_C(0x02295F237B9E2B5F), + UINT64_C(0x0228FA8EA8570017), UINT64_C(0x01AE7F1814C6B59C), + UINT64_C(0x008FF64625C08899), UINT64_C(0x002A626C4EECF6A1), + UINT64_C(0x0118A9AD8CEFC12E) }, + { UINT64_C(0x014B05DA9E9AB68C), UINT64_C(0x036EDCE530984903), + UINT64_C(0x03147DF5F527C318), UINT64_C(0x0196BC1DED347CDD), + UINT64_C(0x01BB4AC96E14A591), UINT64_C(0x03C4F3EDF23B9460), + UINT64_C(0x03547D14C90381B8), UINT64_C(0x03693FA10D27208C), + UINT64_C(0x003B75AA5EA458F7) } }, + { { UINT64_C(0x02779CC419496A3E), UINT64_C(0x01D3BB2E4FE62409), + UINT64_C(0x032F4C70FCAE21C4), UINT64_C(0x013310DA0ECE14A3), + UINT64_C(0x03F3B3593FC9DDBB), UINT64_C(0x0051822EF8CFB99D), + UINT64_C(0x012D89EA3AE1C997), UINT64_C(0x00D12E2856922EAE), + UINT64_C(0x00E81549D787C4C8) }, + { UINT64_C(0x02337896D4B88B67), UINT64_C(0x00A59FC2D1584FBE), + UINT64_C(0x02FAA1ED2840EB09), UINT64_C(0x02061203F2AA6499), + UINT64_C(0x03BF834C1997385E), UINT64_C(0x02274588F3F24162), + UINT64_C(0x001CC1FD4A622D5A), UINT64_C(0x0044FEAA4FA76E84), + UINT64_C(0x00B3619A1E813DA3) } }, + { { UINT64_C(0x0276BEE0D076683D), UINT64_C(0x030210C875AFAF69), + UINT64_C(0x0011EDC7657E64F0), UINT64_C(0x02488D3166D94F20), + UINT64_C(0x011EA313A85E0E01), UINT64_C(0x032E12BF7FFAF1B4), + UINT64_C(0x00327C5A8CCEF85B), UINT64_C(0x0252EF23E4C30C4E), + UINT64_C(0x01CC6A9EB749B839) }, + { UINT64_C(0x02B00795BB99594F), UINT64_C(0x01F383BC6F8BE7AA), + UINT64_C(0x00760524F18BF5F2), UINT64_C(0x013AA36073E7DDA9), + UINT64_C(0x025A0A5A67DE0097), UINT64_C(0x01A61B644AB9486A), + UINT64_C(0x0313B98AABF5EA94), UINT64_C(0x003BB89B65E51F0D), + 
UINT64_C(0x01776B040E0F32AB) } }, + { { UINT64_C(0x01721BA5B2662A6A), UINT64_C(0x0215447AF117F66C), + UINT64_C(0x03DB83ECC5D3D99A), UINT64_C(0x0215A6C6CE2794E3), + UINT64_C(0x010BE3489ECF31F8), UINT64_C(0x012B3FA3634CDEF2), + UINT64_C(0x017C1F03CDFBCD8A), UINT64_C(0x02EE6A91A626677E), + UINT64_C(0x003FF1568F6BE74E) }, + { UINT64_C(0x01995519CD76A58E), UINT64_C(0x02DC3A3040585EF5), + UINT64_C(0x0061DDCAE3A68494), UINT64_C(0x025E1A1EF3C2AAA5), + UINT64_C(0x00CA54B0D55B6CE8), UINT64_C(0x00543A97F9E4CC22), + UINT64_C(0x01F7F09EDEFF8BFA), UINT64_C(0x00168473D37DD44E), + UINT64_C(0x00FE410E086ACD40) } }, + { { UINT64_C(0x006AF7630DA09D54), UINT64_C(0x010ABA844C57F2B5), + UINT64_C(0x03C9AC1832567F47), UINT64_C(0x00B3CFD3C603E8BB), + UINT64_C(0x01A04969EEACA1C9), UINT64_C(0x02E57B7E17E4591D), + UINT64_C(0x03E68AB3619DA17B), UINT64_C(0x00ECCA930F030279), + UINT64_C(0x01B2C98B4036BF1D) }, + { UINT64_C(0x0077C78B045007F6), UINT64_C(0x03CCE2791A0C0815), + UINT64_C(0x01688DB89F24D07A), UINT64_C(0x0017DBDDD43EAD41), + UINT64_C(0x033A80BF740D6693), UINT64_C(0x02F768ED65974242), + UINT64_C(0x026B74A3E2B11EFF), UINT64_C(0x023E110BE2C45B38), + UINT64_C(0x00B98CD56F7AB2CD) } }, + { { UINT64_C(0x0383E5A50FB0D3ED), UINT64_C(0x034513587B8AB555), + UINT64_C(0x03B1C6783B97BD45), UINT64_C(0x0062B781B344D4E1), + UINT64_C(0x00FD5DFB5083FED9), UINT64_C(0x00CF4B880197BC29), + UINT64_C(0x02084C42BE014183), UINT64_C(0x01C81317B056C149), + UINT64_C(0x016318E131F69642) }, + { UINT64_C(0x019B4B41240FA002), UINT64_C(0x0312BAA4E914151E), + UINT64_C(0x0180907D9FACF5B0), UINT64_C(0x007774B33895C1D0), + UINT64_C(0x017E17EBCCA7FA72), UINT64_C(0x030812EEB0BC890A), + UINT64_C(0x02294B1CB2912B73), UINT64_C(0x03835B7F1FA5A17D), + UINT64_C(0x001712AC45AB3EC9) } }, + { { UINT64_C(0x006603D4F696BA83), UINT64_C(0x00D22CAFE710B52F), + UINT64_C(0x00A86019255DD155), UINT64_C(0x03D9E86EE758D999), + UINT64_C(0x024051D5CE463A6D), UINT64_C(0x02906D0203D86E6E), + UINT64_C(0x02B53E1EA3B77733), 
UINT64_C(0x01298EBA501720C6), + UINT64_C(0x00A49AB3D5669F64) }, + { UINT64_C(0x00C3477F5E8C01EF), UINT64_C(0x02CFF8B3EED1F46C), + UINT64_C(0x02588DBF2A1259EE), UINT64_C(0x01BC0AE8F9969F27), + UINT64_C(0x0284232123DA5F9F), UINT64_C(0x03E79C894325C436), + UINT64_C(0x00FE809311DA7F3B), UINT64_C(0x0102255D12EBA535), + UINT64_C(0x01F50E25AE34114E) } }, + { { UINT64_C(0x0277D803646C1FB6), UINT64_C(0x02488A5E5052BBB1), + UINT64_C(0x000391356EAC8F11), UINT64_C(0x01646437C00A834F), + UINT64_C(0x02EAB8F940B93B40), UINT64_C(0x024958DF1C74ED20), + UINT64_C(0x03F2F1AF37BD1D73), UINT64_C(0x011FE3F5381F17F4), + UINT64_C(0x00EF826DAE390184) }, + { UINT64_C(0x00D2D6B4BA78B572), UINT64_C(0x0073D6C96322203E), + UINT64_C(0x018C7B2E976AA1E5), UINT64_C(0x026E3F6920E5F016), + UINT64_C(0x01E846537687AFF5), UINT64_C(0x017563948203FD81), + UINT64_C(0x019F1D17DABC8810), UINT64_C(0x00F8ED530C4E3A67), + UINT64_C(0x0196F10721B62324) } }, + { { UINT64_C(0x032F87D12878503F), UINT64_C(0x03648B98DC48ECC8), + UINT64_C(0x0184FD4C8EF53242), UINT64_C(0x01333846A9EEDB04), + UINT64_C(0x02C1DF317872BBBF), UINT64_C(0x002D6E1FAF12E7FB), + UINT64_C(0x039480C808CCDA38), UINT64_C(0x02845D8F6413B928), + UINT64_C(0x01979462C493957E) }, + { UINT64_C(0x02E38CCA2947A480), UINT64_C(0x00298B225770DDF9), + UINT64_C(0x02859B366A105BC5), UINT64_C(0x00C80C32E8803179), + UINT64_C(0x01DEC1627A49675D), UINT64_C(0x018FD7B10ED2384C), + UINT64_C(0x00CE729C9A700811), UINT64_C(0x00B9251157C6408C), + UINT64_C(0x00D18FB5EDB29090) } }, + { { UINT64_C(0x0019C27F1002FA40), UINT64_C(0x0187B6686A1976EA), + UINT64_C(0x03089E6ABFDCA1BA), UINT64_C(0x01E3A9276DAB6A31), + UINT64_C(0x01010381B56E1374), UINT64_C(0x02059C3444CA22AD), + UINT64_C(0x0340D48C52418852), UINT64_C(0x001C397FEACAD014), + UINT64_C(0x00A9B91476DE1E3B) }, + { UINT64_C(0x01B18811D2203C97), UINT64_C(0x006802C3244A5143), + UINT64_C(0x034CC7484B00B0C2), UINT64_C(0x02D138E88D39FE0E), + UINT64_C(0x00035A355C8D48A2), UINT64_C(0x01257073943DE7F1), + UINT64_C(0x003B2AA49BD592AC), 
UINT64_C(0x03D7C1DBA4418663), + UINT64_C(0x01A24E3A67DAF410) } }, + { { UINT64_C(0x02B819FA06A8409F), UINT64_C(0x004A52ACCE9D798F), + UINT64_C(0x0342BCE5E942F51F), UINT64_C(0x01499CF92BE85899), + UINT64_C(0x03ACD69B9655760D), UINT64_C(0x020F4E9A7813F0D0), + UINT64_C(0x03880853D5E05E02), UINT64_C(0x02B0666045F612A7), + UINT64_C(0x00302D53FFFEEF1D) }, + { UINT64_C(0x025294489593BC03), UINT64_C(0x013D42D26192AAEB), + UINT64_C(0x010D09630D5F95E5), UINT64_C(0x02152684A6D53F7C), + UINT64_C(0x022DD5DAD7C7B4A8), UINT64_C(0x02966500C48498D3), + UINT64_C(0x03D763E4EB3C2E33), UINT64_C(0x027FAC6AFEDC5F61), + UINT64_C(0x0074EA2C83E52FE7) } }, + { { UINT64_C(0x01DB9F78868172DA), UINT64_C(0x0100A5C0A0C25D2E), + UINT64_C(0x023587D7C3E66CE7), UINT64_C(0x0234D19B042FCCD7), + UINT64_C(0x0059721B0F60680E), UINT64_C(0x03A0B2DF23AB3A42), + UINT64_C(0x0177AFB700329CAC), UINT64_C(0x03D5A5CFAF392AE7), + UINT64_C(0x00CF59BC96ECDBA2) }, + { UINT64_C(0x03CE38933BF1C993), UINT64_C(0x0388C35CC45F89F5), + UINT64_C(0x039286D1ED3DB46C), UINT64_C(0x0061947308D0F830), + UINT64_C(0x0307100E3F7C9C8E), UINT64_C(0x00967048E8CC7CC9), + UINT64_C(0x03CAD0590370F457), UINT64_C(0x0110D9420ECE3996), + UINT64_C(0x009955E94586B830) } }, + { { UINT64_C(0x03B6822745F0E5DA), UINT64_C(0x03120B5D07E9C6A5), + UINT64_C(0x01F88B173B2A0839), UINT64_C(0x0245CA639869EE96), + UINT64_C(0x0199F585B26F8120), UINT64_C(0x01D2153C5D41B782), + UINT64_C(0x009EAD730F2E3B2D), UINT64_C(0x007E27FEF3F3388E), + UINT64_C(0x01DD0BBF32960B2B) }, + { UINT64_C(0x0298F45E5931C0F0), UINT64_C(0x012A6F48D3898EAD), + UINT64_C(0x01EFD537B310CFED), UINT64_C(0x030390CD48666C4B), + UINT64_C(0x01DCF41DD16073BB), UINT64_C(0x035CF923EABD525A), + UINT64_C(0x00DDF48F41B47311), UINT64_C(0x0316E0000BFFF7E2), + UINT64_C(0x003C6A0632821286) } }, + { { UINT64_C(0x006FA434852228CC), UINT64_C(0x03EE279533E093C6), + UINT64_C(0x03C215EE36B974E7), UINT64_C(0x02FA330552481892), + UINT64_C(0x01ABFC67F3C2F700), UINT64_C(0x000945F47832719D), + 
UINT64_C(0x01BA378921E29D68), UINT64_C(0x0364936B83B66609), + UINT64_C(0x0137B7B2011DE260) }, + { UINT64_C(0x00A7EBAC8BA1E090), UINT64_C(0x0343E15BB9BADFCE), + UINT64_C(0x01C5AFA1059527D8), UINT64_C(0x039CE94C694D78AB), + UINT64_C(0x020EE7FF8C758AFB), UINT64_C(0x03859CF409F61041), + UINT64_C(0x033F2682BABD9F38), UINT64_C(0x0344ED7AA22D40CE), + UINT64_C(0x00C59BE4543774E1) } }, + { { UINT64_C(0x01B5777A8F1CAC2C), UINT64_C(0x001A1BB0AB5E6822), + UINT64_C(0x011BC043646DAF27), UINT64_C(0x03F711C68F6A2900), + UINT64_C(0x001C279115DF5830), UINT64_C(0x017D6649CFD4D909), + UINT64_C(0x02270B8E48C4FC60), UINT64_C(0x01D402B5FB5683E0), + UINT64_C(0x001F8DB87807BBF7) }, + { UINT64_C(0x00C9DAC0A9244F78), UINT64_C(0x02B03A3698AE7AB0), + UINT64_C(0x02CCF3FF50BC045B), UINT64_C(0x03BCD2148E821FFF), + UINT64_C(0x035E87616BD7E71C), UINT64_C(0x034B54F4034B6093), + UINT64_C(0x02C5BEA4BCD01770), UINT64_C(0x0219F4B5BD513DB4), + UINT64_C(0x01DF5AC58C13B575) } }, + }, + { + { { UINT64_C(0x019885D110E10587), UINT64_C(0x0225E6982614E90C), + UINT64_C(0x03FE389B08EF52DA), UINT64_C(0x02986A5F6773FA41), + UINT64_C(0x02D7E3FB92A3A338), UINT64_C(0x02804DB8E96B46A6), + UINT64_C(0x02ED29A77A3BFC07), UINT64_C(0x021EDA658D1622A9), + UINT64_C(0x00DC41F148BEEF47) }, + { UINT64_C(0x00671195EBF698BD), UINT64_C(0x02DA5978A5D3B8AE), + UINT64_C(0x0067084C20702323), UINT64_C(0x01BAE92F07B45047), + UINT64_C(0x01EECFF9A6840B39), UINT64_C(0x00B5A0A6F615E949), + UINT64_C(0x02CE02C0AFAD4F4D), UINT64_C(0x02CCCE13BD8C56FD), + UINT64_C(0x001BC38FE857CCC6) } }, + { { UINT64_C(0x00081356B6965640), UINT64_C(0x006CE26431E83C07), + UINT64_C(0x01BA4874007EE7A0), UINT64_C(0x02537377BE8BDCBF), + UINT64_C(0x0248DB2FA66BD85D), UINT64_C(0x028C676B603EF79F), + UINT64_C(0x011FB7160B2BE1C4), UINT64_C(0x02E60E65885FEFB9), + UINT64_C(0x012B85F1B13BE0ED) }, + { UINT64_C(0x0353AA14ECFB1D0D), UINT64_C(0x01FF0DDD82885F37), + UINT64_C(0x0331E99B56FBDDD7), UINT64_C(0x03AEB28F8419966F), + UINT64_C(0x021F907EA8D0F042), 
UINT64_C(0x013BD7D21430856E), + UINT64_C(0x0386870C6BB892CA), UINT64_C(0x03E04B0EFADCEFFA), + UINT64_C(0x007C04B740BD4123) } }, + { { UINT64_C(0x0003B2CD3E0BF039), UINT64_C(0x00C735DA6B8581E9), + UINT64_C(0x0012D9341E1131F3), UINT64_C(0x03D2B2BBE7116022), + UINT64_C(0x00A056CCF73BDC37), UINT64_C(0x027C9AA3BBBDE400), + UINT64_C(0x02165FF6E36E8907), UINT64_C(0x0139C88969C85A96), + UINT64_C(0x00C7B0F49EEA4A8D) }, + { UINT64_C(0x01F03CD678EAF6EB), UINT64_C(0x01BF3F1E8FBD78DF), + UINT64_C(0x00857FD3BFA434E9), UINT64_C(0x008641B0E586D15E), + UINT64_C(0x021227FC18AF0795), UINT64_C(0x022F892EEA381B7A), + UINT64_C(0x00B3FA1F0F06E680), UINT64_C(0x01EAB02BC55C4EE1), + UINT64_C(0x01116BB9BA45D30F) } }, + { { UINT64_C(0x03B557A9EDCBF5E2), UINT64_C(0x00B1DFD3ECC7A54C), + UINT64_C(0x02DCE258E5A7E8D4), UINT64_C(0x00CA7703C434FC01), + UINT64_C(0x038801282507AB56), UINT64_C(0x025FD9FA5A9E7C74), + UINT64_C(0x0084D0CBBC9F71D9), UINT64_C(0x00D621CCEBB93EC1), + UINT64_C(0x007E0D7D26AF06B2) }, + { UINT64_C(0x02584763447D2B4B), UINT64_C(0x00E02402AF814CEB), + UINT64_C(0x01A0946A66DEBE3C), UINT64_C(0x025BDCD462246772), + UINT64_C(0x032E9062B0C5E215), UINT64_C(0x037BCF49D9FBECDC), + UINT64_C(0x001F56138C539278), UINT64_C(0x000AEA3CABF951BB), + UINT64_C(0x007AA80F0C621590) } }, + { { UINT64_C(0x00B8EEBBBD959BD9), UINT64_C(0x001BE3997D083340), + UINT64_C(0x01B3F063154C5C54), UINT64_C(0x0258C476F7A9A983), + UINT64_C(0x0042A485E75D36E5), UINT64_C(0x034928BB28AF526A), + UINT64_C(0x01BA009661FE033D), UINT64_C(0x039E10035E2FEDA5), + UINT64_C(0x01AFFCC1198129AF) }, + { UINT64_C(0x030AD5348384E611), UINT64_C(0x01579499B7C9277C), + UINT64_C(0x01969EE33931346F), UINT64_C(0x025C5C1EBDB572DA), + UINT64_C(0x033A65D217266A39), UINT64_C(0x026F0D4AD6360EAB), + UINT64_C(0x037599346289BDA2), UINT64_C(0x0092404E9E02CE9C), + UINT64_C(0x01D0C694EC0434A7) } }, + { { UINT64_C(0x0099723AA10FBD04), UINT64_C(0x03F7E7474E4B9E21), + UINT64_C(0x03ECBDF12C367638), UINT64_C(0x009B6D83C1B5EFBE), + 
UINT64_C(0x03E6CE2FC3522A5D), UINT64_C(0x0083A6DEF388FDCF), + UINT64_C(0x0001D8542F4EA36B), UINT64_C(0x035D032BD68C8381), + UINT64_C(0x0131DF4BF7A79938) }, + { UINT64_C(0x008A14C7B9493BE8), UINT64_C(0x0273BD54452391FF), + UINT64_C(0x035758B804AAD2E8), UINT64_C(0x0218D8B66AABA8CD), + UINT64_C(0x0013BC5120CE58B7), UINT64_C(0x027C6BF5C3CF36BB), + UINT64_C(0x0325B4A1E773C0D4), UINT64_C(0x01C2F7A449EA2D3B), + UINT64_C(0x01C6E6D30CAF29F6) } }, + { { UINT64_C(0x0321B0EB2DAA2FB7), UINT64_C(0x001AF441996ABD26), + UINT64_C(0x0075B82E9704E625), UINT64_C(0x00FD42C4DDFBEF6D), + UINT64_C(0x0199707C61408809), UINT64_C(0x017F62CF54E5FBA8), + UINT64_C(0x03E8914D3356B6E7), UINT64_C(0x010B415870E01C17), + UINT64_C(0x01B8D0304825F773) }, + { UINT64_C(0x01AA92433FDAA949), UINT64_C(0x01186BD47A9D105F), + UINT64_C(0x03D995A63573F12F), UINT64_C(0x032129C097A55B0D), + UINT64_C(0x01817B31A05D6C77), UINT64_C(0x03D1CAF9B4BCAF81), + UINT64_C(0x01524CCC3B01B281), UINT64_C(0x0296DAA6FDAA7E18), + UINT64_C(0x002F1DC74BE29F0C) } }, + { { UINT64_C(0x02171F9BDC8D6167), UINT64_C(0x03D306F736B287BD), + UINT64_C(0x021943224F5B91BE), UINT64_C(0x02B6BA63BB681A7A), + UINT64_C(0x003527F99B16E603), UINT64_C(0x00CC933DC7095468), + UINT64_C(0x0265D81677BFCEEF), UINT64_C(0x028AA225CE78ABEA), + UINT64_C(0x00837C63F321EE01) }, + { UINT64_C(0x00A4B775684BF04E), UINT64_C(0x00AB33042AB3CA3F), + UINT64_C(0x019796F5B70DA12B), UINT64_C(0x00CD06B6726983AD), + UINT64_C(0x002698B98D097375), UINT64_C(0x03BB3A2632FF6007), + UINT64_C(0x00B02BB6915F2608), UINT64_C(0x0267E64CB1F79BA2), + UINT64_C(0x01DAB183858DB0F4) } }, + { { UINT64_C(0x01D545A21757C756), UINT64_C(0x001D934F1E31FF52), + UINT64_C(0x023B0285CE4B1861), UINT64_C(0x031354B83A06220D), + UINT64_C(0x017177FFE06AFE14), UINT64_C(0x019E6D07584A960E), + UINT64_C(0x0119B9405A4BEA49), UINT64_C(0x019D70486EC70531), + UINT64_C(0x00D7844A95DDF521) }, + { UINT64_C(0x02045C5C7288CF7B), UINT64_C(0x00677CB68405B1B1), + UINT64_C(0x01845055E3EA0793), UINT64_C(0x035EFB9C55059FBD), + 
UINT64_C(0x038843F3AF91E7EA), UINT64_C(0x00822747CA170235), + UINT64_C(0x037B132A90F3A94C), UINT64_C(0x00526CF439B472A8), + UINT64_C(0x00132F18D93B62FB) } }, + { { UINT64_C(0x01D84FC9D0CF69E7), UINT64_C(0x006503AA38D2A5EE), + UINT64_C(0x03A94DFC118DD98F), UINT64_C(0x03B7F19AE7F392FF), + UINT64_C(0x007287A7DC1849A3), UINT64_C(0x00067A7B188F6CE5), + UINT64_C(0x02A347BDE0D7D087), UINT64_C(0x0268E88CC6AAFE02), + UINT64_C(0x010F44A365B11B99) }, + { UINT64_C(0x018F73AC92AE7427), UINT64_C(0x0371CC00B812BB06), + UINT64_C(0x0093D3088101FF62), UINT64_C(0x00C8613B7544141B), + UINT64_C(0x01AF7C6201945AC7), UINT64_C(0x030C7CA555FE097F), + UINT64_C(0x025B2E6EDA00AB31), UINT64_C(0x0214A3B6A76443D0), + UINT64_C(0x0040A360259C7CDD) } }, + { { UINT64_C(0x006047E27F3DE4D2), UINT64_C(0x01FC4A47DA6A0A53), + UINT64_C(0x015A543BD0BC352A), UINT64_C(0x014AACDA98A2B65E), + UINT64_C(0x036FE6BD165C71A3), UINT64_C(0x02DF772BAC823A1F), + UINT64_C(0x00416598B2CD1443), UINT64_C(0x032CA3B1D0CAEDD0), + UINT64_C(0x0032FB284CCCEF17) }, + { UINT64_C(0x006DC83E96A2607F), UINT64_C(0x013B7280B80B6341), + UINT64_C(0x004551B88CA47813), UINT64_C(0x01849A56EE6AB37F), + UINT64_C(0x00C3074BC3D0074A), UINT64_C(0x0049915404661EF6), + UINT64_C(0x017F0B8543807006), UINT64_C(0x01235802E0AA61E9), + UINT64_C(0x016866C456C5454B) } }, + { { UINT64_C(0x0397A466381DC2A6), UINT64_C(0x00CD4D54FE413A43), + UINT64_C(0x0320035D8FD47311), UINT64_C(0x03FEF7B90109A77E), + UINT64_C(0x01FF2C161A6CFCBA), UINT64_C(0x014089BF152955D6), + UINT64_C(0x00595A7ADB79909F), UINT64_C(0x02E10BC4FB022F89), + UINT64_C(0x012739D14BF39AB2) }, + { UINT64_C(0x03045804E123BA29), UINT64_C(0x037196AFA31BDBE1), + UINT64_C(0x01A3BADADE7D8795), UINT64_C(0x005FE72D3736F1F7), + UINT64_C(0x00B261A79C9F5DAE), UINT64_C(0x00CC055F3C4A27EA), + UINT64_C(0x018DD7C9E5958FC2), UINT64_C(0x0096748344CCC75E), + UINT64_C(0x0065ADD88400A218) } }, + { { UINT64_C(0x033557744356B52C), UINT64_C(0x03DD368D0EA0209F), + UINT64_C(0x02EA630FD3CCDE4D), 
UINT64_C(0x037A07B902382B40), + UINT64_C(0x000B7AF2CF41C092), UINT64_C(0x0221D85556DCC533), + UINT64_C(0x03C92114F14EA6E1), UINT64_C(0x006813B827858B16), + UINT64_C(0x011933B0203B754D) }, + { UINT64_C(0x03A2396D5A659158), UINT64_C(0x0350A8E07708486E), + UINT64_C(0x0306EEBAE2B49C8B), UINT64_C(0x00EC9E65F76A5B29), + UINT64_C(0x03CECDD7F9A47F6A), UINT64_C(0x024DB8B97AA04533), + UINT64_C(0x028D089D2C8EBEAE), UINT64_C(0x01959F5D1CB2E7ED), + UINT64_C(0x0024A23BD4403D34) } }, + { { UINT64_C(0x038B31C4EED9CDF5), UINT64_C(0x0185AFF2C98A930A), + UINT64_C(0x0245E4B7D7DD3E7E), UINT64_C(0x00232AA32609076B), + UINT64_C(0x023F2A9E6F982A24), UINT64_C(0x03087A8E3FF2F39E), + UINT64_C(0x02F6CA050121ACCC), UINT64_C(0x03568930B3D90B8C), + UINT64_C(0x01C922F3A5335B36) }, + { UINT64_C(0x032AD6EEE92B1FE6), UINT64_C(0x02FC436D7BD6B2C7), + UINT64_C(0x023EDD35035286A3), UINT64_C(0x003D77B6144EB9BC), + UINT64_C(0x0304C9A105C2BAEE), UINT64_C(0x01ADB987C7CA786C), + UINT64_C(0x0132676ADD1D742E), UINT64_C(0x02A9E9CB749E88B9), + UINT64_C(0x00A99A53E3A5AC0A) } }, + { { UINT64_C(0x03639306E80DE633), UINT64_C(0x01AB767B97949EED), + UINT64_C(0x006F4BAA789B6820), UINT64_C(0x039D5F497550BD7A), + UINT64_C(0x00B4B2B380BC772D), UINT64_C(0x03022AD28F3A1DD0), + UINT64_C(0x0017950F61ACF7EB), UINT64_C(0x019CAC6E06DC1B93), + UINT64_C(0x008470E16670F97A) }, + { UINT64_C(0x03C11D39EE5D0D74), UINT64_C(0x01C090F08CC26FEC), + UINT64_C(0x0006AD970C46C574), UINT64_C(0x015907C555DF013E), + UINT64_C(0x0070AB35D20A91F0), UINT64_C(0x00C0481F822220A4), + UINT64_C(0x03A92E8B413E83FE), UINT64_C(0x00C3982C5F8D922E), + UINT64_C(0x017CB1B97D4ED7B4) } }, + { { UINT64_C(0x0057D40664DA7708), UINT64_C(0x00D1DC31FC3ED514), + UINT64_C(0x01C1C72DE7D6ECFF), UINT64_C(0x00DAEABFA1F9C5DE), + UINT64_C(0x0027EE8200E32455), UINT64_C(0x00F2A2064D51F4F3), + UINT64_C(0x0087C336FD335B37), UINT64_C(0x0350C7F9A0D4FC4D), + UINT64_C(0x01D53465439099CD) }, + { UINT64_C(0x01B27DD4E9031706), UINT64_C(0x0197F1275CBBB42C), + UINT64_C(0x015ABB1962BC7CE5), 
UINT64_C(0x015AEBA4FCC2D21C), + UINT64_C(0x01DB34AC91849D8B), UINT64_C(0x02168D50E8D52313), + UINT64_C(0x024C7BCFFA60FB49), UINT64_C(0x00653790EC4A5122), + UINT64_C(0x0021ECA115250E74) } }, + }, + { + { { UINT64_C(0x01017ED5F1C86157), UINT64_C(0x01C5FACEAAF3291A), + UINT64_C(0x01980E57AC2978AD), UINT64_C(0x012E4C78C1EF8537), + UINT64_C(0x019080B37DC2F0DA), UINT64_C(0x0104D379379FF55E), + UINT64_C(0x0019CF345BF6F641), UINT64_C(0x01CE7973781C9EB0), + UINT64_C(0x00E6B4E5C2E7863E) }, + { UINT64_C(0x014E085628E15F36), UINT64_C(0x03113ED189D82402), + UINT64_C(0x0198521CB21CCF92), UINT64_C(0x03CB794E55F64866), + UINT64_C(0x01B6C417EBCEDCD4), UINT64_C(0x001D79C7600B1BE5), + UINT64_C(0x02EC6810EA41A2B6), UINT64_C(0x0083606535BEC6E7), + UINT64_C(0x01CA8E7CD41F2E03) } }, + { { UINT64_C(0x01BA87BAF1C9C2EC), UINT64_C(0x00D55499AAADC0DE), + UINT64_C(0x019712C990B590E5), UINT64_C(0x00384B1ACA78C747), + UINT64_C(0x03563BCAB01E0B5D), UINT64_C(0x0190C274005354FF), + UINT64_C(0x00B9D6C425986F2F), UINT64_C(0x038E491D7F2754C6), + UINT64_C(0x01B202739C50FF59) }, + { UINT64_C(0x03F58DFC16F1CACC), UINT64_C(0x00EE939AC23381A2), + UINT64_C(0x020399FE184301C9), UINT64_C(0x0351F7998C95E6D7), + UINT64_C(0x03713D0FEFC9F67B), UINT64_C(0x02651504977BC9CC), + UINT64_C(0x039962831BD8B37B), UINT64_C(0x03398A2CADA7CFCE), + UINT64_C(0x00D4F08A7E5A3118) } }, + { { UINT64_C(0x03C9826425A2D6F0), UINT64_C(0x00ECC054CD119CA9), + UINT64_C(0x00C8AF9373A85F21), UINT64_C(0x03167F72CB478C61), + UINT64_C(0x01CE9F2616361F7A), UINT64_C(0x03FB08CCEB9E536B), + UINT64_C(0x0319FD98C00E9131), UINT64_C(0x0010725A47005067), + UINT64_C(0x01D7C9A8F84C990D) }, + { UINT64_C(0x029CA261BAF35FA1), UINT64_C(0x0220865C1BFEF071), + UINT64_C(0x0115DF412660A5A4), UINT64_C(0x02257646F5EF524C), + UINT64_C(0x019648D3BF5907D4), UINT64_C(0x03B8287D6BB4E923), + UINT64_C(0x00C1831BA518EF96), UINT64_C(0x01147F1EC444000D), + UINT64_C(0x001BEB2743E8CF72) } }, + { { UINT64_C(0x017385BC9719C87C), UINT64_C(0x038E9A8AC23E84A0), + 
UINT64_C(0x03B86FA4168B29E6), UINT64_C(0x0259140D286A2701), + UINT64_C(0x0248D5F9426712B4), UINT64_C(0x01E876B4EE205101), + UINT64_C(0x016F0D598FB30248), UINT64_C(0x020D4EEE450E3327), + UINT64_C(0x0075F0EB2FEC4E8C) }, + { UINT64_C(0x02999066B392D834), UINT64_C(0x03A4F34FCBCA75D9), + UINT64_C(0x029F3E28ABFA2CC4), UINT64_C(0x0207E1A7B58B1513), + UINT64_C(0x036C4EE93B0C1C40), UINT64_C(0x038D0C53869B6127), + UINT64_C(0x02203321AF3FCDF2), UINT64_C(0x0016E986CD98C912), + UINT64_C(0x019AB5DBF8618B76) } }, + { { UINT64_C(0x02775F5E811FA55B), UINT64_C(0x002FF97CDF8F7EDE), + UINT64_C(0x00AA05F646486F8F), UINT64_C(0x0357ABB8FF5CB222), + UINT64_C(0x0047A8176117A59D), UINT64_C(0x01ED8538F6CBC1A6), + UINT64_C(0x0209FE9034A7F53F), UINT64_C(0x0364120EC4B9D3CF), + UINT64_C(0x019B67A37C660EDC) }, + { UINT64_C(0x0038B0D828C7A5B7), UINT64_C(0x015D9C74EAC7C806), + UINT64_C(0x0118152AAA9222B5), UINT64_C(0x01B83339A6AA2783), + UINT64_C(0x01993B4601A314EF), UINT64_C(0x0325A7A416B3D288), + UINT64_C(0x019D7FD16DD01F3A), UINT64_C(0x021D190386BFFC60), + UINT64_C(0x011CF2C0B0E2A983) } }, + { { UINT64_C(0x00D7DE7D18D8BE36), UINT64_C(0x02F0734BAAC04BF5), + UINT64_C(0x0048BB9E44C3F40B), UINT64_C(0x035994B7094672F1), + UINT64_C(0x02BD0CFD78BD4138), UINT64_C(0x0015A28B8F06A61A), + UINT64_C(0x014D5DF2A7F95274), UINT64_C(0x028141F42EAB92B1), + UINT64_C(0x00B25EF25C149754) }, + { UINT64_C(0x0057378C324BFA00), UINT64_C(0x001F4C62175258AF), + UINT64_C(0x03153B4FD5FCA3E4), UINT64_C(0x000682DC5C05BE3E), + UINT64_C(0x0330954DA1D1973A), UINT64_C(0x01BC1D711118932D), + UINT64_C(0x0168D97A2A9692FD), UINT64_C(0x012BBEB288330777), + UINT64_C(0x00E133BE00A38BE4) } }, + { { UINT64_C(0x03F431A945F8022D), UINT64_C(0x01CDF8AABB4F5212), + UINT64_C(0x02CC1D637215E00A), UINT64_C(0x03D36BA40B447ED7), + UINT64_C(0x02513AB7E6956FDD), UINT64_C(0x008D5E83EDDB9727), + UINT64_C(0x01B75785B4FDC3C7), UINT64_C(0x01EAB35E8B3CAE24), + UINT64_C(0x01339E1C87AA8ECC) }, + { UINT64_C(0x02D325A33450FD39), UINT64_C(0x00322202FEDA09D5), + 
UINT64_C(0x024827340C12DF41), UINT64_C(0x01E66CCCF20D3B06), + UINT64_C(0x02001372B74C978F), UINT64_C(0x012C696C6F55CD58), + UINT64_C(0x02D10F2EED8A9308), UINT64_C(0x02688747F53110D6), + UINT64_C(0x0188C13D0F26D624) } }, + { { UINT64_C(0x0239E7FBF9FFF942), UINT64_C(0x024391DE07C9C0A8), + UINT64_C(0x03BB90544685654F), UINT64_C(0x010453EE881DA06B), + UINT64_C(0x02D2A672E21ACDCD), UINT64_C(0x0047CF596F209D90), + UINT64_C(0x0321D4C73047EE1B), UINT64_C(0x008011F4FFA1ADC5), + UINT64_C(0x0051B7DD6F083F62) }, + { UINT64_C(0x00B4E0D173BF30CF), UINT64_C(0x0142CF0DBD8DD71C), + UINT64_C(0x02FE7953062D3E36), UINT64_C(0x02A5AB5A7D6604A9), + UINT64_C(0x03CC08A13AACC423), UINT64_C(0x024662C655FF1A2F), + UINT64_C(0x0179D6E29B6B1FCA), UINT64_C(0x03C8D9EF4E5B76E6), + UINT64_C(0x00CD341C315CEB11) } }, + { { UINT64_C(0x00CC4030AC8B2AF6), UINT64_C(0x016D6A39FA7E9D4C), + UINT64_C(0x0392D441BAE14C3A), UINT64_C(0x038840FEA9B7D65B), + UINT64_C(0x02398CE4933605AF), UINT64_C(0x022CD8745AC294D0), + UINT64_C(0x00B8391D34172B85), UINT64_C(0x035C1A0D5C360EA4), + UINT64_C(0x00B2CE02EA54ADC4) }, + { UINT64_C(0x004B32E432779E4D), UINT64_C(0x0396A43E6B80B056), + UINT64_C(0x035AEFC64CE26A3C), UINT64_C(0x01E9181F393D3B2C), + UINT64_C(0x0224B7B616D6F2A9), UINT64_C(0x0127AF2D0AF23C91), + UINT64_C(0x000AD7965D20EADA), UINT64_C(0x0379FD4481124D87), + UINT64_C(0x01BB6F3DFED6FF8E) } }, + { { UINT64_C(0x001E54056209B80C), UINT64_C(0x01535B3A19C72F26), + UINT64_C(0x0160AA689BA423E2), UINT64_C(0x0188ECB5D9CC3A27), + UINT64_C(0x02349FCF75CC0736), UINT64_C(0x0298585615D70FD1), + UINT64_C(0x03A32918B91165DF), UINT64_C(0x022291948224D8DA), + UINT64_C(0x0099F8E69358E726) }, + { UINT64_C(0x01F00247AE9F76E1), UINT64_C(0x0128BAD6165EB802), + UINT64_C(0x01B045052E08E61D), UINT64_C(0x032D595886F8C4D8), + UINT64_C(0x00186E393A2F7214), UINT64_C(0x016991BB5064F4DD), + UINT64_C(0x02AD9C4CF5574CEF), UINT64_C(0x0255AD5071D22CCE), + UINT64_C(0x01456916FD8D5687) } }, + { { UINT64_C(0x0133F0C2BD45283F), 
UINT64_C(0x01B7E6242FDEFD97), + UINT64_C(0x035D6B97C76FCAF7), UINT64_C(0x01DEAC7652ACAD19), + UINT64_C(0x03C4E3BEA33C8BB3), UINT64_C(0x0217A37165F99AD5), + UINT64_C(0x0269B9B99EC2F11A), UINT64_C(0x028A7868FC6E7D80), + UINT64_C(0x01D15668B929808B) }, + { UINT64_C(0x028D12F5F8D82B0E), UINT64_C(0x03E7880D363FAA5E), + UINT64_C(0x00437A04942C06CB), UINT64_C(0x0049CD3A9C99AEE3), + UINT64_C(0x015E2D9B6B404613), UINT64_C(0x0162924B16171DEA), + UINT64_C(0x00D5B19300B07C85), UINT64_C(0x02FDE0650EE6F8B2), + UINT64_C(0x00BB3143583D139C) } }, + { { UINT64_C(0x009BBB9CD613AC50), UINT64_C(0x0128ACBF00659E30), + UINT64_C(0x003847B178A6C039), UINT64_C(0x03CE96D95CB2F3AB), + UINT64_C(0x0319F2188F1C72FB), UINT64_C(0x0082FCC27E7E96A0), + UINT64_C(0x00E32363BCE8DAB7), UINT64_C(0x0014FD07C4ADAC1E), + UINT64_C(0x0130440FC8AE58D8) }, + { UINT64_C(0x0065ADF64359ED2E), UINT64_C(0x037ED7D5FA4BC647), + UINT64_C(0x03FF76F3555C909F), UINT64_C(0x03512196FF57D59B), + UINT64_C(0x00299F8EAAC04382), UINT64_C(0x0329BF8D6A784DA0), + UINT64_C(0x0175E680B9D87F6E), UINT64_C(0x000779614D617559), + UINT64_C(0x0091C31FD7BBAA02) } }, + { { UINT64_C(0x007961B4B2C087ED), UINT64_C(0x019162C863ECAFF8), + UINT64_C(0x02BAA68FEDC62170), UINT64_C(0x00E14BEB5E7390A9), + UINT64_C(0x014BD12090B0D96E), UINT64_C(0x01E7BB1B54107513), + UINT64_C(0x023B8205C7A4AC9C), UINT64_C(0x0077AA83FD6A3B9F), + UINT64_C(0x00B556918DDE426E) }, + { UINT64_C(0x007982C0406E7D53), UINT64_C(0x00514C5527392914), + UINT64_C(0x030F83C68AD1F365), UINT64_C(0x01248844664ABB22), + UINT64_C(0x00E9372C39E53CD3), UINT64_C(0x019288EBDD26390E), + UINT64_C(0x0175B25020B2C5E2), UINT64_C(0x01BE6F3235A8D35E), + UINT64_C(0x01BF2B1514039839) } }, + { { UINT64_C(0x00ACAC37A302E505), UINT64_C(0x027765CE9E34F2E4), + UINT64_C(0x02EC67D63AAF96D8), UINT64_C(0x000F998F38DDD8C4), + UINT64_C(0x01F09C36E648CC10), UINT64_C(0x00F522A0C94D1ACD), + UINT64_C(0x01621C139782CB28), UINT64_C(0x002ADC14FDA30F4F), + UINT64_C(0x000AFE14E60E403A) }, + { UINT64_C(0x03F6E66F873938D8), 
UINT64_C(0x008370549C4A240B), + UINT64_C(0x019BCDB6FBB27AB2), UINT64_C(0x03968D48A1554399), + UINT64_C(0x02AE029F24D2343E), UINT64_C(0x008518D4096DF4BA), + UINT64_C(0x011410655CE49E44), UINT64_C(0x030585BCC07AC55D), + UINT64_C(0x00DBC52BEF1D2C2E) } }, + { { UINT64_C(0x031E0D6D77452267), UINT64_C(0x02FDA38F6A949512), + UINT64_C(0x01F65ED3128F260F), UINT64_C(0x0268DE30B333E479), + UINT64_C(0x03FD84E6AC2E676C), UINT64_C(0x0393B320720BDA53), + UINT64_C(0x009EDD5FCCBB47ED), UINT64_C(0x01B82B4900272372), + UINT64_C(0x01D21A307BE4561F) }, + { UINT64_C(0x01FB6C41FDBC2674), UINT64_C(0x02FC0F6001620C6D), + UINT64_C(0x009450A0F3C6CB0F), UINT64_C(0x015385B69A47DECA), + UINT64_C(0x026E2296F08B9474), UINT64_C(0x0194DEC7BE891DCB), + UINT64_C(0x008B5DA06C5F46EF), UINT64_C(0x019F5A58030A2A18), + UINT64_C(0x00207771A8172F5B) } }, + { { UINT64_C(0x02D0EED2AA2FCC67), UINT64_C(0x028799FC7DD58724), + UINT64_C(0x01664BF5933707D3), UINT64_C(0x039B5E487A0167D1), + UINT64_C(0x02767C865F544F76), UINT64_C(0x012879933B9C8060), + UINT64_C(0x03EBB40C5524547A), UINT64_C(0x0173A7851D6D690E), + UINT64_C(0x01CF4AB59422F25D) }, + { UINT64_C(0x02E0C44B926C197B), UINT64_C(0x021DCFA310FAD65B), + UINT64_C(0x03309DFCCBCED9CA), UINT64_C(0x02A11F05E3D88EA0), + UINT64_C(0x039FE02B0CE3AE95), UINT64_C(0x023B5E3CAC5E3536), + UINT64_C(0x02C9903F85BF51A2), UINT64_C(0x018141A1EBBB4D03), + UINT64_C(0x01B6F9AE1517FBCC) } }, + }, + { + { { UINT64_C(0x01CE126EEC3D1383), UINT64_C(0x03E60292016C63B4), + UINT64_C(0x01086FC1B1F4E0C7), UINT64_C(0x02B824B832819651), + UINT64_C(0x018B5EE5C0AC1703), UINT64_C(0x03467EED60D31DFE), + UINT64_C(0x0370BD13E722F576), UINT64_C(0x01C406BA2A512BD9), + UINT64_C(0x00D7E1D110502A7C) }, + { UINT64_C(0x02029FD2CA303000), UINT64_C(0x031CB26B2D4BB358), + UINT64_C(0x001AACC8DD8A2366), UINT64_C(0x02FD746E61373E27), + UINT64_C(0x01D1A80D5295C235), UINT64_C(0x01FA56B74D0D3443), + UINT64_C(0x0203660094D0A8F7), UINT64_C(0x006ACC0E24009F44), + UINT64_C(0x007532FAF2732979) } }, + { { 
UINT64_C(0x00CC8937C5CFE5E0), UINT64_C(0x036CA3F94D098379), + UINT64_C(0x0127E76C1F2F6B01), UINT64_C(0x03F376385910CC44), + UINT64_C(0x005AE2B93F0F4F7C), UINT64_C(0x001F51D975E23E7E), + UINT64_C(0x0159FF4F64431F80), UINT64_C(0x0215FECEB62BCA1C), + UINT64_C(0x00168401E32600A7) }, + { UINT64_C(0x01B5A301E78A8DB5), UINT64_C(0x00FF512D35D3F2D2), + UINT64_C(0x0354D19F77E5A97B), UINT64_C(0x0271EFC5E9AFD789), + UINT64_C(0x006980179F908FBC), UINT64_C(0x034A31A6FEF922C2), + UINT64_C(0x01832DCC33A8480C), UINT64_C(0x02589E9D28BAFB44), + UINT64_C(0x0115572B5F3957D4) } }, + { { UINT64_C(0x02B1A9337E8401D3), UINT64_C(0x0290DCDD374D1722), + UINT64_C(0x03B06DFC52EC6DB8), UINT64_C(0x0230EA32F50E3F05), + UINT64_C(0x00FF74654453A452), UINT64_C(0x01A248F21E47C014), + UINT64_C(0x01E2CED97C15ABF4), UINT64_C(0x0283D12E9548735C), + UINT64_C(0x011DE7FF5CC44367) }, + { UINT64_C(0x0397C8B2CA828FA8), UINT64_C(0x023C2C16EF221608), + UINT64_C(0x0079F7CCDCEE62D1), UINT64_C(0x02ABBC4A12FA2ABB), + UINT64_C(0x02D3E0D3AF058906), UINT64_C(0x016EE5FFCAFF1F4D), + UINT64_C(0x0383A01497A17543), UINT64_C(0x015456C9C2BA3AA0), + UINT64_C(0x00833A7F70B8DB1E) } }, + { { UINT64_C(0x02874A121147F509), UINT64_C(0x00814720ED638371), + UINT64_C(0x03306823E9395088), UINT64_C(0x02A5E552F8389554), + UINT64_C(0x00F06CF7F0BA5751), UINT64_C(0x030415DEE1815B81), + UINT64_C(0x00E24A9DB057CA02), UINT64_C(0x0130F23B0BDFF500), + UINT64_C(0x00CD32356D2FBCF3) }, + { UINT64_C(0x031835514BB690A0), UINT64_C(0x011475889E6369E4), + UINT64_C(0x02A366B8DA44B373), UINT64_C(0x01336BAE9A4C91D2), + UINT64_C(0x0321F6D6C8947D98), UINT64_C(0x0331E2910F0F8ECA), + UINT64_C(0x01F6B3937B0234FE), UINT64_C(0x016C792D27998656), + UINT64_C(0x009729CAFA8B37BB) } }, + { { UINT64_C(0x030BF08BF55F34E4), UINT64_C(0x01472A877A6E6046), + UINT64_C(0x03502971975705FE), UINT64_C(0x00F5A66B1DDF090E), + UINT64_C(0x01DD9C80102CADCC), UINT64_C(0x004EB57A202D88C1), + UINT64_C(0x0383DED93A003D31), UINT64_C(0x00DF42EE4835E279), + UINT64_C(0x010B2A2DF2E8CDFC) }, + { 
UINT64_C(0x00E3757112860379), UINT64_C(0x0049E41486F1D305), + UINT64_C(0x007F50407D2B699F), UINT64_C(0x0186CFF64543014A), + UINT64_C(0x015D637AD6EB6B8D), UINT64_C(0x03EDC1A07906ADD6), + UINT64_C(0x025B1CE8EFA6E451), UINT64_C(0x0281938DC6CCB3C0), + UINT64_C(0x01E95BF35241E85F) } }, + { { UINT64_C(0x01900B5C8B1B724E), UINT64_C(0x00091B0E23027016), + UINT64_C(0x033EA7B567F8D8DD), UINT64_C(0x0149CA26370EF3C0), + UINT64_C(0x0224F7CCEEAEB621), UINT64_C(0x01056822C07633BE), + UINT64_C(0x02682C8A34D4C312), UINT64_C(0x017F1D80C56ACAFB), + UINT64_C(0x000D28BD510F85EC) }, + { UINT64_C(0x0031C759D505A0E7), UINT64_C(0x00695B369E0D5C70), + UINT64_C(0x007414EC503E140D), UINT64_C(0x02998878F14B0559), + UINT64_C(0x03EB48B235BD02B9), UINT64_C(0x02030C241863472E), + UINT64_C(0x00302A0DF1BDB378), UINT64_C(0x02ADB25754F52D99), + UINT64_C(0x01EBEAF9E9BDE9AC) } }, + { { UINT64_C(0x0016D2E6C4CB8040), UINT64_C(0x0251BE4AB3BBC8D1), + UINT64_C(0x00979A86B1EA6004), UINT64_C(0x03197F4F1967EFAE), + UINT64_C(0x03A8E572D3878481), UINT64_C(0x0175BC0B4A3D453E), + UINT64_C(0x0067A078B9E4BDD5), UINT64_C(0x00C290F9DB5CD51A), + UINT64_C(0x00C8A1050BE75174) }, + { UINT64_C(0x0138FA01526AE111), UINT64_C(0x01E92EC50AC0E2D9), + UINT64_C(0x03430EFE4DD66F27), UINT64_C(0x027E3E362221AF89), + UINT64_C(0x0065DC30B6D8ED5E), UINT64_C(0x0194B4AA3299C658), + UINT64_C(0x03FCCBD1A1EE5AFC), UINT64_C(0x0011C786A00C112C), + UINT64_C(0x01770EC65BD04CBD) } }, + { { UINT64_C(0x0219978F485193F0), UINT64_C(0x0169EF77837E1846), + UINT64_C(0x039A4F73B9DC8ADB), UINT64_C(0x0060DDE7E026EABA), + UINT64_C(0x033EDEE638C66335), UINT64_C(0x0296BFF6A6D575A3), + UINT64_C(0x01B793FCB261CF96), UINT64_C(0x00066B2DAA6E8B8E), + UINT64_C(0x00FAA4EE0DF08936) }, + { UINT64_C(0x0082665D53161177), UINT64_C(0x00BF125BA82F6D39), + UINT64_C(0x022B5DABCDFDBE3B), UINT64_C(0x021CD6983941E0F2), + UINT64_C(0x010414D9EC902549), UINT64_C(0x03C8E709DAE4453B), + UINT64_C(0x03B39712A9467665), UINT64_C(0x01718D188F0108E5), + UINT64_C(0x0001E683E6E53299) } }, + 
{ { UINT64_C(0x026BEC9ED63E2975), UINT64_C(0x02445B0FA3670F21), + UINT64_C(0x01B0436EA7FA88A2), UINT64_C(0x01B3E0317834AC34), + UINT64_C(0x0370A51D7EBF7519), UINT64_C(0x028FE5E7A5374634), + UINT64_C(0x004F9C7DD9D61B9E), UINT64_C(0x024629F3A018136E), + UINT64_C(0x01B14207DD17A593) }, + { UINT64_C(0x02B49CBF0B981980), UINT64_C(0x03D510AA4EE52E56), + UINT64_C(0x0223FC5E38C54336), UINT64_C(0x006CECAD3BD995A0), + UINT64_C(0x01C1E9CE9CFF80F2), UINT64_C(0x03F2A4F91A9DFFC4), + UINT64_C(0x023C10907D4D0C02), UINT64_C(0x0266DE5575DC75DB), + UINT64_C(0x00C42F22C54D0AE1) } }, + { { UINT64_C(0x02CA7240C82B5AA4), UINT64_C(0x009FC67BD6157E6E), + UINT64_C(0x0237AEA0E986F61E), UINT64_C(0x0295536DA6F6D324), + UINT64_C(0x03CCCEAED7D090D6), UINT64_C(0x02AEB5185AD3ED8F), + UINT64_C(0x01709E10CC89909F), UINT64_C(0x02104E7DD9DB3C2E), + UINT64_C(0x018FBE92AA69FDDA) }, + { UINT64_C(0x019CC5A0410AA767), UINT64_C(0x01BD2A1F9D7CB636), + UINT64_C(0x016925EEC5FA539B), UINT64_C(0x030EE211BCC86603), + UINT64_C(0x02286DD13B9B314D), UINT64_C(0x019EE14925C53864), + UINT64_C(0x03BA30594CCCD2C4), UINT64_C(0x03CF135ECF524017), + UINT64_C(0x009675B7F38F7A5F) } }, + { { UINT64_C(0x034097FDD5C529C4), UINT64_C(0x022BABC53852C90D), + UINT64_C(0x005FA5449B2CFEAE), UINT64_C(0x0213E3712D2D891B), + UINT64_C(0x01EC7B3EEE99C138), UINT64_C(0x027C357D0B9CBABB), + UINT64_C(0x025A19E877887A6F), UINT64_C(0x00D4CD3E5DC97F03), + UINT64_C(0x01A0BD7971FE9BC8) }, + { UINT64_C(0x01302079C035FA1B), UINT64_C(0x03A553C1D7472F9E), + UINT64_C(0x01A4254310460FA3), UINT64_C(0x00172E37209ED67F), + UINT64_C(0x01598766A435004B), UINT64_C(0x015F6DA2FE9089F7), + UINT64_C(0x03D7A8AD6610ED72), UINT64_C(0x00218A47CD395F7C), + UINT64_C(0x01CEBC586BD69C42) } }, + { { UINT64_C(0x005E156C633E8718), UINT64_C(0x036F6921E8311E5A), + UINT64_C(0x012516B3E4747664), UINT64_C(0x016B6481265AF56F), + UINT64_C(0x005B9CA959873FB0), UINT64_C(0x01215A2E38706CDD), + UINT64_C(0x00C64AAAEE1FE5AB), UINT64_C(0x009494AE29DD5833), + UINT64_C(0x001DE0FFFA144A84) }, 
+ { UINT64_C(0x01AB0B04D7864A53), UINT64_C(0x03B6589B739D3720), + UINT64_C(0x0342AE6EE03B4D2D), UINT64_C(0x0366C4CD40B083D3), + UINT64_C(0x02E583D735216939), UINT64_C(0x028069A08705938A), + UINT64_C(0x03470E4558BB0247), UINT64_C(0x037269A3A352E23F), + UINT64_C(0x000A1B500F437A69) } }, + { { UINT64_C(0x017C93D92A097CC4), UINT64_C(0x001BA88CC46C7150), + UINT64_C(0x01AE786C3A4D3E20), UINT64_C(0x028BF5869DC58997), + UINT64_C(0x02E52726A122777F), UINT64_C(0x00972F198872159B), + UINT64_C(0x02552DD5544B0BA5), UINT64_C(0x009FAC089C64945A), + UINT64_C(0x00A926F159FE26EE) }, + { UINT64_C(0x003998CBAECC32F4), UINT64_C(0x01BD7CE18DCAAA28), + UINT64_C(0x00A1F5FB988BB383), UINT64_C(0x03AEB19DEFD835C2), + UINT64_C(0x00244E47BC8D865E), UINT64_C(0x0038157724E1BB10), + UINT64_C(0x007BD8BF38E25231), UINT64_C(0x00C5E24E2CD69DAB), + UINT64_C(0x01A779CC34494897) } }, + { { UINT64_C(0x004BD43B7D176E2E), UINT64_C(0x005E93AB83087469), + UINT64_C(0x03E80C170CBB6730), UINT64_C(0x02CA4F7C8BEDBE63), + UINT64_C(0x02A85DD542AB5799), UINT64_C(0x0066D2B71D97D372), + UINT64_C(0x03558E6854EDDBC6), UINT64_C(0x01014B87714911B3), + UINT64_C(0x0150C0A4F996E45F) }, + { UINT64_C(0x01E0E94EA8A05AA1), UINT64_C(0x02AFE47CFC92BB70), + UINT64_C(0x0203EC4D3CE6EAF1), UINT64_C(0x024771DB1D696301), + UINT64_C(0x0196D9AA529C496E), UINT64_C(0x03B56E31398127F0), + UINT64_C(0x0387E08D7862B4A2), UINT64_C(0x032941073AE64CE3), + UINT64_C(0x0000E769C78F3C16) } }, + { { UINT64_C(0x034AFDE7FF46E9D5), UINT64_C(0x01174874945BB22A), + UINT64_C(0x0315AE08354CD33E), UINT64_C(0x020944101FCD5584), + UINT64_C(0x02AD3EF0CDDE6E15), UINT64_C(0x030A2698AB480B82), + UINT64_C(0x03BF15403C92749F), UINT64_C(0x025EFF1408AEDEF4), + UINT64_C(0x00853B2112F03584) }, + { UINT64_C(0x017A76C60E367447), UINT64_C(0x031C3B84E9CFE4B6), + UINT64_C(0x0383807320E00DD1), UINT64_C(0x02152F5E5EE3BE00), + UINT64_C(0x035287A9CC92FA2D), UINT64_C(0x0007C4F52ABBB00A), + UINT64_C(0x006B2558DC7D9071), UINT64_C(0x0266DBFFAED357E3), + UINT64_C(0x007E76EA86C8A78C) } 
}, + { { UINT64_C(0x00DA97D33D831A04), UINT64_C(0x0273CA87AB20DA80), + UINT64_C(0x004C77C7C118ED92), UINT64_C(0x00F87131473BDF57), + UINT64_C(0x036EC3E2E0DE7125), UINT64_C(0x00C7E8EADB491D0D), + UINT64_C(0x0299CB19B912B7BF), UINT64_C(0x0399A443D4E010F6), + UINT64_C(0x0098FCF8A99C2A16) }, + { UINT64_C(0x030D9571D49B2FC3), UINT64_C(0x02127D20D334D6E9), + UINT64_C(0x00CF98756BB05081), UINT64_C(0x02A955A34EA7C78A), + UINT64_C(0x0099BBA4C82FA729), UINT64_C(0x03B80CA8EED74492), + UINT64_C(0x03A7668CD742B7C3), UINT64_C(0x039AA1A4CD0B2F61), + UINT64_C(0x01769BB74BE7BFCF) } }, + }, + { + { { UINT64_C(0x01AE6D7AF8ECE594), UINT64_C(0x004BD233382C1067), + UINT64_C(0x02FC7E73749707AD), UINT64_C(0x01A0C47D78BA765F), + UINT64_C(0x02BB7416407B8B16), UINT64_C(0x02F996A9035A29ED), + UINT64_C(0x01C78A5F9EA3DEA9), UINT64_C(0x03997AA8F9A04684), + UINT64_C(0x0062155AD4E50AC6) }, + { UINT64_C(0x0136D4FEFEBBFAD7), UINT64_C(0x03C498A8C3B5B196), + UINT64_C(0x03AF4B2081A7DC94), UINT64_C(0x02FE1693A20D804F), + UINT64_C(0x0019DBDAD1684FFD), UINT64_C(0x03E47903EABFC90E), + UINT64_C(0x00EA7078F3484441), UINT64_C(0x037A0851741BD87B), + UINT64_C(0x004DEB7A4980ECBA) } }, + { { UINT64_C(0x02A998A0008164D4), UINT64_C(0x014B73504FD3FC3A), + UINT64_C(0x00C19E4FF76A915D), UINT64_C(0x00D30C3B2FD0EC60), + UINT64_C(0x01518FD432879FDC), UINT64_C(0x018585905FB0DE73), + UINT64_C(0x002E0E88A51BB32E), UINT64_C(0x011E824BA1621756), + UINT64_C(0x008F5503550AE008) }, + { UINT64_C(0x01F4C5CC039B003C), UINT64_C(0x034FE4F1205365F7), + UINT64_C(0x029B502075F020C8), UINT64_C(0x02E622483E3884F2), + UINT64_C(0x0096DBF1B7347D87), UINT64_C(0x03E49F71A5BBC472), + UINT64_C(0x028F694B092BA1CC), UINT64_C(0x03911DA84B731F41), + UINT64_C(0x00AEE98DB68D16A6) } }, + { { UINT64_C(0x03335FA8EB78796F), UINT64_C(0x02878D6632487FA2), + UINT64_C(0x023DC13EBB873632), UINT64_C(0x0328E4AB268A2A07), + UINT64_C(0x017A111FE36EA0A1), UINT64_C(0x02DD260BC4AB23DF), + UINT64_C(0x02BD012E8019E481), UINT64_C(0x02DAEA5C2102ACDC), + 
UINT64_C(0x0191F08F46778030) }, + { UINT64_C(0x01DAFF85FF6CA70B), UINT64_C(0x00C20C713262D23C), + UINT64_C(0x0002F4B44F09083A), UINT64_C(0x014BFF17F10ECF45), + UINT64_C(0x025ADB2237EA42A8), UINT64_C(0x03E47544193ED683), + UINT64_C(0x016D405A3F97D5CE), UINT64_C(0x03412AAA28009BC3), + UINT64_C(0x0061A9DB41BEFEDC) } }, + { { UINT64_C(0x02DE586F26762E69), UINT64_C(0x016435D71514BA52), + UINT64_C(0x016D7A3D17B63A4D), UINT64_C(0x026D50DCE42619B6), + UINT64_C(0x0071889F59482029), UINT64_C(0x011CE57167125C3C), + UINT64_C(0x00A0EA2BE409EA4A), UINT64_C(0x009EDE87052C5E58), + UINT64_C(0x01024A33C8A03073) }, + { UINT64_C(0x0190FE7C2B54A6C6), UINT64_C(0x006AD6F23DFB4339), + UINT64_C(0x01A290051C927B4A), UINT64_C(0x001E3AB0900247C6), + UINT64_C(0x02F0CF556BD9F5D6), UINT64_C(0x0044A9D7E6F09A3D), + UINT64_C(0x03647C4823C77404), UINT64_C(0x0174246A05A125F4), + UINT64_C(0x005046F70E49B3B4) } }, + { { UINT64_C(0x0168F14947F5FEA0), UINT64_C(0x00769E99AB9E6CB3), + UINT64_C(0x0132518C89E21038), UINT64_C(0x01B680C1A8696720), + UINT64_C(0x002ED6053CD44327), UINT64_C(0x01D30DD43B7E58A9), + UINT64_C(0x00944E2E081D9491), UINT64_C(0x006831ACBEAD123C), + UINT64_C(0x0152C11DC5777195) }, + { UINT64_C(0x00241773802E1A49), UINT64_C(0x01BAF7037807F846), + UINT64_C(0x03D3C7A48FA494BE), UINT64_C(0x011E5017010FAAB7), + UINT64_C(0x02754857375E5F4A), UINT64_C(0x03779B43EFE7F8E1), + UINT64_C(0x0012FF3BABC982CB), UINT64_C(0x00FFF200A782A57D), + UINT64_C(0x01525BFCB1CE27F1) } }, + { { UINT64_C(0x03E552EA093A81E5), UINT64_C(0x0289B3D7E8ED9281), + UINT64_C(0x0342009AC81D0D79), UINT64_C(0x03AD34454A991783), + UINT64_C(0x01E2910F69599605), UINT64_C(0x03D879F03BB2582D), + UINT64_C(0x027BC06449C49ACB), UINT64_C(0x008DC219F862EDC8), + UINT64_C(0x01C5BFA6129C1E94) }, + { UINT64_C(0x026A51D1748353E7), UINT64_C(0x0181475224C056F6), + UINT64_C(0x00C626EAA883505E), UINT64_C(0x0279EE327830A7B4), + UINT64_C(0x0320D8F515A684E8), UINT64_C(0x00C3F8E23CD44D3F), + UINT64_C(0x02C122EE12C67CA1), UINT64_C(0x00E99C91530D5183), + 
UINT64_C(0x0021144C6B142C61) } }, + { { UINT64_C(0x011D351AD93C77DA), UINT64_C(0x03AA1509EA474780), + UINT64_C(0x018659BD1EF489E2), UINT64_C(0x003305C7CD548712), + UINT64_C(0x0274078260A570D7), UINT64_C(0x0053143C92277CEB), + UINT64_C(0x002C9848EA865C9F), UINT64_C(0x02CCE08E86A1AEA9), + UINT64_C(0x017387D78B16B104) }, + { UINT64_C(0x004AA27AD541016D), UINT64_C(0x018249526E484E54), + UINT64_C(0x02AB312423D0089E), UINT64_C(0x0219D7F11A43C693), + UINT64_C(0x02063682A176BD49), UINT64_C(0x03B53A444F4AA295), + UINT64_C(0x00795B99C8C7C949), UINT64_C(0x03E13055864354E1), + UINT64_C(0x00AD0290F60CD7D0) } }, + { { UINT64_C(0x012D2A436D526DD9), UINT64_C(0x01CD402DD6D978C6), + UINT64_C(0x00A58E861B88A485), UINT64_C(0x02D5660B63C2B513), + UINT64_C(0x00AC661A50344950), UINT64_C(0x005912EC7C3046DF), + UINT64_C(0x00386C50A42C0A1A), UINT64_C(0x03AB81C1B172201D), + UINT64_C(0x00C7E276190DAFE0) }, + { UINT64_C(0x02C2EF02CE4F4EFB), UINT64_C(0x036C62A28EE8E529), + UINT64_C(0x007713DEA66609AC), UINT64_C(0x0335AC64B1B06D35), + UINT64_C(0x030C33E87E4697D9), UINT64_C(0x02A8B6DA5FD2C060), + UINT64_C(0x00A7681837DA7123), UINT64_C(0x034383051138278A), + UINT64_C(0x0100BA5CB675B5C3) } }, + { { UINT64_C(0x007A90498A37CD61), UINT64_C(0x00C21A3950646D6E), + UINT64_C(0x00E24CC900B23BA5), UINT64_C(0x00177482F428680B), + UINT64_C(0x008C265BAA81CF89), UINT64_C(0x035D3B4D224FFF8E), + UINT64_C(0x036D6B85A5B0977B), UINT64_C(0x00D1075A6C1311DD), + UINT64_C(0x01CE20C3E0DE4C26) }, + { UINT64_C(0x03983305308A7408), UINT64_C(0x034CC1C79BB9BDAE), + UINT64_C(0x02079940C900D507), UINT64_C(0x011184B7705AB688), + UINT64_C(0x00BE018DECC7C858), UINT64_C(0x00059833EA10EFD5), + UINT64_C(0x03D3C58726A0CFF9), UINT64_C(0x03FAC56BC268E09A), + UINT64_C(0x00AF6C171D653277) } }, + { { UINT64_C(0x01151276D19DDB66), UINT64_C(0x00BE849EE9A2D3A8), + UINT64_C(0x02C6A7580CC1CD5D), UINT64_C(0x03AE7FCF32E2402D), + UINT64_C(0x0077F3388646E57B), UINT64_C(0x0321275FFC38AED4), + UINT64_C(0x035220194FAC16E6), 
UINT64_C(0x00AC60DD1664CBF4), + UINT64_C(0x005C9F4FAEB1E475) }, + { UINT64_C(0x03454E2FDA228C02), UINT64_C(0x03CE54CE918B9E80), + UINT64_C(0x01E6700CB1251E2C), UINT64_C(0x004D9EF2E269258E), + UINT64_C(0x0271A9DFD10397F8), UINT64_C(0x01D68E1301C08065), + UINT64_C(0x0255D3F4888FC07C), UINT64_C(0x01EA14C32D6DB6C1), + UINT64_C(0x00641A5E7FF0CED4) } }, + { { UINT64_C(0x03D2DB7494E80EB1), UINT64_C(0x03429AAC7DF50EDF), + UINT64_C(0x0193B4233D776372), UINT64_C(0x00FA6676BCB0445B), + UINT64_C(0x00962AF93FA06ADE), UINT64_C(0x00ED262149C44EC5), + UINT64_C(0x00DD0F0802C2CD3B), UINT64_C(0x0349A7F09C0CD9BA), + UINT64_C(0x019BCEE240624924) }, + { UINT64_C(0x0301B8CB30F92986), UINT64_C(0x02FBD5618F84FCAA), + UINT64_C(0x020844CC6DEA56EF), UINT64_C(0x0399AC423AE9922A), + UINT64_C(0x0304B577679CF04F), UINT64_C(0x033A00D5B3E1E90B), + UINT64_C(0x02E0EA5DF7501CB6), UINT64_C(0x01AEEBA7909CF3AB), + UINT64_C(0x00D1F739C1192316) } }, + { { UINT64_C(0x03FBED19829AE558), UINT64_C(0x018A508538E70057), + UINT64_C(0x00CB16FE844A9E7C), UINT64_C(0x02A5D97534D7DBBC), + UINT64_C(0x005769E43FDAB701), UINT64_C(0x02371B260F0C6E67), + UINT64_C(0x0088CED91D562ACB), UINT64_C(0x03FF0E5F0D26F719), + UINT64_C(0x009911094F5E4AA4) }, + { UINT64_C(0x014DA634DAAD22D1), UINT64_C(0x0126CD74DB263614), + UINT64_C(0x00B20F1368A80FE1), UINT64_C(0x01C40150F01BDEEF), + UINT64_C(0x036B7B115D665EA4), UINT64_C(0x00E64D810EAB1790), + UINT64_C(0x037432C58B6DDE4A), UINT64_C(0x02689716E469337C), + UINT64_C(0x009023B703EED1A4) } }, + { { UINT64_C(0x0168DF986EB8B398), UINT64_C(0x0373053537795BF1), + UINT64_C(0x018911988685F26D), UINT64_C(0x0387383FA6C93770), + UINT64_C(0x019704736EAD528F), UINT64_C(0x0271A2FD2A7AB31F), + UINT64_C(0x016F759D385DF60B), UINT64_C(0x00588A673CE9E385), + UINT64_C(0x00F00D2C74D140B1) }, + { UINT64_C(0x037761186D05FF6A), UINT64_C(0x021D5810D7AE7578), + UINT64_C(0x032F7D951B6FE596), UINT64_C(0x00F101711823BB39), + UINT64_C(0x028DE92770998580), UINT64_C(0x037C0C99F0D97BF8), + UINT64_C(0x030EB60AA7504E10), 
UINT64_C(0x038624C9A9EBB17E), + UINT64_C(0x0117D8E0506A5993) } }, + { { UINT64_C(0x02D315A154D9F1F8), UINT64_C(0x00A34DBD30332164), + UINT64_C(0x0306F497C34DB615), UINT64_C(0x03599315A4DB339F), + UINT64_C(0x007E9E0F8E2399AC), UINT64_C(0x003A93148F4FA95A), + UINT64_C(0x011F62B5F0DC45EF), UINT64_C(0x02C2CA027E1C8CCA), + UINT64_C(0x017EDB2AB60DCF2F) }, + { UINT64_C(0x03D0BE47BDAF0C41), UINT64_C(0x0261770EA9BAF337), + UINT64_C(0x00123C9A8D5C885C), UINT64_C(0x02304942CA223A54), + UINT64_C(0x027514FEE2CC680A), UINT64_C(0x02845D9CADE7E084), + UINT64_C(0x037BF3E603649E24), UINT64_C(0x00221D7FD1EC9BB3), + UINT64_C(0x019ABE2E017E3282) } }, + { { UINT64_C(0x022C310986DBC74A), UINT64_C(0x016910C9D8D292FA), + UINT64_C(0x0168FBA7C0C784B2), UINT64_C(0x02F0C2E785D2A006), + UINT64_C(0x01AE45ADAA754923), UINT64_C(0x0340D3039A77094C), + UINT64_C(0x028C800560A74DE4), UINT64_C(0x0209DAB7CF99A92A), + UINT64_C(0x01A7AE95C3D65A81) }, + { UINT64_C(0x03D0EF28C4FA3D53), UINT64_C(0x01C7BD38B1347859), + UINT64_C(0x0005A7461F21783E), UINT64_C(0x01367207E2FE3122), + UINT64_C(0x033746BBB79E2E44), UINT64_C(0x0279FE17A5803572), + UINT64_C(0x03015592FFEC7617), UINT64_C(0x02742174C25F4D16), + UINT64_C(0x00E410A0B89682D7) } }, + { { UINT64_C(0x02B22FBEE727DDB2), UINT64_C(0x024FD40DFE0DC5F9), + UINT64_C(0x015C3DCCFE2E8278), UINT64_C(0x029992449755EB6E), + UINT64_C(0x03FD36B4574277E1), UINT64_C(0x02D49C964F2299EE), + UINT64_C(0x021CD67B9805D246), UINT64_C(0x0157D17DBA6DBB8F), + UINT64_C(0x014315532B63B009) }, + { UINT64_C(0x0192F41C11B068CF), UINT64_C(0x013ADE386B9A6252), + UINT64_C(0x0023510A4F9C5B28), UINT64_C(0x027BD3DC9B9B0039), + UINT64_C(0x02377F19B4B907D4), UINT64_C(0x0292B925A6106638), + UINT64_C(0x01058CF22E01616A), UINT64_C(0x017799C00E576B04), + UINT64_C(0x00A289A954F56291) } }, + }, + { + { { UINT64_C(0x00C4AC143FFE4858), UINT64_C(0x0306D22EAAC4A5AD), + UINT64_C(0x01F0A5791E3783D9), UINT64_C(0x03A0A974CB2ACA2D), + UINT64_C(0x02E76FB3F03AA34D), UINT64_C(0x0217400AE3A40C22), + 
UINT64_C(0x0040CD3B74A7ED3C), UINT64_C(0x00FCB122891AAD96), + UINT64_C(0x01B8C8494718771D) }, + { UINT64_C(0x03F57D14A28DA023), UINT64_C(0x022E364741E3E46C), + UINT64_C(0x01A7ABA67F27FDBC), UINT64_C(0x030FF1837DC3E97D), + UINT64_C(0x00618486CF4908AD), UINT64_C(0x02CF161553F374F8), + UINT64_C(0x019DD012E725571E), UINT64_C(0x033EDF6BF47BD717), + UINT64_C(0x0125806554EE19B9) } }, + { { UINT64_C(0x018E9A7BA994A7B1), UINT64_C(0x02AC0D7BEC6A8983), + UINT64_C(0x03D38D705E07CD01), UINT64_C(0x005566DD3C426505), + UINT64_C(0x0067EB2AB8C5C6E4), UINT64_C(0x02833D0E2656CD6B), + UINT64_C(0x01DDCA9C78AA1909), UINT64_C(0x00EDF1FB3DAA7F12), + UINT64_C(0x0166F72F3DE51C63) }, + { UINT64_C(0x02B78FAEB96F6D73), UINT64_C(0x02052F35A5545293), + UINT64_C(0x005CD62AD9BF553E), UINT64_C(0x00B728FA50CC968E), + UINT64_C(0x019295FA16301250), UINT64_C(0x0287D8B59A13D480), + UINT64_C(0x0316813DDF4A21F3), UINT64_C(0x01769E5723184C7C), + UINT64_C(0x0066E0E7009AE7B5) } }, + { { UINT64_C(0x021F2EE46CDE12CD), UINT64_C(0x003D0000412CCD1F), + UINT64_C(0x02C67E761CB63537), UINT64_C(0x02C1A38D4F403A59), + UINT64_C(0x03B812F8D1F26B87), UINT64_C(0x029994AD5ACE97AC), + UINT64_C(0x026C55C785488093), UINT64_C(0x01869CEF172A91D6), + UINT64_C(0x01661593B4702F1D) }, + { UINT64_C(0x0197935A2366B021), UINT64_C(0x01C8C53ECC9EEE7B), + UINT64_C(0x02C636CFB825AB8B), UINT64_C(0x02EEC0E46E96B427), + UINT64_C(0x00525F145382F270), UINT64_C(0x0133F597DCA61576), + UINT64_C(0x0237ACF913367D38), UINT64_C(0x02C6B96EB5398F41), + UINT64_C(0x0088A6A556F6EF14) } }, + { { UINT64_C(0x03AE1C8DCCD34315), UINT64_C(0x0157B6DF5CCF4DF6), + UINT64_C(0x02191AB191DCA071), UINT64_C(0x01897CF46F10173C), + UINT64_C(0x02767320BD61533A), UINT64_C(0x01A9DAB7019D6315), + UINT64_C(0x01911BB32715F1BB), UINT64_C(0x001C7F74F8A656CA), + UINT64_C(0x0009C70F08ACB68E) }, + { UINT64_C(0x0072A1ED9356A25A), UINT64_C(0x01556970A7D5EEF6), + UINT64_C(0x0350BEDB0F71D649), UINT64_C(0x03EA3565DDFF826F), + UINT64_C(0x013B29E08B1AF8F4), UINT64_C(0x0331B92ACB74C5CA), + 
UINT64_C(0x03A4E6E26F5AAC1D), UINT64_C(0x036F06A79D110118), + UINT64_C(0x00631FDFA318D2BC) } }, + { { UINT64_C(0x035871450EAD4FF9), UINT64_C(0x0045783A9CFF37E4), + UINT64_C(0x03713AE92AC33512), UINT64_C(0x009A3896CE34EF6D), + UINT64_C(0x03A8EE82555DC9D1), UINT64_C(0x002C620829E4335D), + UINT64_C(0x0375E016D1AE1B50), UINT64_C(0x016D891B140E00CD), + UINT64_C(0x00097FE78FE880E9) }, + { UINT64_C(0x01A323FFCB8B195A), UINT64_C(0x014E7DA6CA0AAFF4), + UINT64_C(0x00C88E8E6528DDB5), UINT64_C(0x01A720372EE878E6), + UINT64_C(0x015A2426F3EF9BB8), UINT64_C(0x01604A559CF4A620), + UINT64_C(0x02C8F10B967488E1), UINT64_C(0x028191262B209448), + UINT64_C(0x019E5661C083C48E) } }, + { { UINT64_C(0x01D1ED07D6920A2A), UINT64_C(0x03909AA105A814DB), + UINT64_C(0x029B1BBB7F2ECAC2), UINT64_C(0x03BB4096CC1FBE27), + UINT64_C(0x0382CAD68C150CCC), UINT64_C(0x00F1CBB480EE5E69), + UINT64_C(0x03933B382F4CE45C), UINT64_C(0x0283D1969E6EC1D6), + UINT64_C(0x008C6BE4F8FBF5F9) }, + { UINT64_C(0x00C2A30AF1CA3CCC), UINT64_C(0x02FF4D4359C3CABE), + UINT64_C(0x020AA78B337657B0), UINT64_C(0x01C5C613D10C423A), + UINT64_C(0x003249BB2418CB6D), UINT64_C(0x00CAB4378A53687C), + UINT64_C(0x0147E31B6118850C), UINT64_C(0x02D08DC29C2D596C), + UINT64_C(0x00409A1F9C9C0372) } }, + { { UINT64_C(0x03985FC5DEB5DCD3), UINT64_C(0x02328F30C46302C2), + UINT64_C(0x00260388D4747802), UINT64_C(0x03BFBB0240E60F52), + UINT64_C(0x03B209042D288213), UINT64_C(0x00F7BBEE239C04F6), + UINT64_C(0x039A7EE4CF9007B4), UINT64_C(0x01BFEC97A07FF7ED), + UINT64_C(0x00F46BA7F4461BE4) }, + { UINT64_C(0x02FF04BE53B68E6C), UINT64_C(0x01CA69133AC1C9A1), + UINT64_C(0x001C0711D4BE94AE), UINT64_C(0x02E7507B45945E53), + UINT64_C(0x011B7A5F7EC81DBE), UINT64_C(0x0329BFC6DA7CDB63), + UINT64_C(0x01FCD3B287A0A497), UINT64_C(0x01F250F924D3B826), + UINT64_C(0x0174EABAF5F90BA0) } }, + { { UINT64_C(0x0288B8614B07B1BF), UINT64_C(0x00AE0C951E1C4290), + UINT64_C(0x01FC49AB7CD0CA2F), UINT64_C(0x0139ED7FA367ECE7), + UINT64_C(0x007ACFF8F0933B14), 
UINT64_C(0x01BE527A6CE02D5F), + UINT64_C(0x03F3D3A06B11DFFE), UINT64_C(0x021959D14B1DF4BB), + UINT64_C(0x01BC6741AD8DA8F8) }, + { UINT64_C(0x034CD028C42166D8), UINT64_C(0x0185807E32738495), + UINT64_C(0x005883F1CCD9FD2E), UINT64_C(0x03CA0BFCEE08ED5A), + UINT64_C(0x03EAF8CDFF12C8BC), UINT64_C(0x039F9E6871AF8AEE), + UINT64_C(0x0109893E423B3304), UINT64_C(0x0120DC6E783F51AB), + UINT64_C(0x011A855D5413AED9) } }, + { { UINT64_C(0x03EC078648AA3834), UINT64_C(0x022666BDFBC08928), + UINT64_C(0x020CD318C559ED79), UINT64_C(0x031A1F3F1113AB91), + UINT64_C(0x0225DA57498B9B85), UINT64_C(0x00501D2B9387A084), + UINT64_C(0x01462ED6150B49FB), UINT64_C(0x0270A359C4EB430D), + UINT64_C(0x01AD03ACD7F1F2DA) }, + { UINT64_C(0x00577220553E08C6), UINT64_C(0x02711DCC2A6176C2), + UINT64_C(0x00D41E0F942DF9B3), UINT64_C(0x032019849BF44B40), + UINT64_C(0x006F6F65E6AF51C1), UINT64_C(0x02192F8FD6395745), + UINT64_C(0x0369C64E6D49408A), UINT64_C(0x01C1CA82AADBB384), + UINT64_C(0x00252180D9240A33) } }, + { { UINT64_C(0x03B36603F69B34EA), UINT64_C(0x023601EA98DB7FF6), + UINT64_C(0x0119384D5B4D0084), UINT64_C(0x009CB1557E1A2117), + UINT64_C(0x0120F29FC187E5AB), UINT64_C(0x020795FEFEF91AF3), + UINT64_C(0x01654BD2C20FF213), UINT64_C(0x0193B09B2AFFB3A3), + UINT64_C(0x01F2DBD41C09A92B) }, + { UINT64_C(0x0190B8EB79047156), UINT64_C(0x002863629F98DF90), + UINT64_C(0x0131D825BFCD5C94), UINT64_C(0x012459BCEEE81461), + UINT64_C(0x012AEB328B250B06), UINT64_C(0x031E1C2DAC09694B), + UINT64_C(0x000530A4AD5276F9), UINT64_C(0x02B3D1F18BB7C853), + UINT64_C(0x01E8BD2FCCA04F6F) } }, + { { UINT64_C(0x02834F110665B1CF), UINT64_C(0x017AA90109CDC18A), + UINT64_C(0x009242A3E1F2E720), UINT64_C(0x02D5A60BD5F8954E), + UINT64_C(0x03508324EB838D5B), UINT64_C(0x02EDD0C3ED33B190), + UINT64_C(0x00AAD5DC3A119996), UINT64_C(0x01CD04A457847144), + UINT64_C(0x008F9F585EE51416) }, + { UINT64_C(0x0353544CA94CC511), UINT64_C(0x03C458B74ECFBB85), + UINT64_C(0x00DFB34B9CF940F6), UINT64_C(0x025DDCAA8FA2C670), + UINT64_C(0x005DE224A75FEDB1), 
UINT64_C(0x0133692E8F60712D), + UINT64_C(0x0273753106CAA7BE), UINT64_C(0x01408D58EA2D6196), + UINT64_C(0x00E26553508F8448) } }, + { { UINT64_C(0x01A3A4F60BB13D25), UINT64_C(0x0023ED9ED8B71298), + UINT64_C(0x03FFC9A520FCC5AA), UINT64_C(0x0045A041830B9268), + UINT64_C(0x00CC9DB2983FF213), UINT64_C(0x0121E74580D3BD97), + UINT64_C(0x03180DFFF5302191), UINT64_C(0x017F708B61C069C2), + UINT64_C(0x00AFC5190BADFB44) }, + { UINT64_C(0x0059EAFDA4B66F01), UINT64_C(0x007705DA965D6F67), + UINT64_C(0x020B87871134FA29), UINT64_C(0x01AD088735B31B4F), + UINT64_C(0x018012C061713383), UINT64_C(0x0284C3C51E97DE38), + UINT64_C(0x011439AE9AC5E3B5), UINT64_C(0x0201A73CE2ADC421), + UINT64_C(0x013663825C862321) } }, + { { UINT64_C(0x018D68C0B140A004), UINT64_C(0x01BFAA6599011216), + UINT64_C(0x01E7950576D7B0B1), UINT64_C(0x0078B24B131D0E5F), + UINT64_C(0x02AD5C3FFEDF02C1), UINT64_C(0x0322CFD3147C6177), + UINT64_C(0x038BD27915C61C9C), UINT64_C(0x02F37687B9498DE9), + UINT64_C(0x00EBB6AC6E166ECF) }, + { UINT64_C(0x01DE078E81F8F797), UINT64_C(0x036F3FD0C148612A), + UINT64_C(0x00D42800CEE62CC8), UINT64_C(0x02EF08C94C9988E1), + UINT64_C(0x02A200E24C7221CE), UINT64_C(0x0087BB91FBA9446C), + UINT64_C(0x01AEF9F64351AA5D), UINT64_C(0x0379F61D1F515F5C), + UINT64_C(0x01D6BBEA838FBDE0) } }, + { { UINT64_C(0x029C5257AC98DFAE), UINT64_C(0x033122DA34CA0C86), + UINT64_C(0x02E5AEB04EB596D8), UINT64_C(0x01866E31FF449E97), + UINT64_C(0x01EFC618512D868E), UINT64_C(0x02AB8DD8A2E422DD), + UINT64_C(0x0315FBBF0AB5F678), UINT64_C(0x029B64EE769245C7), + UINT64_C(0x006C6C12185D61E3) }, + { UINT64_C(0x008781A5F0C92FB5), UINT64_C(0x02186CDBC76A7DC2), + UINT64_C(0x02BF30F2AE35EBF2), UINT64_C(0x02A9033768598F59), + UINT64_C(0x026D8F763CE2DDB2), UINT64_C(0x000096A41DC06247), + UINT64_C(0x0378DBDD308791A2), UINT64_C(0x0303B0E7D471E5F3), + UINT64_C(0x0047B4CFEAEEA101) } }, + { { UINT64_C(0x03329136A629DD22), UINT64_C(0x00E5BE3AD1E98750), + UINT64_C(0x00E718574118A518), UINT64_C(0x0001BFD334A31B85), + 
UINT64_C(0x010ACC7BD56131AD), UINT64_C(0x01BAE8680FF31AF2), + UINT64_C(0x033BF365D3656538), UINT64_C(0x01275681F6A3E780), + UINT64_C(0x01D9134C0EBA1F9E) }, + { UINT64_C(0x03FC0784F75200EB), UINT64_C(0x02505880E37CB45D), + UINT64_C(0x02D012B6F4AEDF75), UINT64_C(0x0239FE68EEDA06B2), + UINT64_C(0x0214FD97D35A83E1), UINT64_C(0x0161FD60913389DA), + UINT64_C(0x02E06AA08A955A74), UINT64_C(0x00A478BB3A540872), + UINT64_C(0x0194213360ACA782) } }, + { { UINT64_C(0x01C7D837402145D7), UINT64_C(0x029A3987EA8CF574), + UINT64_C(0x017B7322E3920EED), UINT64_C(0x01DA90CCE8A07229), + UINT64_C(0x019966632762CF1A), UINT64_C(0x02EA82E975BFDBB2), + UINT64_C(0x00D089776CD7C2DA), UINT64_C(0x01094FFA3D38BAB2), + UINT64_C(0x00ED9425E7C61A8F) }, + { UINT64_C(0x030890ADFDDB406F), UINT64_C(0x02F38194427778C1), + UINT64_C(0x02645A577E29DB0B), UINT64_C(0x02B73BB5A04F839F), + UINT64_C(0x02CBE569872B94D6), UINT64_C(0x034D3051E8314100), + UINT64_C(0x0228FAA39358328C), UINT64_C(0x00F6B458D19C41F5), + UINT64_C(0x01B60D6BFFF120A1) } }, + }, + { + { { UINT64_C(0x03B0D91DCEF34144), UINT64_C(0x0240FE90ACAA2EEA), + UINT64_C(0x02F5638E4C5FABC5), UINT64_C(0x0279B56C13AF89E7), + UINT64_C(0x007BB923CEB3416E), UINT64_C(0x024528E9111E0646), + UINT64_C(0x0019F3658FEFA212), UINT64_C(0x007942C115ACBB8B), + UINT64_C(0x00B3176361BBE92C) }, + { UINT64_C(0x0056A1AF824FDE34), UINT64_C(0x03EFECC262943F2F), + UINT64_C(0x00F55AB9CFA7333B), UINT64_C(0x02E423937E89B9C8), + UINT64_C(0x0177865B2FF1E104), UINT64_C(0x00D9D0346E5AE2AF), + UINT64_C(0x0250F4369EB257AA), UINT64_C(0x02479F5CEE51B49A), + UINT64_C(0x007A588E4A1470CD) } }, + { { UINT64_C(0x006FD0B27FF5FDD9), UINT64_C(0x0315207EADCA6EB7), + UINT64_C(0x038531FDE9E82663), UINT64_C(0x03E9C7DA1307DC24), + UINT64_C(0x007FCF66FC293D27), UINT64_C(0x0073411170172CF4), + UINT64_C(0x03FA0B1709D86BA1), UINT64_C(0x0023FC735B565525), + UINT64_C(0x00C65EABD8A0D474) }, + { UINT64_C(0x001EA477B6B64713), UINT64_C(0x03CAD4127E803700), + UINT64_C(0x02F97EFCE2EC6148), 
UINT64_C(0x021B881732700041), + UINT64_C(0x01A6D874ACACA115), UINT64_C(0x00A7CA705835C220), + UINT64_C(0x01191B137DD5C14D), UINT64_C(0x02CB4161AB1B2384), + UINT64_C(0x01EA96470F229677) } }, + { { UINT64_C(0x016F41AA44BE78BD), UINT64_C(0x00DBC87805312BB8), + UINT64_C(0x0318156EA17D7B54), UINT64_C(0x026CDF0148DE5C45), + UINT64_C(0x03F974EA0D77EB08), UINT64_C(0x02136BB03794FF4E), + UINT64_C(0x01B53A227C4C2E9C), UINT64_C(0x02B0229F1C11498E), + UINT64_C(0x01CDAB34CEF9122C) }, + { UINT64_C(0x01942B2B520FED74), UINT64_C(0x0278BB0606178C91), + UINT64_C(0x03C70799A5848E33), UINT64_C(0x01024AF0188FBCA7), + UINT64_C(0x017502FD5E81CD21), UINT64_C(0x0341AC8FD5BE6E9F), + UINT64_C(0x03807308C0C55507), UINT64_C(0x02DA9120D7D39BD9), + UINT64_C(0x0078E0C0ADC9F3B8) } }, + { { UINT64_C(0x0249E4056736B7A8), UINT64_C(0x000AD5FD0E326A32), + UINT64_C(0x00F1D8DD5BD49BAE), UINT64_C(0x03C65D240FD61C7B), + UINT64_C(0x0348AA1A2246B05E), UINT64_C(0x03D6D10E55244A30), + UINT64_C(0x02E9906E8F8D085E), UINT64_C(0x0187FD8BEFA8BFBF), + UINT64_C(0x00F8ECD06F55C492) }, + { UINT64_C(0x003A56FE1DEF19D6), UINT64_C(0x0197C74F933E6798), + UINT64_C(0x005694559A51C48D), UINT64_C(0x028423114901AE4B), + UINT64_C(0x006C134B2FD133CC), UINT64_C(0x01F5B1FDE595A9F1), + UINT64_C(0x037CDF87E407C290), UINT64_C(0x01C9430D19026B6E), + UINT64_C(0x00AE4EBC0B91EEC4) } }, + { { UINT64_C(0x0027F5A2CFACC519), UINT64_C(0x0007D8CA3F95188A), + UINT64_C(0x02386E76D1ED1FA2), UINT64_C(0x012CFC615ECB44AE), + UINT64_C(0x02BAC8E16C4EECC0), UINT64_C(0x030FC8B6EACB48A4), + UINT64_C(0x0356F1C94FF8F3DD), UINT64_C(0x00E7898C9228D80E), + UINT64_C(0x0100391DE5D28C45) }, + { UINT64_C(0x00DDA167BAEA3E6E), UINT64_C(0x024E9B6238591A96), + UINT64_C(0x000B124B20D76C9C), UINT64_C(0x00844E80DAD85B15), + UINT64_C(0x006322B9CC9CFBC9), UINT64_C(0x03C3F3E68B0EC1FB), + UINT64_C(0x0198C8988C8CDF43), UINT64_C(0x012F63F58B2E6769), + UINT64_C(0x0146D6A4BBF8FA16) } }, + { { UINT64_C(0x025929A379C36058), UINT64_C(0x03AA8D69D0F228FC), + 
UINT64_C(0x03137C58503106D0), UINT64_C(0x031D3407BEC09250), + UINT64_C(0x012A5E9F3CB78FCD), UINT64_C(0x03C89A97F7DE8B2F), + UINT64_C(0x03FFA336D8C2CB9D), UINT64_C(0x03CDFCCBE0B2ABB7), + UINT64_C(0x018DB520A44381C3) }, + { UINT64_C(0x037F91B7E71EFA02), UINT64_C(0x02CD2A4F8F2A0051), + UINT64_C(0x03247FBAA82739BD), UINT64_C(0x004F7652DC5CA6F6), + UINT64_C(0x0247D54BFA1094B5), UINT64_C(0x01201F41A5F24EA8), + UINT64_C(0x036AE048899075C8), UINT64_C(0x008DE5B2C2092D5F), + UINT64_C(0x01A05D1DEF90E6C9) } }, + { { UINT64_C(0x009C63F00DDEF055), UINT64_C(0x029E867514AE17BD), + UINT64_C(0x0071477B7FA6548A), UINT64_C(0x01DCF23B30CCB894), + UINT64_C(0x039F3EAF10214846), UINT64_C(0x0131314742EE42E6), + UINT64_C(0x025A42537B162041), UINT64_C(0x0344D321CAEDE286), + UINT64_C(0x00C49346566A2F80) }, + { UINT64_C(0x00AC1057A1A2F1BD), UINT64_C(0x01B16F3F4CF6D85A), + UINT64_C(0x00470A35FA26D12C), UINT64_C(0x02FDF7EC571664A6), + UINT64_C(0x00357DE22954AF5D), UINT64_C(0x01CB9B6C3295D89E), + UINT64_C(0x02A6D5E003D32198), UINT64_C(0x02BCFEFCD08395C8), + UINT64_C(0x0024E3256C9EC29E) } }, + { { UINT64_C(0x02E3E3726899A80A), UINT64_C(0x0026F9277D12E5D8), + UINT64_C(0x03A9F147B7CC784D), UINT64_C(0x02D1E1BE2785B816), + UINT64_C(0x035FD35148DBC7EB), UINT64_C(0x008735EF566F4D0B), + UINT64_C(0x023A56774FF10ABF), UINT64_C(0x02650BA6B7B26925), + UINT64_C(0x016ADF49024BBCF1) }, + { UINT64_C(0x003AD342E4E67976), UINT64_C(0x03C92192D00DAB16), + UINT64_C(0x020460FDED50A384), UINT64_C(0x034C8C7A7CCCB477), + UINT64_C(0x026F1F63625979C2), UINT64_C(0x01C81B4E10D5FC66), + UINT64_C(0x036A3D003DC0490C), UINT64_C(0x012B902A026C1347), + UINT64_C(0x01F7B86A36390DAD) } }, + { { UINT64_C(0x000691E2EC112CB8), UINT64_C(0x024EF99D143B7D60), + UINT64_C(0x0115A42EEFCFA47F), UINT64_C(0x01E802D725D2BBE5), + UINT64_C(0x0121B37EFA442937), UINT64_C(0x0017BB506D32E10E), + UINT64_C(0x026AAA87600CCD57), UINT64_C(0x016CF4C8E0A70FF4), + UINT64_C(0x009FFBF163AE94B4) }, + { UINT64_C(0x0295886926814D18), UINT64_C(0x03A0FBF4C1A9E1DB), + 
UINT64_C(0x03C42214E510B980), UINT64_C(0x01795048E2D2FBCB), + UINT64_C(0x007E6ECA8AF45230), UINT64_C(0x03B7348F6C6F8B62), + UINT64_C(0x0082EEE297D2810F), UINT64_C(0x001262A01DEC143A), + UINT64_C(0x01B9903A2D05B891) } }, + { { UINT64_C(0x023634A86BE77EA4), UINT64_C(0x00A0B41ED63F1BFE), + UINT64_C(0x0275C4824374C264), UINT64_C(0x02608A7A328E460A), + UINT64_C(0x00FED89AAE8DD2B7), UINT64_C(0x02109029EF3CE021), + UINT64_C(0x011969F67E04BEBE), UINT64_C(0x01A57DE74BB6D7CF), + UINT64_C(0x0032260FF5FAEF2A) }, + { UINT64_C(0x02058C1764B8EB93), UINT64_C(0x034A7BEAEE142796), + UINT64_C(0x01C4178E14455ABA), UINT64_C(0x0089C0C3FD3F4E75), + UINT64_C(0x006C6AD7C0E981DA), UINT64_C(0x0228FCA3E86007B0), + UINT64_C(0x025CE2ECCA48B8F4), UINT64_C(0x01E5A636E10EA6E7), + UINT64_C(0x00B998D460C196E1) } }, + { { UINT64_C(0x0160926185730C8D), UINT64_C(0x032DE7C19EF3EB5F), + UINT64_C(0x01B89DB78DA4AF19), UINT64_C(0x03E8BF1A8A7D683F), + UINT64_C(0x00C74484F132486E), UINT64_C(0x0020C78A33777ADF), + UINT64_C(0x028B418FCCA39E1E), UINT64_C(0x03C6B30F7BDFA864), + UINT64_C(0x012E1D3651FF3815) }, + { UINT64_C(0x023FC40DA01A8D36), UINT64_C(0x0396DC8A8E0AC356), + UINT64_C(0x0257ECBA277518BE), UINT64_C(0x015E0BE8CDCF0B5F), + UINT64_C(0x017CA95C0BC967EE), UINT64_C(0x0305AA19591EC746), + UINT64_C(0x00ECEE9B1C5E531F), UINT64_C(0x017F62DDF7CD8C93), + UINT64_C(0x01843F3A5D58D681) } }, + { { UINT64_C(0x008235BF1CE87EAC), UINT64_C(0x0337B13BA7D5C15E), + UINT64_C(0x03846B02056DE241), UINT64_C(0x033C6CAEB5DEAB90), + UINT64_C(0x030248638020D787), UINT64_C(0x0224F8D01B9221DD), + UINT64_C(0x01F402C62FF58E8A), UINT64_C(0x03AAD9850E5506F5), + UINT64_C(0x003902A9875C05FB) }, + { UINT64_C(0x0020DA18AA01F6F0), UINT64_C(0x030A6715F4E78D18), + UINT64_C(0x037807033B777232), UINT64_C(0x01B7606FD787D415), + UINT64_C(0x008A9CC327698B87), UINT64_C(0x0061BCA066C82FF1), + UINT64_C(0x01BFA28EB25A2709), UINT64_C(0x024D6272DC7593CB), + UINT64_C(0x00EC0BB76A281871) } }, + { { UINT64_C(0x032999435C8AA41D), 
UINT64_C(0x01A489157A228E17), + UINT64_C(0x0156F793B6B0E956), UINT64_C(0x028D96D92EBD33D6), + UINT64_C(0x0359740492EFE167), UINT64_C(0x015A71262E572E91), + UINT64_C(0x01FA4485B8FC6399), UINT64_C(0x0347A0956647A542), + UINT64_C(0x010E38E5A425F12F) }, + { UINT64_C(0x00AEFDFC244C41BB), UINT64_C(0x003952945BE8B3B5), + UINT64_C(0x0319FE9C6BCFD1F0), UINT64_C(0x03F504A658EDEE0B), + UINT64_C(0x02ED873A43F5A1E1), UINT64_C(0x02712F6EE0434187), + UINT64_C(0x03F8F26F084CADB4), UINT64_C(0x0037A2587E5D9BC4), + UINT64_C(0x007E3E8815CB75BB) } }, + { { UINT64_C(0x00D0B08F2FB80E07), UINT64_C(0x001F1C3F02C8AA99), + UINT64_C(0x02C965AB70A7B621), UINT64_C(0x02934839B849A6F8), + UINT64_C(0x003F88BA718D98ED), UINT64_C(0x02899A10EC155762), + UINT64_C(0x0019825E2EA0BBFE), UINT64_C(0x031BADAF50BB1556), + UINT64_C(0x00C2052564BF2D01) }, + { UINT64_C(0x02BBD600B64986F4), UINT64_C(0x0001308CBE96F1C1), + UINT64_C(0x00C849F303B9F9E3), UINT64_C(0x02D14076FC63D1DE), + UINT64_C(0x0236169D2D35EA78), UINT64_C(0x0264B3B8EE95BD05), + UINT64_C(0x002F66E82F19619B), UINT64_C(0x0095E5BD3AAECF3F), + UINT64_C(0x004DAC1BA614BE0C) } }, + { { UINT64_C(0x031F00ED67DF6D6E), UINT64_C(0x03D70047AC4E0BA7), + UINT64_C(0x02D8711992AA1754), UINT64_C(0x036ECAEB89D30859), + UINT64_C(0x0036A42A32CE3566), UINT64_C(0x01D98A9D0A6301E2), + UINT64_C(0x0254343364F9506D), UINT64_C(0x00BA44E9D5246E7C), + UINT64_C(0x01A19768E78BDB19) }, + { UINT64_C(0x01612B559D4C1CFE), UINT64_C(0x00FD06AC0FA53998), + UINT64_C(0x01000FCBA8F910A9), UINT64_C(0x02941E6AFC5E6D3F), + UINT64_C(0x00CAEFF18F01E2A7), UINT64_C(0x00C3611A9DC5189A), + UINT64_C(0x004BD42C721A7B6E), UINT64_C(0x02CFCE0AB6DE8255), + UINT64_C(0x0157E0604D9A6299) } }, + { { UINT64_C(0x004C36A17F3F00C1), UINT64_C(0x03AAE85897960B4C), + UINT64_C(0x00162519D94A771E), UINT64_C(0x00EFA894195CFB14), + UINT64_C(0x0377393E0BEA5785), UINT64_C(0x01275D68934C0C3C), + UINT64_C(0x020E33D09CE0D489), UINT64_C(0x00636664BBECE0A2), + UINT64_C(0x01D94E3BA2F10531) }, + { UINT64_C(0x00F1D932B72461C9), 
UINT64_C(0x030803CCCD33A980), + UINT64_C(0x03D527D0F91F6DBE), UINT64_C(0x032A75271076B0B3), + UINT64_C(0x00618C0762DDDF10), UINT64_C(0x0023381E1F452B93), + UINT64_C(0x02E55888093553F9), UINT64_C(0x0179B91A78A3270C), + UINT64_C(0x008109452184E2A2) } }, + }, + { + { { UINT64_C(0x039BF352B2648196), UINT64_C(0x0255A7410BF9D82B), + UINT64_C(0x00E69B9D9444400A), UINT64_C(0x0115B8CE4ADD0E15), + UINT64_C(0x0286C0702CA01A26), UINT64_C(0x0343E585D0F62B8D), + UINT64_C(0x0270AB3B658EDEED), UINT64_C(0x00BDF019DAC3BE2C), + UINT64_C(0x01DA71CEBA8F0207) }, + { UINT64_C(0x031B398D4D9BC7BB), UINT64_C(0x000CF24C3929C7AB), + UINT64_C(0x01B421C8D3FD5E6F), UINT64_C(0x007CC4196EE4E246), + UINT64_C(0x020BD4BEA34DCA8A), UINT64_C(0x0290B50CAE9698DF), + UINT64_C(0x00FCD1330F886EB9), UINT64_C(0x01E1AC79F03E8C00), + UINT64_C(0x00DA9DFFAC1D7299) } }, + { { UINT64_C(0x023B6F4171DE62A2), UINT64_C(0x02483565211B08E1), + UINT64_C(0x03590C48E9F4C557), UINT64_C(0x0300655D7CA7761E), + UINT64_C(0x000FC94679705CC8), UINT64_C(0x03F1F51E4C554176), + UINT64_C(0x02F4AA91C9B85DEC), UINT64_C(0x01830B06FDF1C0BD), + UINT64_C(0x01705BC114A4818F) }, + { UINT64_C(0x026AF34683BFC242), UINT64_C(0x02704B0386A138E6), + UINT64_C(0x0201A2D902335BC5), UINT64_C(0x00F97548337FE82F), + UINT64_C(0x0068481E95BAAC46), UINT64_C(0x02198BC38D3244C8), + UINT64_C(0x02FB3AE37E76F25B), UINT64_C(0x0051FD7A6C46B763), + UINT64_C(0x00BB4F63544525E2) } }, + { { UINT64_C(0x0184463DCFE3927A), UINT64_C(0x038592C4A5446C69), + UINT64_C(0x00820DA1FCA22B30), UINT64_C(0x01BE68F5BD638385), + UINT64_C(0x01820BD08BDBAACC), UINT64_C(0x02A44306C3D5797E), + UINT64_C(0x0038CCA1AA697778), UINT64_C(0x00C7C5B9FA5A6346), + UINT64_C(0x00AF09862D4121FA) }, + { UINT64_C(0x01CB3F3FBEBC6638), UINT64_C(0x037E0A83514FED33), + UINT64_C(0x03EACD5523409D6F), UINT64_C(0x020D6BA55D786340), + UINT64_C(0x01CCC13F9ADFA032), UINT64_C(0x0019CA4869978150), + UINT64_C(0x039E387EBA3B5F3E), UINT64_C(0x02E531E4CE95EAED), + UINT64_C(0x019F9D4B6C1E271A) } }, + { { 
UINT64_C(0x03D9C637E6B4D0F2), UINT64_C(0x02F39727B4A2B4A9), + UINT64_C(0x03B1C91C466BE1FF), UINT64_C(0x0002CA1D422DB470), + UINT64_C(0x035959F6F8064E3B), UINT64_C(0x01A06409B64B70C1), + UINT64_C(0x0138166589198416), UINT64_C(0x01E4D2E6E69DFBF6), + UINT64_C(0x01235B6CCAD8ED3A) }, + { UINT64_C(0x036BC004511EBBDB), UINT64_C(0x03C77128404EB6AD), + UINT64_C(0x02C7DBC63944D083), UINT64_C(0x00A0B83D92DC53A7), + UINT64_C(0x0236B4A39AE88503), UINT64_C(0x03A8D6E5C0E1C279), + UINT64_C(0x029FE38FA8BE1456), UINT64_C(0x03585B0A0A7CC668), + UINT64_C(0x00A7641453F65799) } }, + { { UINT64_C(0x00158306BEA400A9), UINT64_C(0x007F40534A2A445F), + UINT64_C(0x01C35C303D86F4A4), UINT64_C(0x00EDDE592FDFA8FD), + UINT64_C(0x0103A9EFC14289AA), UINT64_C(0x03407BDDBE6E50BA), + UINT64_C(0x009401AB57CFB13E), UINT64_C(0x0399C8A12EA5A5B1), + UINT64_C(0x00FC6AFA631B2401) }, + { UINT64_C(0x03676F7FA3EA1F68), UINT64_C(0x0292D21900F132BA), + UINT64_C(0x023C1FDE32777454), UINT64_C(0x016AD44E9E4A043B), + UINT64_C(0x034CE0B6BF5A83B8), UINT64_C(0x007C5DBECEE12BCA), + UINT64_C(0x034C6521C9D71204), UINT64_C(0x0295DA0F38E7DE8B), + UINT64_C(0x0062381F9092A871) } }, + { { UINT64_C(0x021E20A63FBBA24C), UINT64_C(0x036388882DF52B55), + UINT64_C(0x00530F2F7C7C2371), UINT64_C(0x03643DB108CC955E), + UINT64_C(0x024B18165F1B6107), UINT64_C(0x02769559E8B8FA46), + UINT64_C(0x00ABDA3964357585), UINT64_C(0x006A3DE26D6BDE65), + UINT64_C(0x00FA0EF45FF0F7F0) }, + { UINT64_C(0x0328AF72F4ADEFE3), UINT64_C(0x00F209DB1F3C181A), + UINT64_C(0x01A0AC16B36B8052), UINT64_C(0x03FE68F1AFEB358F), + UINT64_C(0x011BB7B356C432BB), UINT64_C(0x03D087AF0D447953), + UINT64_C(0x00088B00BECEF91E), UINT64_C(0x0330A2DA3B763B85), + UINT64_C(0x01CC26379FF0902A) } }, + { { UINT64_C(0x02451A0F72841A85), UINT64_C(0x0354FC0056ED797F), + UINT64_C(0x03F4EAB6EB12B346), UINT64_C(0x0032B842273C8FB8), + UINT64_C(0x024B836D935DD874), UINT64_C(0x0090627CCD9E0492), + UINT64_C(0x0244927C3C49DF5D), UINT64_C(0x0042534A4E5AA66E), + UINT64_C(0x00B4C23CB62729C6) }, + { 
UINT64_C(0x00295DE15E7B0D82), UINT64_C(0x003481AED4B38216), + UINT64_C(0x020CB574DA2A8CEB), UINT64_C(0x03DB292DC006EFC3), + UINT64_C(0x03153DE3966C31DB), UINT64_C(0x0398C0D13BB538D2), + UINT64_C(0x00D2735B5509DAE6), UINT64_C(0x00BBE1C7422AD656), + UINT64_C(0x006495E2F55306CC) } }, + { { UINT64_C(0x00FC0E58752517BF), UINT64_C(0x0287DC3FE2714AA6), + UINT64_C(0x024BBBD332D8AADB), UINT64_C(0x000BF6FA0D08504F), + UINT64_C(0x02E724A624D71D7E), UINT64_C(0x01F16EF435B7F288), + UINT64_C(0x024E6F71370923F3), UINT64_C(0x00C2B9525922566C), + UINT64_C(0x005733338A43CFE0) }, + { UINT64_C(0x0372270A8BB6E5C0), UINT64_C(0x0023295E1C578E27), + UINT64_C(0x01EA019B1BDD171A), UINT64_C(0x0243564F2EC5E9B6), + UINT64_C(0x01283B58FFA9DAE7), UINT64_C(0x00215CCB462BFC41), + UINT64_C(0x03E3900D562119A3), UINT64_C(0x0273C10EF622442D), + UINT64_C(0x00D7B5F5A5718A0A) } }, + { { UINT64_C(0x03E792204254F3D7), UINT64_C(0x0197A7FB52460AD3), + UINT64_C(0x0387DC97132E1376), UINT64_C(0x00D82DE34F7F5873), + UINT64_C(0x03B853655C8CF8AC), UINT64_C(0x0173E013A8BD55E9), + UINT64_C(0x008A7D4896369A87), UINT64_C(0x024DBCC16EA9BB3A), + UINT64_C(0x010910C0CEC40352) }, + { UINT64_C(0x03B95A34F108C612), UINT64_C(0x0333E2F3D8672331), + UINT64_C(0x028C77D48D5C235B), UINT64_C(0x0233CC3106C11962), + UINT64_C(0x03EBBF90DDDA15FE), UINT64_C(0x0369066DD81ED647), + UINT64_C(0x03BD05AA96CD4304), UINT64_C(0x039E3FFACDB3BA32), + UINT64_C(0x01EAC4B260DDEC7F) } }, + { { UINT64_C(0x035858F23BBE227D), UINT64_C(0x00EAE5030697E923), + UINT64_C(0x02368A87F3DE71C5), UINT64_C(0x0168E7B6DEE0F7C3), + UINT64_C(0x00527543ED139D52), UINT64_C(0x0127219B1CDD187E), + UINT64_C(0x023DB1516D99AC2E), UINT64_C(0x008101C88F395DB5), + UINT64_C(0x00C6A87659F9030E) }, + { UINT64_C(0x039C69A3A7EC3A20), UINT64_C(0x02842173900384B8), + UINT64_C(0x0136BA0852E2F7FE), UINT64_C(0x034921364764BE1F), + UINT64_C(0x02C74764840F38B3), UINT64_C(0x02F37D32908AE4DC), + UINT64_C(0x0138C24B162396AC), UINT64_C(0x02A70AD1A514245D), + UINT64_C(0x00C442ABF244BFAF) } }, + 
{ { UINT64_C(0x02A6B09F093E7BB1), UINT64_C(0x027395A268EC7AC7), + UINT64_C(0x028CC643D554CA43), UINT64_C(0x0035243849E2C949), + UINT64_C(0x03CF25745B571D36), UINT64_C(0x00F8968B891A06D4), + UINT64_C(0x03F9158462DF4912), UINT64_C(0x0277B23F176B632C), + UINT64_C(0x0100FDC9203FE38B) }, + { UINT64_C(0x024667E35C0213B3), UINT64_C(0x001C9D8E55C59D73), + UINT64_C(0x03C67911C028CE7C), UINT64_C(0x01D6BE78640D4CA8), + UINT64_C(0x024E359FD8B3F600), UINT64_C(0x03240449153262A6), + UINT64_C(0x03B253E7A16A83A5), UINT64_C(0x02FDB9879C3019FF), + UINT64_C(0x01D5771531A45180) } }, + { { UINT64_C(0x02FFF1EEAD72BA02), UINT64_C(0x01773B2AD40CD7B5), + UINT64_C(0x00B549067C93A24B), UINT64_C(0x0040E568D769A5B9), + UINT64_C(0x01CBA8C547CFD559), UINT64_C(0x01B900D1740D29F8), + UINT64_C(0x0153A5FEC2807EDD), UINT64_C(0x003616B13CBFDC6E), + UINT64_C(0x014FA30FBEC2B9FF) }, + { UINT64_C(0x03CEBD84555A3B73), UINT64_C(0x011642C087A74BA4), + UINT64_C(0x03FAF4C90C28B568), UINT64_C(0x00D2B6FE13831FC3), + UINT64_C(0x02F1845F4A404C99), UINT64_C(0x03031352DB2945ED), + UINT64_C(0x0192B108B24A2CC8), UINT64_C(0x008B79F2C497B8AE), + UINT64_C(0x016844B1F9A48A1A) } }, + { { UINT64_C(0x033F1B159EA0B318), UINT64_C(0x015BA4F73890FCA5), + UINT64_C(0x03AB1671767AEB58), UINT64_C(0x0190DE3F4B53983C), + UINT64_C(0x01C67D39EE1606B7), UINT64_C(0x02092898897E0832), + UINT64_C(0x016BC61B17E221D9), UINT64_C(0x0302B2A3F7863F1A), + UINT64_C(0x0153FC11A3315E45) }, + { UINT64_C(0x02AC9E25352466CC), UINT64_C(0x03A49408E6FA3892), + UINT64_C(0x03B3B7FC83F96BAA), UINT64_C(0x02447E01B52DE677), + UINT64_C(0x01EB6353F032192D), UINT64_C(0x00910C3CF3E5926D), + UINT64_C(0x02261F650A5EA2DB), UINT64_C(0x03AA8819EC45E274), + UINT64_C(0x01F274F4B47595FA) } }, + { { UINT64_C(0x0026282EB3F78C83), UINT64_C(0x00C28C0709CFCB19), + UINT64_C(0x01821376CE1FE0A2), UINT64_C(0x01FDCED392DF4511), + UINT64_C(0x007CEFA4CDFC46EC), UINT64_C(0x01C18D201835A1D3), + UINT64_C(0x021190BA9D0FC1B3), UINT64_C(0x01CF1181F215C327), + UINT64_C(0x0144F63DC1DC2337) }, 
+ { UINT64_C(0x02467154F82AE76F), UINT64_C(0x00A8E4BC6B21A6C1), + UINT64_C(0x003C5960D11DFC29), UINT64_C(0x02CCE05B7F97DFEA), + UINT64_C(0x0155EBEF61A21A64), UINT64_C(0x02E5A1DD22DB3809), + UINT64_C(0x008CACD3BAEA4ADC), UINT64_C(0x01AF102BA92E48C7), + UINT64_C(0x0060B7381DB1721E) } }, + { { UINT64_C(0x03861A0264B1FB35), UINT64_C(0x02F8C8B3CD33A6FA), + UINT64_C(0x030806F41BBA295F), UINT64_C(0x0164D82631325495), + UINT64_C(0x00CE9EA6FF0E358B), UINT64_C(0x0079012DD18DCC6B), + UINT64_C(0x000CC353D3BB1AC0), UINT64_C(0x03AB6D47DE397D50), + UINT64_C(0x00AD096897EA08E2) }, + { UINT64_C(0x023B78EFC3812C10), UINT64_C(0x0089EFA9532A659C), + UINT64_C(0x0281A0EB9A3DF013), UINT64_C(0x03AE4559CDF48DB0), + UINT64_C(0x00CF5D05BA21B5A4), UINT64_C(0x000FB2B315217C86), + UINT64_C(0x018D07209C8D7927), UINT64_C(0x0142BF514B4FAA4C), + UINT64_C(0x002374D59706AD5B) } }, + { { UINT64_C(0x00C15F67DD00894F), UINT64_C(0x0365718AE78487A2), + UINT64_C(0x01F5CF8A8DD7221A), UINT64_C(0x00B966824944DA72), + UINT64_C(0x039495E53E96A028), UINT64_C(0x017A489926C99CDF), + UINT64_C(0x03E7DBA2A6042AD8), UINT64_C(0x0070896FE2C77ED8), + UINT64_C(0x01DE2D3E99009396) }, + { UINT64_C(0x02CDACE519305F18), UINT64_C(0x0199321FCFA0FFC9), + UINT64_C(0x01FDEB80C6DC481C), UINT64_C(0x02944307EF501A18), + UINT64_C(0x0007F535095DB6A0), UINT64_C(0x01898CF112F16E56), + UINT64_C(0x00CB5741AFE7E00B), UINT64_C(0x01926B1FD8D17FCB), + UINT64_C(0x015E5CD28BDE5A59) } }, + }, + { + { { UINT64_C(0x0287283D0F0DB502), UINT64_C(0x01F7D518BD1DEC47), + UINT64_C(0x0110E901D0288278), UINT64_C(0x000A9C8AA5A57C0C), + UINT64_C(0x03B765C5FA16BDCF), UINT64_C(0x03E5DF4E7DE798D7), + UINT64_C(0x00F43CD382F586CB), UINT64_C(0x016DF729B4C5BFE2), + UINT64_C(0x00F84CAB1D3D3490) }, + { UINT64_C(0x03C62F43F45CE248), UINT64_C(0x01779CCA073E2076), + UINT64_C(0x003E7EB22E4B1573), UINT64_C(0x0192926CE48BFBEA), + UINT64_C(0x00AEAE190B45D381), UINT64_C(0x02BD36FBE7AB443A), + UINT64_C(0x00906E0CD124F126), UINT64_C(0x025881B2A14C49E4), + 
UINT64_C(0x016E768F54273911) } }, + { { UINT64_C(0x0339D7B298B06389), UINT64_C(0x00171C63E44DC1B1), + UINT64_C(0x00C31B1589FD2080), UINT64_C(0x00B27F131898A9FA), + UINT64_C(0x0342FE5ADE76B5A2), UINT64_C(0x01090D97105A2655), + UINT64_C(0x0388BB1432187198), UINT64_C(0x02D27D0C82BF52D7), + UINT64_C(0x00807B9F1B11A583) }, + { UINT64_C(0x01F3344975177EBC), UINT64_C(0x00D1C4854243F6DB), + UINT64_C(0x00CF85E1839AB312), UINT64_C(0x00D9C19A12D20012), + UINT64_C(0x01709110819085E7), UINT64_C(0x011FEDA170483D5C), + UINT64_C(0x01B28F055EEB31A0), UINT64_C(0x02289D0F2CBAB0E6), + UINT64_C(0x000867BA2963A0E1) } }, + { { UINT64_C(0x03F6911B90581DC0), UINT64_C(0x01F1FB19987F20FB), + UINT64_C(0x0134E22EFA2F437F), UINT64_C(0x00398E1EB156A4E0), + UINT64_C(0x0325F4C0DBD2FAF4), UINT64_C(0x0204D252D5C55B5B), + UINT64_C(0x00E279F64EA373DA), UINT64_C(0x01DB9B5CD34A8E6F), + UINT64_C(0x00D14F2FC1B2EE3D) }, + { UINT64_C(0x0391CF084FAB453E), UINT64_C(0x016D9E632F3C4388), + UINT64_C(0x01D15FD339420C4A), UINT64_C(0x026356CC61C907C7), + UINT64_C(0x026E23E3D6197795), UINT64_C(0x0142F5E058DB2B6C), + UINT64_C(0x020EFE8EAFF59180), UINT64_C(0x00A481A4F4563A8C), + UINT64_C(0x012FEE21C8B4C4E9) } }, + { { UINT64_C(0x02056DCD3DB8A57B), UINT64_C(0x0317AAE4B46AB720), + UINT64_C(0x031833D064C1F1CD), UINT64_C(0x03A3CC17BEBD056B), + UINT64_C(0x03F05A7034003715), UINT64_C(0x009FAC41671C58C9), + UINT64_C(0x01BEE4D8BD8671CA), UINT64_C(0x0004BC6DBD8A8392), + UINT64_C(0x01F15A2D6E92E74A) }, + { UINT64_C(0x010933993D4BD6B6), UINT64_C(0x028502613D6FDD77), + UINT64_C(0x0134D55E73D97A09), UINT64_C(0x001DB5E602D2AA86), + UINT64_C(0x00FE1E6979BF531F), UINT64_C(0x02AC99028117960B), + UINT64_C(0x03849A42EAAB4E66), UINT64_C(0x0190FBBD3B94D87F), + UINT64_C(0x011CAB9AC249065C) } }, + { { UINT64_C(0x03000D01D5AD0B4E), UINT64_C(0x01E094F415439045), + UINT64_C(0x0071645EF32A823C), UINT64_C(0x013C18E27FCF9EA5), + UINT64_C(0x00B2733886CDC7A9), UINT64_C(0x02902330EF732EA5), + UINT64_C(0x003C25CEA5C5686B), 
UINT64_C(0x029DF5773028F0CD), + UINT64_C(0x016FB941FCD6583D) }, + { UINT64_C(0x01DEA99AF3494AD9), UINT64_C(0x03BA2C1B9C712901), + UINT64_C(0x02E32E4B0A8430F2), UINT64_C(0x00CB695E8BF6F96B), + UINT64_C(0x0161F767B32907C2), UINT64_C(0x002FC8531B5E7CEC), + UINT64_C(0x00298C1304153AFA), UINT64_C(0x0189BCBF02EE4544), + UINT64_C(0x0035592EC7CAC39B) } }, + { { UINT64_C(0x0359513866647B76), UINT64_C(0x00DB6945523879DD), + UINT64_C(0x0349C662AF030344), UINT64_C(0x03638440AAB5A275), + UINT64_C(0x02A0720FE9DC8A6B), UINT64_C(0x011CEE4DF271AE5F), + UINT64_C(0x00BC676869500BE5), UINT64_C(0x02F5135FF9B7674F), + UINT64_C(0x00142511483B55E9) }, + { UINT64_C(0x02DE083E6D8A2C33), UINT64_C(0x014C0545D4B8062F), + UINT64_C(0x01AD94143AC28589), UINT64_C(0x01AEBAA37C00A634), + UINT64_C(0x0078E06973DA0209), UINT64_C(0x03F56A237FA0E6B0), + UINT64_C(0x02879F4A94D49E71), UINT64_C(0x01BE6BF822D1FD4F), + UINT64_C(0x00F9E2018F9FBF87) } }, + { { UINT64_C(0x025B8DCB938F6A40), UINT64_C(0x0026725B42FA4F9B), + UINT64_C(0x039198D12A999847), UINT64_C(0x010A9C957A1EFA18), + UINT64_C(0x012FAA8E7E5D1356), UINT64_C(0x0205AB8BB7E3A8BA), + UINT64_C(0x015652F190E95489), UINT64_C(0x0231452E385A88C6), + UINT64_C(0x0096A500D25B0C46) }, + { UINT64_C(0x01B6696514F1EAD3), UINT64_C(0x026BE39E6BD0E127), + UINT64_C(0x01725DEFE2C66DD3), UINT64_C(0x01FEAE05ECA5B5BB), + UINT64_C(0x015AA101430609C7), UINT64_C(0x0274AAB1807123A3), + UINT64_C(0x02A446B243B7DBAC), UINT64_C(0x007DC3A911987A6B), + UINT64_C(0x005309D7E2813F76) } }, + { { UINT64_C(0x01966924104023FD), UINT64_C(0x0020B1F67AD27833), + UINT64_C(0x03DFD742FB1D5AC6), UINT64_C(0x017F6DD6D843D1C9), + UINT64_C(0x01DEAB06F70CFD0B), UINT64_C(0x00F3AAA1D84BA46E), + UINT64_C(0x01535D03B00F23FA), UINT64_C(0x02F223786ADE70A7), + UINT64_C(0x00DC3F149A4B2AAE) }, + { UINT64_C(0x0318A8079CA626DD), UINT64_C(0x00A1DE38CE5C6BE6), + UINT64_C(0x032F55E2E4E50992), UINT64_C(0x0192257A6FB7EED9), + UINT64_C(0x020B9106C175FDEB), UINT64_C(0x001ACA988C739470), + UINT64_C(0x02A12D0A78C3DAD7), 
UINT64_C(0x02A0BFDBC1802E4D), + UINT64_C(0x0138CB75E6BBB8BA) } }, + { { UINT64_C(0x00B271637F32AB3F), UINT64_C(0x02196867BE3CDC78), + UINT64_C(0x00647C1710CC4F5D), UINT64_C(0x00A0EDE0B8D8DB71), + UINT64_C(0x0092AB51B9BB942A), UINT64_C(0x030CEE5FF47C8C77), + UINT64_C(0x0172B6296758CE89), UINT64_C(0x03FBF70A184CFE5F), + UINT64_C(0x0101B88E67F1E05D) }, + { UINT64_C(0x02FFBCD12737D38E), UINT64_C(0x02754305441EA3F7), + UINT64_C(0x0174766ADA98B6A0), UINT64_C(0x00EEEAD822C29CD7), + UINT64_C(0x02D88F6B991FA26B), UINT64_C(0x02CB655B1E5DF95B), + UINT64_C(0x03DD0BD505307E4F), UINT64_C(0x010182FDFC359D4A), + UINT64_C(0x00755C3675A01A9E) } }, + { { UINT64_C(0x00371ACBFD4D4113), UINT64_C(0x01CD0CEE90EDA0C0), + UINT64_C(0x023F0667BA099F71), UINT64_C(0x0122476EC028AFF8), + UINT64_C(0x0057490C1B9D3C8E), UINT64_C(0x0037D1A2CAFBC030), + UINT64_C(0x0357613B144BA059), UINT64_C(0x030B5ED5F7E2DFAA), + UINT64_C(0x00C03407E66571BC) }, + { UINT64_C(0x015B2051592A3113), UINT64_C(0x033C0B977FE1CA61), + UINT64_C(0x0114564ECE17F466), UINT64_C(0x02770F5D995C1ECC), + UINT64_C(0x01D8797648C617E7), UINT64_C(0x00B30F6FB78CAD34), + UINT64_C(0x036CD504495109EC), UINT64_C(0x02EA78A9F6758E7F), + UINT64_C(0x007A71C9E769E9C6) } }, + { { UINT64_C(0x011D5BE35201CD59), UINT64_C(0x0209D1C58765C0EE), + UINT64_C(0x01D25192839B1DB8), UINT64_C(0x03EAD38ED4A2B60E), + UINT64_C(0x0057B36709A7B7AA), UINT64_C(0x0085B62AF338BC2B), + UINT64_C(0x030F3BEF5577F894), UINT64_C(0x0390BAA242140FD9), + UINT64_C(0x011B9BF27FA21CD6) }, + { UINT64_C(0x031FF60458FFB263), UINT64_C(0x00D71C9EC589C2CE), + UINT64_C(0x006C50B6449B7493), UINT64_C(0x034EF7D63824AD56), + UINT64_C(0x038578A6820938F3), UINT64_C(0x00843B021ED27247), + UINT64_C(0x02672B0B7E864C01), UINT64_C(0x00FE28A0AD914F56), + UINT64_C(0x01870F7E6544AD26) } }, + { { UINT64_C(0x03FABFF21E593E49), UINT64_C(0x01EB902CACEDCD38), + UINT64_C(0x010907F07EA1634E), UINT64_C(0x013A3B3D20F1ACCD), + UINT64_C(0x035F3C751269190C), UINT64_C(0x02F6BAE3746C46A6), + 
UINT64_C(0x00097CBB9F7B998C), UINT64_C(0x016B88BF2C151BD8), + UINT64_C(0x01317587E7C4BAF5) }, + { UINT64_C(0x027516E2062B46F6), UINT64_C(0x01703ECD4583F2AB), + UINT64_C(0x007D01ABE67B4364), UINT64_C(0x00F1753628034E7C), + UINT64_C(0x0108FF0FECD3BD76), UINT64_C(0x033B697531A2F0AC), + UINT64_C(0x010AC9943B9A6425), UINT64_C(0x020BC633526FFAA7), + UINT64_C(0x0006E03EC9A132B1) } }, + { { UINT64_C(0x016BC247531FFCBB), UINT64_C(0x02EE2DDBF721D516), + UINT64_C(0x0052E0725E10638A), UINT64_C(0x013566F49B1AAC88), + UINT64_C(0x007343ED5106C60D), UINT64_C(0x02985C4AAAB232AC), + UINT64_C(0x0113830C6312DE7A), UINT64_C(0x0136F1CF05895FFF), + UINT64_C(0x01ED7817C0B0027B) }, + { UINT64_C(0x02716A42F749B010), UINT64_C(0x039DC807B7BDBC44), + UINT64_C(0x035DFD64A2C7F19C), UINT64_C(0x00AFE5B488D67F84), + UINT64_C(0x03831B1AD5D8B241), UINT64_C(0x00FEF3BA557CC901), + UINT64_C(0x0082C2A38F96B970), UINT64_C(0x027380F80F3D96E5), + UINT64_C(0x014FDF6544812C07) } }, + { { UINT64_C(0x03600187B0C6A752), UINT64_C(0x019E405A0263FA53), + UINT64_C(0x000E0EA369E1C1BF), UINT64_C(0x0130C422E3895E24), + UINT64_C(0x035F4072E884BDCB), UINT64_C(0x0284B4DBC9FDB267), + UINT64_C(0x0159D4401B2054DE), UINT64_C(0x03649FACE16E526C), + UINT64_C(0x0100AC3AAFFE225D) }, + { UINT64_C(0x03BA224ACAFA8C2B), UINT64_C(0x031E5C26E31FAF8C), + UINT64_C(0x00B183566D47E97E), UINT64_C(0x0020C64F9C9C2688), + UINT64_C(0x02F6655D04CC893B), UINT64_C(0x03908BE8D4648FE4), + UINT64_C(0x02F14F85922DC116), UINT64_C(0x031D345610C10114), + UINT64_C(0x00FC287447A5FA2D) } }, + { { UINT64_C(0x020880798CEE5802), UINT64_C(0x03BE370A4C38C7FF), + UINT64_C(0x00934BE76CF041A3), UINT64_C(0x011B7A12BC50EEE4), + UINT64_C(0x0301BD4FC9636CD4), UINT64_C(0x03C53C2A0264C2CE), + UINT64_C(0x0347FF0A389DC319), UINT64_C(0x03A848048891AD07), + UINT64_C(0x0110D35394388CFB) }, + { UINT64_C(0x0042E86EE18DA0C0), UINT64_C(0x0359DB5D730A12EE), + UINT64_C(0x03D8CD72D5690026), UINT64_C(0x01FD191FD18F2690), + UINT64_C(0x00B8691FD8727A16), UINT64_C(0x0135130205267C55), + 
UINT64_C(0x011FDBAF57A304DB), UINT64_C(0x012D7FC9DED7342D),
+              UINT64_C(0x01BFE56058019C74) } },
+          { { UINT64_C(0x00ADCF21754184BF), UINT64_C(0x02532EC18F101A1B),
+              UINT64_C(0x02E7AA58B7598AF4), UINT64_C(0x0297C67528666348),
+              UINT64_C(0x022BAF11DF85DAD5), UINT64_C(0x0097F7BCDA9CFFA7),
+              UINT64_C(0x03F0C563228A2E65), UINT64_C(0x0316126723B57D49),
+              UINT64_C(0x019B45ECCD3F5983) },
+            { UINT64_C(0x02B86D25E0A95EDC), UINT64_C(0x027ED42D9C73BD22),
+              UINT64_C(0x0385F10181D77392), UINT64_C(0x02C8AA05E16378DB),
+              UINT64_C(0x02962E884B04947C), UINT64_C(0x00A054D788CF48A9),
+              UINT64_C(0x006616654F6E2CF7), UINT64_C(0x021848D66B0ACC97),
+              UINT64_C(0x00E73704171C5696) } },
+      }
+};
+
+/*-
+ * Finite field inversion.
+ * Computed with exponentiation via FLT.
+ * Autogenerated: ecp/secp521r1/fe_inv.op3
+ * custom repunit addition chain
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ *
+ * t1 is only ever passed as a source operand; output is written exactly
+ * once, by the final call. All intermediate values live in local fe_t
+ * temporaries.
+ *
+ * NOTE(review): fiat_secp521r1_carry_square and fiat_secp521r1_carry_mul are
+ * defined outside this chunk. The per-line exponents below assume that
+ * carry_square(o, a) sets o = a^2 and carry_mul(o, a, b) sets o = a * b in
+ * the field -- TODO confirm against their definitions. Under that assumption
+ * each tN holds t1^(2^N - 1) and the value stored in output is
+ * t1^(2^521 - 3).
+ */
+static void
+fiat_secp521r1_inv(fe_t output, const fe_t t1)
+{
+    int i;
+    /* temporary variables */
+    fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8;
+
+    fiat_secp521r1_carry_square(acc, t1); /* acc = t1^2 */
+    fiat_secp521r1_carry_mul(t2, acc, t1); /* t2 = t1^(2^2 - 1) */
+    fiat_secp521r1_carry_square(acc, t2);
+    fiat_secp521r1_carry_square(acc, acc); /* 2 squarings of t2 in total */
+    fiat_secp521r1_carry_mul(t4, acc, t2); /* t4 = t1^(2^4 - 1) */
+    fiat_secp521r1_carry_square(acc, t4);
+    for (i = 0; i < 3; i++) /* 1 + 3 = 4 squarings of t4 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t8, acc, t4); /* t8 = t1^(2^8 - 1) */
+    fiat_secp521r1_carry_square(acc, t8);
+    for (i = 0; i < 7; i++) /* 1 + 7 = 8 squarings of t8 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t16, acc, t8); /* t16 = t1^(2^16 - 1) */
+    fiat_secp521r1_carry_square(acc, t16);
+    for (i = 0; i < 15; i++) /* 1 + 15 = 16 squarings of t16 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t32, acc, t16); /* t32 = t1^(2^32 - 1) */
+    fiat_secp521r1_carry_square(acc, t32);
+    for (i = 0; i < 31; i++) /* 1 + 31 = 32 squarings of t32 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t64, acc, t32); /* t64 = t1^(2^64 - 1) */
+    fiat_secp521r1_carry_square(acc, t64);
+    for (i = 0; i < 63; i++) /* 1 + 63 = 64 squarings of t64 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t128, acc, t64); /* t128 = t1^(2^128 - 1) */
+    fiat_secp521r1_carry_square(acc, t128);
+    for (i = 0; i < 127; i++) /* 1 + 127 = 128 squarings of t128 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t256, acc, t128); /* t256 = t1^(2^256 - 1) */
+    fiat_secp521r1_carry_square(acc, t256);
+    for (i = 0; i < 255; i++) /* 1 + 255 = 256 squarings of t256 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t512, acc, t256); /* t512 = t1^(2^512 - 1) */
+    fiat_secp521r1_carry_square(acc, t512);
+    for (i = 0; i < 3; i++) /* 1 + 3 = 4 squarings of t512 */
+        fiat_secp521r1_carry_square(acc, acc);
+    fiat_secp521r1_carry_mul(t516, acc, t4); /* t516 = t1^(2^516 - 1) */
+    fiat_secp521r1_carry_square(acc, t516);
+    fiat_secp521r1_carry_square(acc, acc); /* 2 squarings of t516 */
+    fiat_secp521r1_carry_mul(t518, acc, t2); /* t518 = t1^(2^518 - 1) */
+    fiat_secp521r1_carry_square(acc, t518);
+    fiat_secp521r1_carry_mul(t519, acc, t1); /* t519 = t1^(2^519 - 1) */
+    fiat_secp521r1_carry_square(acc, t519);
+    fiat_secp521r1_carry_square(acc, acc); /* acc = t1^(2^521 - 4) */
+    fiat_secp521r1_carry_mul(output, acc, t1); /* output = t1^(2^521 - 3); only write to output */
+}
+
+/*-
+ * Q := 2P, both projective, Q and P same pointers OK
+ * Autogenerated: op3/dbl_proj.op3
+ * https://eprint.iacr.org/2015/1060 Alg 6
+ * ASSERT: a = -3
+ *
+ * Reads P->X, P->Y, P->Z and writes Q->X, Q->Y, Q->Z. No input coordinate
+ * is read after the call that first writes to Q (Z3 = X * Z below); every
+ * other input-dependent value is already held in t0..t4 by then. That first
+ * call itself passes Z3 and Z, which are the same storage when Q == P.
+ *
+ * NOTE(review): the per-line formulas assume the fiat_secp521r1_carry_*
+ * helpers (defined outside this chunk) compute out = a^2, a * b, a + b and
+ * a - b in the field, and that const_b is the curve constant b -- TODO
+ * confirm against their definitions.
+ */
+static void
+point_double(pt_prj_t *Q, const pt_prj_t *P)
+{
+    /* temporary variables */
+    fe_t t0, t1, t2, t3, t4;
+    /* constants */
+    const limb_t *b = const_b;
+    /* set pointers for legacy curve arith */
+    const limb_t *X = P->X;
+    const limb_t *Y = P->Y;
+    const limb_t *Z = P->Z;
+    limb_t *X3 = Q->X;
+    limb_t *Y3 = Q->Y;
+    limb_t *Z3 = Q->Z;
+
+    /* the curve arith formula */
+    fiat_secp521r1_carry_square(t0, X); /* t0 = X^2 */
+    fiat_secp521r1_carry_square(t1, Y); /* t1 = Y^2 */
+    fiat_secp521r1_carry_square(t2, Z); /* t2 = Z^2 */
+    fiat_secp521r1_carry_mul(t3, X, Y);
+    fiat_secp521r1_carry_add(t3, t3, t3); /* t3 = 2*X*Y */
+    fiat_secp521r1_carry_mul(t4, Y, Z); /* t4 = Y*Z, taken before any write to Q */
+    fiat_secp521r1_carry_mul(Z3, X, Z); /* first write to Q; last read of X, Y or Z */
+    fiat_secp521r1_carry_add(Z3, Z3, Z3); /* Z3 = 2*X*Z */
+    fiat_secp521r1_carry_mul(Y3, b, t2); /* Y3 = b*Z^2 */
+    fiat_secp521r1_carry_sub(Y3, Y3, Z3); /* Y3 = b*Z^2 - 2*X*Z */
+    fiat_secp521r1_carry_add(X3, Y3, Y3);
+    fiat_secp521r1_carry_add(Y3, X3, Y3); /* Y3 = 3 * (previous Y3) */
+    fiat_secp521r1_carry_sub(X3, t1, Y3); /* X3 = Y^2 - Y3 */
+    fiat_secp521r1_carry_add(Y3, t1, Y3); /* Y3 = Y^2 + Y3 */
+    fiat_secp521r1_carry_mul(Y3, X3, Y3);
+    fiat_secp521r1_carry_mul(X3, X3, t3);
+    fiat_secp521r1_carry_add(t3, t2, t2);
+    fiat_secp521r1_carry_add(t2, t2, t3); /* t2 = 3*Z^2 */
+    fiat_secp521r1_carry_mul(Z3, b, Z3); /* Z3 = b * 2*X*Z */
+    fiat_secp521r1_carry_sub(Z3, Z3, t2);
+    fiat_secp521r1_carry_sub(Z3, Z3, t0); /* Z3 = 2*b*X*Z - 3*Z^2 - X^2 */
+    fiat_secp521r1_carry_add(t3, Z3, Z3);
+    fiat_secp521r1_carry_add(Z3, Z3, t3); /* Z3 = 3 * (previous Z3) */
+    fiat_secp521r1_carry_add(t3, t0, t0);
+    fiat_secp521r1_carry_add(t0, t3, t0); /* t0 = 3*X^2 */
+    fiat_secp521r1_carry_sub(t0, t0, t2); /* t0 = 3*X^2 - 3*Z^2 */
+    fiat_secp521r1_carry_mul(t0, t0, Z3);
+    fiat_secp521r1_carry_add(Y3, Y3, t0); /* last write to Y3 */
+    fiat_secp521r1_carry_add(t0, t4, t4); /* t0 reused: 2*Y*Z */
+    fiat_secp521r1_carry_mul(Z3, t0, Z3);
+    fiat_secp521r1_carry_sub(X3, X3, Z3); /* last write to X3 */
+    fiat_secp521r1_carry_mul(Z3, t0, t1); /* Z3 = 2*Y*Z * Y^2 */
+    fiat_secp521r1_carry_add(Z3, Z3, Z3);
+    fiat_secp521r1_carry_add(Z3, Z3, Z3); /* doubled twice: Z3 = 8*Y^3*Z */
+}
+
+/*-
+ * out1 = (arg1 == 0) ? 0 : nz
+ * NB: this is not a "mod p equiv" 0, but literal 0
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ *
+ * ORs all LIMB_CNT limbs of arg1 together and stores the result in *out1,
+ * so *out1 is 0 exactly when every limb is 0. The nonzero value is the raw
+ * OR of the limbs, not a normalized 1. The loop has no early exit: every
+ * limb is visited regardless of the values seen so far.
+ */
+static void
+fiat_secp521r1_nonzero(limb_t *out1, const fe_t arg1)
+{
+    limb_t x1 = 0; /* running OR of the limbs */
+    int i;
+
+    for (i = 0; i < LIMB_CNT; i++)
+        x1 |= arg1[i];
+    *out1 = x1;
+}
+
+/*-
+ * R := Q + P where R and Q are projective, P affine.
/*-
 * R := Q + P where R and Q are projective, P affine.
 * R and Q same pointers OK
 * R and P same pointers not OK
 * Autogenerated: op3/add_mixed.op3
 * https://eprint.iacr.org/2015/1060 Alg 5
 * ASSERT: a = -3
 *
 * The sum is accumulated in the local X3/Y3/Z3 and only copied to R by the
 * final selects, so Q's coordinates are still intact when they are needed
 * as the fallback result; this is what makes R == Q safe.
 *
 * P is treated as the affine point at infinity when every limb of P->Y is
 * literally zero; in that case the result is Q.
 *
 * The body is straight-line code: there is no branch on any operand.
 */
static void
point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P)
{
    /* temporary variables */
    fe_t t0, t1, t2, t3, t4;
    /* constants */
    const limb_t *b = const_b;
    /* set pointers for legacy curve arith */
    const limb_t *X1 = Q->X;
    const limb_t *Y1 = Q->Y;
    const limb_t *Z1 = Q->Z;
    const limb_t *X2 = P->X;
    const limb_t *Y2 = P->Y;
    /* local result, kept separate from R so that Q survives until the end */
    fe_t X3;
    fe_t Y3;
    fe_t Z3;
    /* OR of all limbs of P->Y: zero means "P is infinity" */
    limb_t nz;

    /* check P for affine inf */
    fiat_secp521r1_nonzero(&nz, P->Y);

    /* the curve arith formula */
    fiat_secp521r1_carry_mul(t0, X1, X2);
    fiat_secp521r1_carry_mul(t1, Y1, Y2);
    fiat_secp521r1_carry_add(t3, X2, Y2);
    fiat_secp521r1_carry_add(t4, X1, Y1);
    fiat_secp521r1_carry_mul(t3, t3, t4);
    fiat_secp521r1_carry_add(t4, t0, t1);
    fiat_secp521r1_carry_sub(t3, t3, t4);
    fiat_secp521r1_carry_mul(t4, Y2, Z1);
    fiat_secp521r1_carry_add(t4, t4, Y1);
    fiat_secp521r1_carry_mul(Y3, X2, Z1);
    fiat_secp521r1_carry_add(Y3, Y3, X1);
    fiat_secp521r1_carry_mul(Z3, b, Z1);
    fiat_secp521r1_carry_sub(X3, Y3, Z3);
    fiat_secp521r1_carry_add(Z3, X3, X3);
    fiat_secp521r1_carry_add(X3, X3, Z3);
    fiat_secp521r1_carry_sub(Z3, t1, X3);
    fiat_secp521r1_carry_add(X3, t1, X3);
    fiat_secp521r1_carry_mul(Y3, b, Y3);
    fiat_secp521r1_carry_add(t1, Z1, Z1);
    fiat_secp521r1_carry_add(t2, t1, Z1);
    fiat_secp521r1_carry_sub(Y3, Y3, t2);
    fiat_secp521r1_carry_sub(Y3, Y3, t0);
    fiat_secp521r1_carry_add(t1, Y3, Y3);
    fiat_secp521r1_carry_add(Y3, t1, Y3);
    fiat_secp521r1_carry_add(t1, t0, t0);
    fiat_secp521r1_carry_add(t0, t1, t0);
    fiat_secp521r1_carry_sub(t0, t0, t2);
    fiat_secp521r1_carry_mul(t1, t4, Y3);
    fiat_secp521r1_carry_mul(t2, t0, Y3);
    fiat_secp521r1_carry_mul(Y3, X3, Z3);
    fiat_secp521r1_carry_add(Y3, Y3, t2);
    fiat_secp521r1_carry_mul(X3, t3, X3);
    fiat_secp521r1_carry_sub(X3, X3, t1);
    fiat_secp521r1_carry_mul(Z3, t4, Z3);
    fiat_secp521r1_carry_mul(t1, t3, t0);
    fiat_secp521r1_carry_add(Z3, Z3, t1);

    /* if P is inf, throw all that away and take Q */
    /*
     * NOTE(review): assumes selectznz(out, c, z, nz) yields z when c == 0
     * and nz otherwise; its definition is outside this view -- confirm.
     */
    fiat_secp521r1_selectznz(R->X, nz, Q->X, X3);
    fiat_secp521r1_selectznz(R->Y, nz, Q->Y, Y3);
    fiat_secp521r1_selectznz(R->Z, nz, Q->Z, Z3);
}
/*-
 * Fetch one bit of a 66-byte scalar.
 *
 * Bit 0 is the least significant bit of in[0]; bit idx lives in byte
 * idx / 8 at position idx % 8.
 *
 * Returns 0 or 1. Any idx outside [0, 66 * 8) reads as 0, so callers may
 * index past the top of the scalar without a bounds check of their own.
 */
static int
scalar_get_bit(const unsigned char in[66], int idx)
{
    /*
     * Reject out-of-range indices before any shifting: right-shifting a
     * negative int is implementation-defined in C.
     */
    if (idx < 0 || idx >= 66 * 8)
        return 0;

    return (in[idx >> 3] >> (idx & 0x7)) & 0x1;
}
+ */ +static void +scalar_rwnaf(int8_t out[106], const unsigned char in[66]) +{ + int i; + int8_t window, d; + + window = (in[0] & (DRADIX_WNAF - 1)) | 1; + for (i = 0; i < 105; i++) { + d = (window & (DRADIX_WNAF - 1)) - DRADIX; + out[i] = d; + window = (window - d) >> RADIX; + window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1; + window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2; + window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3; + window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4; + window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5; + } + out[i] = window; +} + +/*- + * Compute "textbook" wnaf representation of a scalar. + * NB: not constant time + */ +static void +scalar_wnaf(int8_t out[529], const unsigned char in[66]) +{ + int i; + int8_t window, d; + + window = in[0] & (DRADIX_WNAF - 1); + for (i = 0; i < 529; i++) { + d = 0; + if ((window & 1) && ((d = window & (DRADIX_WNAF - 1)) & DRADIX)) + d -= DRADIX_WNAF; + out[i] = d; + window = (window - d) >> 1; + window += scalar_get_bit(in, i + 1 + RADIX) << RADIX; + } +} + +/*- + * Simultaneous scalar multiplication: interleaved "textbook" wnaf. + * NB: not constant time + */ +static void +var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], + const unsigned char b[66], const pt_aff_t *P) +{ + int i, d, is_neg, is_inf = 1, flipped = 0; + int8_t anaf[529] = { 0 }; + int8_t bnaf[529] = { 0 }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }; + pt_prj_t precomp[DRADIX / 2]; + + precomp_wnaf(precomp, P); + scalar_wnaf(anaf, a); + scalar_wnaf(bnaf, b); + + for (i = 528; i >= 0; i--) { + if (!is_inf) + point_double(&Q, &Q); + if ((d = bnaf[i])) { + if ((is_neg = d < 0) != flipped) { + fiat_secp521r1_carry_opp(Q.Y, Q.Y); + flipped ^= 1; + } + d = (is_neg) ? 
(-d - 1) >> 1 : (d - 1) >> 1; + if (is_inf) { + /* initialize accumulator */ + fe_copy(Q.X, &precomp[d].X); + fe_copy(Q.Y, &precomp[d].Y); + fe_copy(Q.Z, &precomp[d].Z); + is_inf = 0; + } else + point_add_proj(&Q, &Q, &precomp[d]); + } + if ((d = anaf[i])) { + if ((is_neg = d < 0) != flipped) { + fiat_secp521r1_carry_opp(Q.Y, Q.Y); + flipped ^= 1; + } + d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; + if (is_inf) { + /* initialize accumulator */ + fe_copy(Q.X, &lut_cmb[0][d].X); + fe_copy(Q.Y, &lut_cmb[0][d].Y); + fe_copy(Q.Z, const_one); + is_inf = 0; + } else + point_add_mixed(&Q, &Q, &lut_cmb[0][d]); + } + } + + if (is_inf) { + /* initialize accumulator to inf: all-zero scalars */ + fe_set_zero(Q.X); + fe_copy(Q.Y, const_one); + fe_set_zero(Q.Z); + } + + if (flipped) { + /* correct sign */ + fiat_secp521r1_carry_opp(Q.Y, Q.Y); + } + + /* convert to affine -- NB depends on coordinate system */ + fiat_secp521r1_inv(Q.Z, Q.Z); + fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z); + fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Variable point scalar multiplication with "regular" wnaf. + * Here "regular" means _no zeroes_, so the sequence of + * EC arithmetic ops is fixed. 
/*-
 * Variable point scalar multiplication with "regular" wnaf.
 * Here "regular" means _no zeroes_, so the sequence of
 * EC arithmetic ops is fixed.
 *
 * out    affine result
 * scalar 66-byte scalar; bit 0 is the low bit of scalar[0]
 * P      affine input point
 *
 * out may alias P: P is read only by precomp_wnaf() at the top, and out is
 * not written before that call returns.
 *
 * Every table lookup scans all DRADIX / 2 entries and keeps the wanted one
 * with a select, so the index of the entry used is not exposed through a
 * data-dependent array access.
 */
static void
var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66],
               const pt_aff_t *P)
{
    int i, j, d, diff, is_neg;
    /* signed odd digits produced by scalar_rwnaf(); rnaf[105] is the top one */
    int8_t rnaf[106] = { 0 };
    /* Q is the accumulator, lut receives the table entry picked each round */
    pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } };
    /* odd multiples of P: precomp[k] = (2k + 1)P */
    pt_prj_t precomp[DRADIX / 2];

    precomp_wnaf(precomp, P);
    scalar_rwnaf(rnaf, scalar);

#if defined(_MSC_VER)
    /* result still unsigned: yes we know */
#pragma warning(push)
#pragma warning(disable : 4146)
#endif

    /* initialize accumulator to high digit */
    /* map the odd digit to its table index: 1 -> 0, 3 -> 1, 5 -> 2, ... */
    d = (rnaf[105] - 1) >> 1;
    for (j = 0; j < DRADIX / 2; j++) {
        /*
         * diff = 1 iff d == j, computed without a comparison branch.
         * Relies on >> of a negative int being an arithmetic shift.
         */
        diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1;
        /*
         * NOTE(review): assumes selectznz(out, c, z, nz) yields z when
         * c == 0 and nz otherwise; definition is outside this view.
         */
        fiat_secp521r1_selectznz(Q.X, diff, Q.X, precomp[j].X);
        fiat_secp521r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y);
        fiat_secp521r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z);
    }

    /* remaining 105 digits, most significant first */
    for (i = 104; i >= 0; i--) {
        /* shift the accumulator up by one digit: Q := 2^RADIX * Q */
        for (j = 0; j < RADIX; j++)
            point_double(&Q, &Q);
        d = rnaf[i];
        /* is_neg = (d < 0) ? 1 : 0 */
        is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
        /* d = abs(d) */
        d = (d ^ -is_neg) + is_neg;
        /* odd magnitude -> table index */
        d = (d - 1) >> 1;
        for (j = 0; j < DRADIX / 2; j++) {
            /* diff = 1 iff d == j, as above */
            diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1;
            fiat_secp521r1_selectznz(lut.X, diff, lut.X, precomp[j].X);
            fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y);
            fiat_secp521r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z);
        }
        /* negate lut point if digit is negative */
        /* out->Y is only scratch space here; it is overwritten at the end */
        fiat_secp521r1_carry_opp(out->Y, lut.Y);
        fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y);
        point_add_proj(&Q, &Q, &lut);
    }

#if defined(_MSC_VER)
#pragma warning(pop)
#endif

    /* conditionally subtract P if the scalar was even */
    /* lut := -P (from precomp[0]), then lut := Q - P; always computed */
    fe_copy(lut.X, precomp[0].X);
    fiat_secp521r1_carry_opp(lut.Y, precomp[0].Y);
    fe_copy(lut.Z, precomp[0].Z);
    point_add_proj(&lut, &lut, &Q);
    /* the low scalar bit decides which of Q and Q - P is kept */
    fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X);
    fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y);
    fiat_secp521r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z);

    /* convert to affine -- NB depends on coordinate system */
    fiat_secp521r1_inv(Q.Z, Q.Z);
    fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z);
    fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z);
}
fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z); + fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Wrapper: simultaneous scalar mutiplication. + * outx, outy := a * G + b * P + * where P = (inx, iny). + * Everything is LE byte ordering. + */ +static void +point_mul_two_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char a[66], + const unsigned char b[66], + const unsigned char inx[66], + const unsigned char iny[66]) +{ + pt_aff_t P; + + fiat_secp521r1_from_bytes(P.X, inx); + fiat_secp521r1_from_bytes(P.Y, iny); + /* simultaneous scalar multiplication */ + var_smul_wnaf_two(&P, a, b, &P); + + fiat_secp521r1_to_bytes(outx, P.X); + fiat_secp521r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: fixed scalar mutiplication. + * outx, outy := scalar * G + * Everything is LE byte ordering. + */ +static void +point_mul_g_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char scalar[66]) +{ + pt_aff_t P; + + /* fixed scmul function */ + fixed_smul_cmb(&P, scalar); + fiat_secp521r1_to_bytes(outx, P.X); + fiat_secp521r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: variable point scalar mutiplication. + * outx, outy := scalar * P + * where P = (inx, iny). + * Everything is LE byte ordering. 
/*-
 * Reverse a 66-byte buffer in place, converting between big-endian and
 * little-endian byte order.
 *
 * The argument is evaluated exactly once and may be any expression that
 * yields a pointer to at least 66 writable bytes (an array name, p + 1, ...).
 * The loop bounds are fixed, so the work done does not depend on the data.
 */
#define MP_BE2LE(a)                                  \
    do {                                             \
        unsigned char *z_buf = (a);                  \
        unsigned char z_bswap;                       \
        int z_i;                                     \
        for (z_i = 0; z_i < 33; z_i++) {             \
            z_bswap = z_buf[z_i];                    \
            z_buf[z_i] = z_buf[65 - z_i];            \
            z_buf[65 - z_i] = z_bswap;               \
        }                                            \
    } while (0)
b_y); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66)); + +CLEANUP: + return res; +} + +static mp_err +point_mul_two_secp521r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) +{ + unsigned char b_x[66]; + unsigned char b_y[66]; + unsigned char b_n1[66]; + unsigned char b_n2[66]; + mp_err res; + + /* If n2 == NULL or 0, this is just a base-point multiplication. */ + if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) + return point_mul_g_secp521r1_wrap(n1, out_x, out_y, group); + + /* If n1 == NULL or 0, this is just an arbitary-point multiplication. */ + if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) + return point_mul_secp521r1_wrap(n2, in_x, in_y, out_x, out_y, group); + + ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, + MP_BADARG); + + /* fail on out of range scalars */ + if (mpl_significant_bits(n1) > 521 || mp_cmp_z(n1) != MP_GT || + mpl_significant_bits(n2) > 521 || mp_cmp_z(n2) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n1, b_n1, 66)); + MP_CHECKOK(mp_to_fixlen_octets(n2, b_n2, 66)); + MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 66)); + MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 66)); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_BE2LE(b_n1); + MP_BE2LE(b_n2); + point_mul_two_secp521r1(b_x, b_y, b_n1, b_n2, b_x, b_y); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66)); + +CLEANUP: + return res; +} + +mp_err +ec_group_set_secp521r1(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P521) { + group->base_point_mul = &point_mul_g_secp521r1_wrap; + group->point_mul = &point_mul_secp521r1_wrap; + group->points_mul = &point_mul_two_secp521r1_wrap; + } + return MP_OKAY; +} + +#else /* __SIZEOF_INT128__ */ + +#include +#include +#define LIMB_BITS 32 +#define 
LIMB_CNT 19 +/* Field elements */ +typedef uint32_t fe_t[LIMB_CNT]; +typedef uint32_t limb_t; + +#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) +#define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) + +/* Projective points */ +typedef struct { + fe_t X; + fe_t Y; + fe_t Z; +} pt_prj_t; + +/* Affine points */ +typedef struct { + fe_t X; + fe_t Y; +} pt_aff_t; + +/* BEGIN verbatim fiat code https://github.com/mit-plv/fiat-crypto */ +/*- + * MIT License + * + * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file). + * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* Autogenerated: unsaturated_solinas --static --use-value-barrier secp521r1 32 '(auto)' '2^521 - 1' */ +/* curve description: secp521r1 */ +/* machine_wordsize = 32 (from "32") */ +/* requested operations: (all) */ +/* n = 19 (from "(auto)") */ +/* s-c = 2^521 - [(1, 1)] (from "2^521 - 1") */ +/* tight_bounds_multiplier = 1 (from "") */ +/* */ +/* Computed values: */ +/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 0, 1] */ +/* eval z = z[0] + (z[1] << 28) + (z[2] << 55) + (z[3] << 83) + (z[4] << 110) + (z[5] << 138) + (z[6] << 165) + (z[7] << 192) + (z[8] << 220) + (z[9] << 247) + (z[10] << 0x113) + (z[11] << 0x12e) + (z[12] << 0x14a) + (z[13] << 0x165) + (z[14] << 0x180) + (z[15] << 0x19c) + (z[16] << 0x1b7) + (z[17] << 0x1d3) + (z[18] << 0x1ee) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */ +/* balance = 
[0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe] */ + +#include +typedef unsigned char fiat_secp521r1_uint1; +typedef signed char fiat_secp521r1_int1; +#ifdef __GNUC__ +#define FIAT_SECP521R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP521R1_FIAT_INLINE +#endif + +/* The type fiat_secp521r1_loose_field_element is a field element with loose bounds. */ +/* Bounds: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] */ +typedef uint32_t fiat_secp521r1_loose_field_element[19]; + +/* The type fiat_secp521r1_tight_field_element is a field element with tight bounds. 
*/ +/* Bounds: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] */ +typedef uint32_t fiat_secp521r1_tight_field_element[19]; + +#if (-1 & 3) != 3 +#error "This code only works on a two's complement system" +#endif + +#if !defined(FIAT_SECP521R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +static __inline__ uint32_t +fiat_secp521r1_value_barrier_u32(uint32_t a) +{ + __asm__("" + : "+r"(a) + : /* no inputs */); + return a; +} +#else +#define fiat_secp521r1_value_barrier_u32(x) (x) +#endif + +/* + * The function fiat_secp521r1_addcarryx_u28 is an addition with carry. + * + * Postconditions: + * out1 = (arg1 + arg2 + arg3) mod 2^28 + * out2 = ⌊(arg1 + arg2 + arg3) / 2^28⌋ + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xfffffff] + * arg3: [0x0 ~> 0xfffffff] + * Output Bounds: + * out1: [0x0 ~> 0xfffffff] + * out2: [0x0 ~> 0x1] + */ +static void +fiat_secp521r1_addcarryx_u28(uint32_t *out1, + fiat_secp521r1_uint1 *out2, + fiat_secp521r1_uint1 arg1, + uint32_t arg2, uint32_t arg3) +{ + uint32_t x1; + uint32_t x2; + fiat_secp521r1_uint1 x3; + x1 = ((arg1 + arg2) + arg3); + x2 = (x1 & UINT32_C(0xfffffff)); + x3 = (fiat_secp521r1_uint1)(x1 >> 28); + *out1 = x2; + *out2 = x3; +} + +/* + * The function fiat_secp521r1_subborrowx_u28 is a subtraction with borrow. 
/*
 * The function fiat_secp521r1_subborrowx_u28 is a subtraction with borrow.
 *
 * Postconditions:
 *   out1 = (-arg1 + arg2 + -arg3) mod 2^28
 *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^28⌋
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xfffffff]
 *   arg3: [0x0 ~> 0xfffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xfffffff]
 *   out2: [0x0 ~> 0x1]
 */
static void
fiat_secp521r1_subborrowx_u28(uint32_t *out1,
                              fiat_secp521r1_uint1 *out2,
                              fiat_secp521r1_uint1 arg1,
                              uint32_t arg2, uint32_t arg3)
{
    int32_t x1;
    fiat_secp521r1_int1 x2;
    uint32_t x3;
    /* signed difference; within the input bounds it lies in [-2^28, 2^28) */
    x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
    /*
     * 0 when the difference is non-negative, -1 when it is negative.
     * Relies on >> of a negative value being an arithmetic shift.
     */
    x2 = (fiat_secp521r1_int1)(x1 >> 28);
    /* low 28 bits: the difference reduced mod 2^28 */
    x3 = (x1 & UINT32_C(0xfffffff));
    *out1 = x3;
    /* turn the 0 / -1 flag into a 0 / 1 borrow */
    *out2 = (fiat_secp521r1_uint1)(0x0 - x2);
}
/*
 * The function fiat_secp521r1_cmovznz_u32 is a single-word conditional move.
 *
 * Postconditions:
 *   out1 = (if arg1 = 0 then arg2 else arg3)
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xffffffff]
 *   arg3: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 *
 * The selection is done with bit masks only; the body contains no branch.
 */
static void
fiat_secp521r1_cmovznz_u32(uint32_t *out1,
                           fiat_secp521r1_uint1 arg1, uint32_t arg2,
                           uint32_t arg3)
{
    fiat_secp521r1_uint1 x1;
    uint32_t x2;
    uint32_t x3;
    /* normalize the condition to exactly 0 or 1 */
    x1 = (!(!arg1));
    /* widen it to a mask: 0x00000000 for 0, 0xffffffff for 1 */
    x2 = ((fiat_secp521r1_int1)(0x0 - x1) & UINT32_C(0xffffffff));
    /*
     * mask picks arg3, inverted mask picks arg2.
     * NOTE(review): the value barrier presumably keeps the compiler from
     * recognising the mask as a boolean and emitting a branch -- confirm
     * against the fiat-crypto documentation.
     */
    x3 = ((fiat_secp521r1_value_barrier_u32(x2) & arg3) |
          (fiat_secp521r1_value_barrier_u32((~x2)) & arg2));
    *out1 = x3;
}
+ * + * Postconditions: + * eval out1 mod m = (eval arg1 * eval arg2) mod m + * + */ +static void +fiat_secp521r1_carry_mul( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1, + const fiat_secp521r1_loose_field_element arg2) +{ /* Locals: x1..x361 are 64-bit limb products, x362 and x365..x382 are the per-limb sums, x363..x364 and x383..x443 belong to the carry chain. */ + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; + uint64_t x21; + uint64_t x22; + uint64_t x23; + uint64_t x24; + uint64_t x25; + uint64_t x26; + uint64_t x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint64_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + uint64_t x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + uint64_t x45; + uint64_t x46; + uint64_t x47; + uint64_t x48; + uint64_t x49; + uint64_t x50; + uint64_t x51; + uint64_t x52; + uint64_t x53; + uint64_t x54; + uint64_t x55; + uint64_t x56; + uint64_t x57; + uint64_t x58; + uint64_t x59; + uint64_t x60; + uint64_t x61; + uint64_t x62; + uint64_t x63; + uint64_t x64; + uint64_t x65; + uint64_t x66; + uint64_t x67; + uint64_t x68; + uint64_t x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + uint64_t x74; + uint64_t x75; + uint64_t x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + uint64_t x81; + uint64_t x82; + uint64_t x83; + uint64_t x84; + uint64_t x85; + uint64_t x86; + uint64_t x87; + uint64_t x88; + uint64_t x89; + uint64_t x90; + uint64_t x91; + uint64_t x92; + uint64_t x93; + uint64_t x94; + uint64_t x95; + uint64_t x96; + uint64_t x97; + uint64_t x98; + uint64_t x99; + uint64_t x100; + uint64_t x101; + uint64_t x102; + uint64_t x103; + uint64_t x104; + uint64_t x105; + uint64_t x106; + uint64_t x107; + uint64_t x108; + 
uint64_t x109; + uint64_t x110; + uint64_t x111; + uint64_t x112; + uint64_t x113; + uint64_t x114; + uint64_t x115; + uint64_t x116; + uint64_t x117; + uint64_t x118; + uint64_t x119; + uint64_t x120; + uint64_t x121; + uint64_t x122; + uint64_t x123; + uint64_t x124; + uint64_t x125; + uint64_t x126; + uint64_t x127; + uint64_t x128; + uint64_t x129; + uint64_t x130; + uint64_t x131; + uint64_t x132; + uint64_t x133; + uint64_t x134; + uint64_t x135; + uint64_t x136; + uint64_t x137; + uint64_t x138; + uint64_t x139; + uint64_t x140; + uint64_t x141; + uint64_t x142; + uint64_t x143; + uint64_t x144; + uint64_t x145; + uint64_t x146; + uint64_t x147; + uint64_t x148; + uint64_t x149; + uint64_t x150; + uint64_t x151; + uint64_t x152; + uint64_t x153; + uint64_t x154; + uint64_t x155; + uint64_t x156; + uint64_t x157; + uint64_t x158; + uint64_t x159; + uint64_t x160; + uint64_t x161; + uint64_t x162; + uint64_t x163; + uint64_t x164; + uint64_t x165; + uint64_t x166; + uint64_t x167; + uint64_t x168; + uint64_t x169; + uint64_t x170; + uint64_t x171; + uint64_t x172; + uint64_t x173; + uint64_t x174; + uint64_t x175; + uint64_t x176; + uint64_t x177; + uint64_t x178; + uint64_t x179; + uint64_t x180; + uint64_t x181; + uint64_t x182; + uint64_t x183; + uint64_t x184; + uint64_t x185; + uint64_t x186; + uint64_t x187; + uint64_t x188; + uint64_t x189; + uint64_t x190; + uint64_t x191; + uint64_t x192; + uint64_t x193; + uint64_t x194; + uint64_t x195; + uint64_t x196; + uint64_t x197; + uint64_t x198; + uint64_t x199; + uint64_t x200; + uint64_t x201; + uint64_t x202; + uint64_t x203; + uint64_t x204; + uint64_t x205; + uint64_t x206; + uint64_t x207; + uint64_t x208; + uint64_t x209; + uint64_t x210; + uint64_t x211; + uint64_t x212; + uint64_t x213; + uint64_t x214; + uint64_t x215; + uint64_t x216; + uint64_t x217; + uint64_t x218; + uint64_t x219; + uint64_t x220; + uint64_t x221; + uint64_t x222; + uint64_t x223; + uint64_t x224; + uint64_t x225; + uint64_t
x226; + uint64_t x227; + uint64_t x228; + uint64_t x229; + uint64_t x230; + uint64_t x231; + uint64_t x232; + uint64_t x233; + uint64_t x234; + uint64_t x235; + uint64_t x236; + uint64_t x237; + uint64_t x238; + uint64_t x239; + uint64_t x240; + uint64_t x241; + uint64_t x242; + uint64_t x243; + uint64_t x244; + uint64_t x245; + uint64_t x246; + uint64_t x247; + uint64_t x248; + uint64_t x249; + uint64_t x250; + uint64_t x251; + uint64_t x252; + uint64_t x253; + uint64_t x254; + uint64_t x255; + uint64_t x256; + uint64_t x257; + uint64_t x258; + uint64_t x259; + uint64_t x260; + uint64_t x261; + uint64_t x262; + uint64_t x263; + uint64_t x264; + uint64_t x265; + uint64_t x266; + uint64_t x267; + uint64_t x268; + uint64_t x269; + uint64_t x270; + uint64_t x271; + uint64_t x272; + uint64_t x273; + uint64_t x274; + uint64_t x275; + uint64_t x276; + uint64_t x277; + uint64_t x278; + uint64_t x279; + uint64_t x280; + uint64_t x281; + uint64_t x282; + uint64_t x283; + uint64_t x284; + uint64_t x285; + uint64_t x286; + uint64_t x287; + uint64_t x288; + uint64_t x289; + uint64_t x290; + uint64_t x291; + uint64_t x292; + uint64_t x293; + uint64_t x294; + uint64_t x295; + uint64_t x296; + uint64_t x297; + uint64_t x298; + uint64_t x299; + uint64_t x300; + uint64_t x301; + uint64_t x302; + uint64_t x303; + uint64_t x304; + uint64_t x305; + uint64_t x306; + uint64_t x307; + uint64_t x308; + uint64_t x309; + uint64_t x310; + uint64_t x311; + uint64_t x312; + uint64_t x313; + uint64_t x314; + uint64_t x315; + uint64_t x316; + uint64_t x317; + uint64_t x318; + uint64_t x319; + uint64_t x320; + uint64_t x321; + uint64_t x322; + uint64_t x323; + uint64_t x324; + uint64_t x325; + uint64_t x326; + uint64_t x327; + uint64_t x328; + uint64_t x329; + uint64_t x330; + uint64_t x331; + uint64_t x332; + uint64_t x333; + uint64_t x334; + uint64_t x335; + uint64_t x336; + uint64_t x337; + uint64_t x338; + uint64_t x339; + uint64_t x340; + uint64_t x341; + uint64_t x342; + uint64_t x343; + 
uint64_t x344; + uint64_t x345; + uint64_t x346; + uint64_t x347; + uint64_t x348; + uint64_t x349; + uint64_t x350; + uint64_t x351; + uint64_t x352; + uint64_t x353; + uint64_t x354; + uint64_t x355; + uint64_t x356; + uint64_t x357; + uint64_t x358; + uint64_t x359; + uint64_t x360; + uint64_t x361; + uint64_t x362; + uint64_t x363; + uint32_t x364; + uint64_t x365; + uint64_t x366; + uint64_t x367; + uint64_t x368; + uint64_t x369; + uint64_t x370; + uint64_t x371; + uint64_t x372; + uint64_t x373; + uint64_t x374; + uint64_t x375; + uint64_t x376; + uint64_t x377; + uint64_t x378; + uint64_t x379; + uint64_t x380; + uint64_t x381; + uint64_t x382; + uint64_t x383; + uint64_t x384; + uint32_t x385; + uint64_t x386; + uint64_t x387; + uint32_t x388; + uint64_t x389; + uint64_t x390; + uint32_t x391; + uint64_t x392; + uint64_t x393; + uint32_t x394; + uint64_t x395; + uint64_t x396; + uint32_t x397; + uint64_t x398; + uint64_t x399; + uint32_t x400; + uint64_t x401; + uint64_t x402; + uint32_t x403; + uint64_t x404; + uint64_t x405; + uint32_t x406; + uint64_t x407; + uint64_t x408; + uint32_t x409; + uint64_t x410; + uint64_t x411; + uint32_t x412; + uint64_t x413; + uint64_t x414; + uint32_t x415; + uint64_t x416; + uint64_t x417; + uint32_t x418; + uint64_t x419; + uint64_t x420; + uint32_t x421; + uint64_t x422; + uint64_t x423; + uint32_t x424; + uint64_t x425; + uint64_t x426; + uint32_t x427; + uint64_t x428; + uint64_t x429; + uint32_t x430; + uint64_t x431; + uint64_t x432; + uint32_t x433; + uint64_t x434; + uint64_t x435; + uint32_t x436; + uint64_t x437; + uint32_t x438; + uint32_t x439; + uint32_t x440; + fiat_secp521r1_uint1 x441; + uint32_t x442; + uint32_t x443; + /* Step 1: 64-bit partial products arg1[i] * arg2[j]; some use a doubled arg2 limb. x1..x171 are the pairs with i + j >= 19. NOTE(review): presumably these are the terms folded back by the reduction modulo m -- confirm against the field definition. */ x1 = ((uint64_t)(arg1[18]) * (arg2[18])); + x2 = ((uint64_t)(arg1[18]) * ((arg2[17]) * 0x2)); + x3 = ((uint64_t)(arg1[18]) * (arg2[16])); + x4 = ((uint64_t)(arg1[18]) * ((arg2[15]) * 0x2)); + x5 = ((uint64_t)(arg1[18]) * (arg2[14])); + x6 = ((uint64_t)(arg1[18]) * (arg2[13])); + x7 = 
((uint64_t)(arg1[18]) * ((arg2[12]) * 0x2)); + x8 = ((uint64_t)(arg1[18]) * (arg2[11])); + x9 = ((uint64_t)(arg1[18]) * ((arg2[10]) * 0x2)); + x10 = ((uint64_t)(arg1[18]) * (arg2[9])); + x11 = ((uint64_t)(arg1[18]) * ((arg2[8]) * 0x2)); + x12 = ((uint64_t)(arg1[18]) * (arg2[7])); + x13 = ((uint64_t)(arg1[18]) * (arg2[6])); + x14 = ((uint64_t)(arg1[18]) * ((arg2[5]) * 0x2)); + x15 = ((uint64_t)(arg1[18]) * (arg2[4])); + x16 = ((uint64_t)(arg1[18]) * ((arg2[3]) * 0x2)); + x17 = ((uint64_t)(arg1[18]) * (arg2[2])); + x18 = ((uint64_t)(arg1[18]) * ((arg2[1]) * 0x2)); + x19 = ((uint64_t)(arg1[17]) * ((arg2[18]) * 0x2)); + x20 = ((uint64_t)(arg1[17]) * ((arg2[17]) * 0x2)); + x21 = ((uint64_t)(arg1[17]) * ((arg2[16]) * 0x2)); + x22 = ((uint64_t)(arg1[17]) * ((arg2[15]) * 0x2)); + x23 = ((uint64_t)(arg1[17]) * (arg2[14])); + x24 = ((uint64_t)(arg1[17]) * ((arg2[13]) * 0x2)); + x25 = ((uint64_t)(arg1[17]) * ((arg2[12]) * 0x2)); + x26 = ((uint64_t)(arg1[17]) * ((arg2[11]) * 0x2)); + x27 = ((uint64_t)(arg1[17]) * ((arg2[10]) * 0x2)); + x28 = ((uint64_t)(arg1[17]) * ((arg2[9]) * 0x2)); + x29 = ((uint64_t)(arg1[17]) * ((arg2[8]) * 0x2)); + x30 = ((uint64_t)(arg1[17]) * (arg2[7])); + x31 = ((uint64_t)(arg1[17]) * ((arg2[6]) * 0x2)); + x32 = ((uint64_t)(arg1[17]) * ((arg2[5]) * 0x2)); + x33 = ((uint64_t)(arg1[17]) * ((arg2[4]) * 0x2)); + x34 = ((uint64_t)(arg1[17]) * ((arg2[3]) * 0x2)); + x35 = ((uint64_t)(arg1[17]) * ((arg2[2]) * 0x2)); + x36 = ((uint64_t)(arg1[16]) * (arg2[18])); + x37 = ((uint64_t)(arg1[16]) * ((arg2[17]) * 0x2)); + x38 = ((uint64_t)(arg1[16]) * (arg2[16])); + x39 = ((uint64_t)(arg1[16]) * (arg2[15])); + x40 = ((uint64_t)(arg1[16]) * (arg2[14])); + x41 = ((uint64_t)(arg1[16]) * (arg2[13])); + x42 = ((uint64_t)(arg1[16]) * ((arg2[12]) * 0x2)); + x43 = ((uint64_t)(arg1[16]) * (arg2[11])); + x44 = ((uint64_t)(arg1[16]) * ((arg2[10]) * 0x2)); + x45 = ((uint64_t)(arg1[16]) * (arg2[9])); + x46 = ((uint64_t)(arg1[16]) * (arg2[8])); + x47 = ((uint64_t)(arg1[16]) * 
(arg2[7])); + x48 = ((uint64_t)(arg1[16]) * (arg2[6])); + x49 = ((uint64_t)(arg1[16]) * ((arg2[5]) * 0x2)); + x50 = ((uint64_t)(arg1[16]) * (arg2[4])); + x51 = ((uint64_t)(arg1[16]) * ((arg2[3]) * 0x2)); + x52 = ((uint64_t)(arg1[15]) * ((arg2[18]) * 0x2)); + x53 = ((uint64_t)(arg1[15]) * ((arg2[17]) * 0x2)); + x54 = ((uint64_t)(arg1[15]) * (arg2[16])); + x55 = ((uint64_t)(arg1[15]) * ((arg2[15]) * 0x2)); + x56 = ((uint64_t)(arg1[15]) * (arg2[14])); + x57 = ((uint64_t)(arg1[15]) * ((arg2[13]) * 0x2)); + x58 = ((uint64_t)(arg1[15]) * ((arg2[12]) * 0x2)); + x59 = ((uint64_t)(arg1[15]) * ((arg2[11]) * 0x2)); + x60 = ((uint64_t)(arg1[15]) * ((arg2[10]) * 0x2)); + x61 = ((uint64_t)(arg1[15]) * (arg2[9])); + x62 = ((uint64_t)(arg1[15]) * ((arg2[8]) * 0x2)); + x63 = ((uint64_t)(arg1[15]) * (arg2[7])); + x64 = ((uint64_t)(arg1[15]) * ((arg2[6]) * 0x2)); + x65 = ((uint64_t)(arg1[15]) * ((arg2[5]) * 0x2)); + x66 = ((uint64_t)(arg1[15]) * ((arg2[4]) * 0x2)); + x67 = ((uint64_t)(arg1[14]) * (arg2[18])); + x68 = ((uint64_t)(arg1[14]) * (arg2[17])); + x69 = ((uint64_t)(arg1[14]) * (arg2[16])); + x70 = ((uint64_t)(arg1[14]) * (arg2[15])); + x71 = ((uint64_t)(arg1[14]) * (arg2[14])); + x72 = ((uint64_t)(arg1[14]) * (arg2[13])); + x73 = ((uint64_t)(arg1[14]) * ((arg2[12]) * 0x2)); + x74 = ((uint64_t)(arg1[14]) * (arg2[11])); + x75 = ((uint64_t)(arg1[14]) * (arg2[10])); + x76 = ((uint64_t)(arg1[14]) * (arg2[9])); + x77 = ((uint64_t)(arg1[14]) * (arg2[8])); + x78 = ((uint64_t)(arg1[14]) * (arg2[7])); + x79 = ((uint64_t)(arg1[14]) * (arg2[6])); + x80 = ((uint64_t)(arg1[14]) * ((arg2[5]) * 0x2)); + x81 = ((uint64_t)(arg1[13]) * (arg2[18])); + x82 = ((uint64_t)(arg1[13]) * ((arg2[17]) * 0x2)); + x83 = ((uint64_t)(arg1[13]) * (arg2[16])); + x84 = ((uint64_t)(arg1[13]) * ((arg2[15]) * 0x2)); + x85 = ((uint64_t)(arg1[13]) * (arg2[14])); + x86 = ((uint64_t)(arg1[13]) * ((arg2[13]) * 0x2)); + x87 = ((uint64_t)(arg1[13]) * ((arg2[12]) * 0x2)); + x88 = ((uint64_t)(arg1[13]) * (arg2[11])); + x89 
= ((uint64_t)(arg1[13]) * ((arg2[10]) * 0x2)); + x90 = ((uint64_t)(arg1[13]) * (arg2[9])); + x91 = ((uint64_t)(arg1[13]) * ((arg2[8]) * 0x2)); + x92 = ((uint64_t)(arg1[13]) * (arg2[7])); + x93 = ((uint64_t)(arg1[13]) * ((arg2[6]) * 0x2)); + x94 = ((uint64_t)(arg1[12]) * ((arg2[18]) * 0x2)); + x95 = ((uint64_t)(arg1[12]) * ((arg2[17]) * 0x2)); + x96 = ((uint64_t)(arg1[12]) * ((arg2[16]) * 0x2)); + x97 = ((uint64_t)(arg1[12]) * ((arg2[15]) * 0x2)); + x98 = ((uint64_t)(arg1[12]) * ((arg2[14]) * 0x2)); + x99 = ((uint64_t)(arg1[12]) * ((arg2[13]) * 0x2)); + x100 = ((uint64_t)(arg1[12]) * ((arg2[12]) * 0x2)); + x101 = ((uint64_t)(arg1[12]) * ((arg2[11]) * 0x2)); + x102 = ((uint64_t)(arg1[12]) * ((arg2[10]) * 0x2)); + x103 = ((uint64_t)(arg1[12]) * ((arg2[9]) * 0x2)); + x104 = ((uint64_t)(arg1[12]) * ((arg2[8]) * 0x2)); + x105 = ((uint64_t)(arg1[12]) * ((arg2[7]) * 0x2)); + x106 = ((uint64_t)(arg1[11]) * (arg2[18])); + x107 = ((uint64_t)(arg1[11]) * ((arg2[17]) * 0x2)); + x108 = ((uint64_t)(arg1[11]) * (arg2[16])); + x109 = ((uint64_t)(arg1[11]) * ((arg2[15]) * 0x2)); + x110 = ((uint64_t)(arg1[11]) * (arg2[14])); + x111 = ((uint64_t)(arg1[11]) * (arg2[13])); + x112 = ((uint64_t)(arg1[11]) * ((arg2[12]) * 0x2)); + x113 = ((uint64_t)(arg1[11]) * (arg2[11])); + x114 = ((uint64_t)(arg1[11]) * ((arg2[10]) * 0x2)); + x115 = ((uint64_t)(arg1[11]) * (arg2[9])); + x116 = ((uint64_t)(arg1[11]) * ((arg2[8]) * 0x2)); + x117 = ((uint64_t)(arg1[10]) * ((arg2[18]) * 0x2)); + x118 = ((uint64_t)(arg1[10]) * ((arg2[17]) * 0x2)); + x119 = ((uint64_t)(arg1[10]) * ((arg2[16]) * 0x2)); + x120 = ((uint64_t)(arg1[10]) * ((arg2[15]) * 0x2)); + x121 = ((uint64_t)(arg1[10]) * (arg2[14])); + x122 = ((uint64_t)(arg1[10]) * ((arg2[13]) * 0x2)); + x123 = ((uint64_t)(arg1[10]) * ((arg2[12]) * 0x2)); + x124 = ((uint64_t)(arg1[10]) * ((arg2[11]) * 0x2)); + x125 = ((uint64_t)(arg1[10]) * ((arg2[10]) * 0x2)); + x126 = ((uint64_t)(arg1[10]) * ((arg2[9]) * 0x2)); + x127 = ((uint64_t)(arg1[9]) * (arg2[18])); + 
x128 = ((uint64_t)(arg1[9]) * ((arg2[17]) * 0x2)); + x129 = ((uint64_t)(arg1[9]) * (arg2[16])); + x130 = ((uint64_t)(arg1[9]) * (arg2[15])); + x131 = ((uint64_t)(arg1[9]) * (arg2[14])); + x132 = ((uint64_t)(arg1[9]) * (arg2[13])); + x133 = ((uint64_t)(arg1[9]) * ((arg2[12]) * 0x2)); + x134 = ((uint64_t)(arg1[9]) * (arg2[11])); + x135 = ((uint64_t)(arg1[9]) * ((arg2[10]) * 0x2)); + x136 = ((uint64_t)(arg1[8]) * ((arg2[18]) * 0x2)); + x137 = ((uint64_t)(arg1[8]) * ((arg2[17]) * 0x2)); + x138 = ((uint64_t)(arg1[8]) * (arg2[16])); + x139 = ((uint64_t)(arg1[8]) * ((arg2[15]) * 0x2)); + x140 = ((uint64_t)(arg1[8]) * (arg2[14])); + x141 = ((uint64_t)(arg1[8]) * ((arg2[13]) * 0x2)); + x142 = ((uint64_t)(arg1[8]) * ((arg2[12]) * 0x2)); + x143 = ((uint64_t)(arg1[8]) * ((arg2[11]) * 0x2)); + x144 = ((uint64_t)(arg1[7]) * (arg2[18])); + x145 = ((uint64_t)(arg1[7]) * (arg2[17])); + x146 = ((uint64_t)(arg1[7]) * (arg2[16])); + x147 = ((uint64_t)(arg1[7]) * (arg2[15])); + x148 = ((uint64_t)(arg1[7]) * (arg2[14])); + x149 = ((uint64_t)(arg1[7]) * (arg2[13])); + x150 = ((uint64_t)(arg1[7]) * ((arg2[12]) * 0x2)); + x151 = ((uint64_t)(arg1[6]) * (arg2[18])); + x152 = ((uint64_t)(arg1[6]) * ((arg2[17]) * 0x2)); + x153 = ((uint64_t)(arg1[6]) * (arg2[16])); + x154 = ((uint64_t)(arg1[6]) * ((arg2[15]) * 0x2)); + x155 = ((uint64_t)(arg1[6]) * (arg2[14])); + x156 = ((uint64_t)(arg1[6]) * ((arg2[13]) * 0x2)); + x157 = ((uint64_t)(arg1[5]) * ((arg2[18]) * 0x2)); + x158 = ((uint64_t)(arg1[5]) * ((arg2[17]) * 0x2)); + x159 = ((uint64_t)(arg1[5]) * ((arg2[16]) * 0x2)); + x160 = ((uint64_t)(arg1[5]) * ((arg2[15]) * 0x2)); + x161 = ((uint64_t)(arg1[5]) * ((arg2[14]) * 0x2)); + x162 = ((uint64_t)(arg1[4]) * (arg2[18])); + x163 = ((uint64_t)(arg1[4]) * ((arg2[17]) * 0x2)); + x164 = ((uint64_t)(arg1[4]) * (arg2[16])); + x165 = ((uint64_t)(arg1[4]) * ((arg2[15]) * 0x2)); + x166 = ((uint64_t)(arg1[3]) * ((arg2[18]) * 0x2)); + x167 = ((uint64_t)(arg1[3]) * ((arg2[17]) * 0x2)); + x168 = 
((uint64_t)(arg1[3]) * ((arg2[16]) * 0x2)); + x169 = ((uint64_t)(arg1[2]) * (arg2[18])); + x170 = ((uint64_t)(arg1[2]) * ((arg2[17]) * 0x2)); + x171 = ((uint64_t)(arg1[1]) * ((arg2[18]) * 0x2)); + /* x172..x361: the remaining products, those with i + j <= 18. */ x172 = ((uint64_t)(arg1[18]) * (arg2[0])); + x173 = ((uint64_t)(arg1[17]) * ((arg2[1]) * 0x2)); + x174 = ((uint64_t)(arg1[17]) * (arg2[0])); + x175 = ((uint64_t)(arg1[16]) * (arg2[2])); + x176 = ((uint64_t)(arg1[16]) * (arg2[1])); + x177 = ((uint64_t)(arg1[16]) * (arg2[0])); + x178 = ((uint64_t)(arg1[15]) * ((arg2[3]) * 0x2)); + x179 = ((uint64_t)(arg1[15]) * (arg2[2])); + x180 = ((uint64_t)(arg1[15]) * ((arg2[1]) * 0x2)); + x181 = ((uint64_t)(arg1[15]) * (arg2[0])); + x182 = ((uint64_t)(arg1[14]) * (arg2[4])); + x183 = ((uint64_t)(arg1[14]) * (arg2[3])); + x184 = ((uint64_t)(arg1[14]) * (arg2[2])); + x185 = ((uint64_t)(arg1[14]) * (arg2[1])); + x186 = ((uint64_t)(arg1[14]) * (arg2[0])); + x187 = ((uint64_t)(arg1[13]) * ((arg2[5]) * 0x2)); + x188 = ((uint64_t)(arg1[13]) * (arg2[4])); + x189 = ((uint64_t)(arg1[13]) * ((arg2[3]) * 0x2)); + x190 = ((uint64_t)(arg1[13]) * (arg2[2])); + x191 = ((uint64_t)(arg1[13]) * ((arg2[1]) * 0x2)); + x192 = ((uint64_t)(arg1[13]) * (arg2[0])); + x193 = ((uint64_t)(arg1[12]) * ((arg2[6]) * 0x2)); + x194 = ((uint64_t)(arg1[12]) * ((arg2[5]) * 0x2)); + x195 = ((uint64_t)(arg1[12]) * ((arg2[4]) * 0x2)); + x196 = ((uint64_t)(arg1[12]) * ((arg2[3]) * 0x2)); + x197 = ((uint64_t)(arg1[12]) * ((arg2[2]) * 0x2)); + x198 = ((uint64_t)(arg1[12]) * ((arg2[1]) * 0x2)); + x199 = ((uint64_t)(arg1[12]) * (arg2[0])); + x200 = ((uint64_t)(arg1[11]) * (arg2[7])); + x201 = ((uint64_t)(arg1[11]) * (arg2[6])); + x202 = ((uint64_t)(arg1[11]) * ((arg2[5]) * 0x2)); + x203 = ((uint64_t)(arg1[11]) * (arg2[4])); + x204 = ((uint64_t)(arg1[11]) * ((arg2[3]) * 0x2)); + x205 = ((uint64_t)(arg1[11]) * (arg2[2])); + x206 = ((uint64_t)(arg1[11]) * (arg2[1])); + x207 = ((uint64_t)(arg1[11]) * (arg2[0])); + x208 = ((uint64_t)(arg1[10]) * ((arg2[8]) * 0x2)); + x209 = 
((uint64_t)(arg1[10]) * (arg2[7])); + x210 = ((uint64_t)(arg1[10]) * ((arg2[6]) * 0x2)); + x211 = ((uint64_t)(arg1[10]) * ((arg2[5]) * 0x2)); + x212 = ((uint64_t)(arg1[10]) * ((arg2[4]) * 0x2)); + x213 = ((uint64_t)(arg1[10]) * ((arg2[3]) * 0x2)); + x214 = ((uint64_t)(arg1[10]) * (arg2[2])); + x215 = ((uint64_t)(arg1[10]) * ((arg2[1]) * 0x2)); + x216 = ((uint64_t)(arg1[10]) * (arg2[0])); + x217 = ((uint64_t)(arg1[9]) * (arg2[9])); + x218 = ((uint64_t)(arg1[9]) * (arg2[8])); + x219 = ((uint64_t)(arg1[9]) * (arg2[7])); + x220 = ((uint64_t)(arg1[9]) * (arg2[6])); + x221 = ((uint64_t)(arg1[9]) * ((arg2[5]) * 0x2)); + x222 = ((uint64_t)(arg1[9]) * (arg2[4])); + x223 = ((uint64_t)(arg1[9]) * (arg2[3])); + x224 = ((uint64_t)(arg1[9]) * (arg2[2])); + x225 = ((uint64_t)(arg1[9]) * (arg2[1])); + x226 = ((uint64_t)(arg1[9]) * (arg2[0])); + x227 = ((uint64_t)(arg1[8]) * ((arg2[10]) * 0x2)); + x228 = ((uint64_t)(arg1[8]) * (arg2[9])); + x229 = ((uint64_t)(arg1[8]) * ((arg2[8]) * 0x2)); + x230 = ((uint64_t)(arg1[8]) * (arg2[7])); + x231 = ((uint64_t)(arg1[8]) * ((arg2[6]) * 0x2)); + x232 = ((uint64_t)(arg1[8]) * ((arg2[5]) * 0x2)); + x233 = ((uint64_t)(arg1[8]) * (arg2[4])); + x234 = ((uint64_t)(arg1[8]) * ((arg2[3]) * 0x2)); + x235 = ((uint64_t)(arg1[8]) * (arg2[2])); + x236 = ((uint64_t)(arg1[8]) * ((arg2[1]) * 0x2)); + x237 = ((uint64_t)(arg1[8]) * (arg2[0])); + x238 = ((uint64_t)(arg1[7]) * (arg2[11])); + x239 = ((uint64_t)(arg1[7]) * (arg2[10])); + x240 = ((uint64_t)(arg1[7]) * (arg2[9])); + x241 = ((uint64_t)(arg1[7]) * (arg2[8])); + x242 = ((uint64_t)(arg1[7]) * (arg2[7])); + x243 = ((uint64_t)(arg1[7]) * (arg2[6])); + x244 = ((uint64_t)(arg1[7]) * (arg2[5])); + x245 = ((uint64_t)(arg1[7]) * (arg2[4])); + x246 = ((uint64_t)(arg1[7]) * (arg2[3])); + x247 = ((uint64_t)(arg1[7]) * (arg2[2])); + x248 = ((uint64_t)(arg1[7]) * (arg2[1])); + x249 = ((uint64_t)(arg1[7]) * (arg2[0])); + x250 = ((uint64_t)(arg1[6]) * ((arg2[12]) * 0x2)); + x251 = ((uint64_t)(arg1[6]) * (arg2[11])); 
+ x252 = ((uint64_t)(arg1[6]) * ((arg2[10]) * 0x2)); + x253 = ((uint64_t)(arg1[6]) * (arg2[9])); + x254 = ((uint64_t)(arg1[6]) * ((arg2[8]) * 0x2)); + x255 = ((uint64_t)(arg1[6]) * (arg2[7])); + x256 = ((uint64_t)(arg1[6]) * (arg2[6])); + x257 = ((uint64_t)(arg1[6]) * ((arg2[5]) * 0x2)); + x258 = ((uint64_t)(arg1[6]) * (arg2[4])); + x259 = ((uint64_t)(arg1[6]) * ((arg2[3]) * 0x2)); + x260 = ((uint64_t)(arg1[6]) * (arg2[2])); + x261 = ((uint64_t)(arg1[6]) * ((arg2[1]) * 0x2)); + x262 = ((uint64_t)(arg1[6]) * (arg2[0])); + x263 = ((uint64_t)(arg1[5]) * ((arg2[13]) * 0x2)); + x264 = ((uint64_t)(arg1[5]) * ((arg2[12]) * 0x2)); + x265 = ((uint64_t)(arg1[5]) * ((arg2[11]) * 0x2)); + x266 = ((uint64_t)(arg1[5]) * ((arg2[10]) * 0x2)); + x267 = ((uint64_t)(arg1[5]) * ((arg2[9]) * 0x2)); + x268 = ((uint64_t)(arg1[5]) * ((arg2[8]) * 0x2)); + x269 = ((uint64_t)(arg1[5]) * (arg2[7])); + x270 = ((uint64_t)(arg1[5]) * ((arg2[6]) * 0x2)); + x271 = ((uint64_t)(arg1[5]) * ((arg2[5]) * 0x2)); + x272 = ((uint64_t)(arg1[5]) * ((arg2[4]) * 0x2)); + x273 = ((uint64_t)(arg1[5]) * ((arg2[3]) * 0x2)); + x274 = ((uint64_t)(arg1[5]) * ((arg2[2]) * 0x2)); + x275 = ((uint64_t)(arg1[5]) * ((arg2[1]) * 0x2)); + x276 = ((uint64_t)(arg1[5]) * (arg2[0])); + x277 = ((uint64_t)(arg1[4]) * (arg2[14])); + x278 = ((uint64_t)(arg1[4]) * (arg2[13])); + x279 = ((uint64_t)(arg1[4]) * ((arg2[12]) * 0x2)); + x280 = ((uint64_t)(arg1[4]) * (arg2[11])); + x281 = ((uint64_t)(arg1[4]) * ((arg2[10]) * 0x2)); + x282 = ((uint64_t)(arg1[4]) * (arg2[9])); + x283 = ((uint64_t)(arg1[4]) * (arg2[8])); + x284 = ((uint64_t)(arg1[4]) * (arg2[7])); + x285 = ((uint64_t)(arg1[4]) * (arg2[6])); + x286 = ((uint64_t)(arg1[4]) * ((arg2[5]) * 0x2)); + x287 = ((uint64_t)(arg1[4]) * (arg2[4])); + x288 = ((uint64_t)(arg1[4]) * ((arg2[3]) * 0x2)); + x289 = ((uint64_t)(arg1[4]) * (arg2[2])); + x290 = ((uint64_t)(arg1[4]) * (arg2[1])); + x291 = ((uint64_t)(arg1[4]) * (arg2[0])); + x292 = ((uint64_t)(arg1[3]) * ((arg2[15]) * 0x2)); + x293 = 
((uint64_t)(arg1[3]) * (arg2[14])); + x294 = ((uint64_t)(arg1[3]) * ((arg2[13]) * 0x2)); + x295 = ((uint64_t)(arg1[3]) * ((arg2[12]) * 0x2)); + x296 = ((uint64_t)(arg1[3]) * ((arg2[11]) * 0x2)); + x297 = ((uint64_t)(arg1[3]) * ((arg2[10]) * 0x2)); + x298 = ((uint64_t)(arg1[3]) * (arg2[9])); + x299 = ((uint64_t)(arg1[3]) * ((arg2[8]) * 0x2)); + x300 = ((uint64_t)(arg1[3]) * (arg2[7])); + x301 = ((uint64_t)(arg1[3]) * ((arg2[6]) * 0x2)); + x302 = ((uint64_t)(arg1[3]) * ((arg2[5]) * 0x2)); + x303 = ((uint64_t)(arg1[3]) * ((arg2[4]) * 0x2)); + x304 = ((uint64_t)(arg1[3]) * ((arg2[3]) * 0x2)); + x305 = ((uint64_t)(arg1[3]) * (arg2[2])); + x306 = ((uint64_t)(arg1[3]) * ((arg2[1]) * 0x2)); + x307 = ((uint64_t)(arg1[3]) * (arg2[0])); + x308 = ((uint64_t)(arg1[2]) * (arg2[16])); + x309 = ((uint64_t)(arg1[2]) * (arg2[15])); + x310 = ((uint64_t)(arg1[2]) * (arg2[14])); + x311 = ((uint64_t)(arg1[2]) * (arg2[13])); + x312 = ((uint64_t)(arg1[2]) * ((arg2[12]) * 0x2)); + x313 = ((uint64_t)(arg1[2]) * (arg2[11])); + x314 = ((uint64_t)(arg1[2]) * (arg2[10])); + x315 = ((uint64_t)(arg1[2]) * (arg2[9])); + x316 = ((uint64_t)(arg1[2]) * (arg2[8])); + x317 = ((uint64_t)(arg1[2]) * (arg2[7])); + x318 = ((uint64_t)(arg1[2]) * (arg2[6])); + x319 = ((uint64_t)(arg1[2]) * ((arg2[5]) * 0x2)); + x320 = ((uint64_t)(arg1[2]) * (arg2[4])); + x321 = ((uint64_t)(arg1[2]) * (arg2[3])); + x322 = ((uint64_t)(arg1[2]) * (arg2[2])); + x323 = ((uint64_t)(arg1[2]) * (arg2[1])); + x324 = ((uint64_t)(arg1[2]) * (arg2[0])); + x325 = ((uint64_t)(arg1[1]) * ((arg2[17]) * 0x2)); + x326 = ((uint64_t)(arg1[1]) * (arg2[16])); + x327 = ((uint64_t)(arg1[1]) * ((arg2[15]) * 0x2)); + x328 = ((uint64_t)(arg1[1]) * (arg2[14])); + x329 = ((uint64_t)(arg1[1]) * ((arg2[13]) * 0x2)); + x330 = ((uint64_t)(arg1[1]) * ((arg2[12]) * 0x2)); + x331 = ((uint64_t)(arg1[1]) * (arg2[11])); + x332 = ((uint64_t)(arg1[1]) * ((arg2[10]) * 0x2)); + x333 = ((uint64_t)(arg1[1]) * (arg2[9])); + x334 = ((uint64_t)(arg1[1]) * ((arg2[8]) * 
0x2)); + x335 = ((uint64_t)(arg1[1]) * (arg2[7])); + x336 = ((uint64_t)(arg1[1]) * ((arg2[6]) * 0x2)); + x337 = ((uint64_t)(arg1[1]) * ((arg2[5]) * 0x2)); + x338 = ((uint64_t)(arg1[1]) * (arg2[4])); + x339 = ((uint64_t)(arg1[1]) * ((arg2[3]) * 0x2)); + x340 = ((uint64_t)(arg1[1]) * (arg2[2])); + x341 = ((uint64_t)(arg1[1]) * ((arg2[1]) * 0x2)); + x342 = ((uint64_t)(arg1[1]) * (arg2[0])); + x343 = ((uint64_t)(arg1[0]) * (arg2[18])); + x344 = ((uint64_t)(arg1[0]) * (arg2[17])); + x345 = ((uint64_t)(arg1[0]) * (arg2[16])); + x346 = ((uint64_t)(arg1[0]) * (arg2[15])); + x347 = ((uint64_t)(arg1[0]) * (arg2[14])); + x348 = ((uint64_t)(arg1[0]) * (arg2[13])); + x349 = ((uint64_t)(arg1[0]) * (arg2[12])); + x350 = ((uint64_t)(arg1[0]) * (arg2[11])); + x351 = ((uint64_t)(arg1[0]) * (arg2[10])); + x352 = ((uint64_t)(arg1[0]) * (arg2[9])); + x353 = ((uint64_t)(arg1[0]) * (arg2[8])); + x354 = ((uint64_t)(arg1[0]) * (arg2[7])); + x355 = ((uint64_t)(arg1[0]) * (arg2[6])); + x356 = ((uint64_t)(arg1[0]) * (arg2[5])); + x357 = ((uint64_t)(arg1[0]) * (arg2[4])); + x358 = ((uint64_t)(arg1[0]) * (arg2[3])); + x359 = ((uint64_t)(arg1[0]) * (arg2[2])); + x360 = ((uint64_t)(arg1[0]) * (arg2[1])); + x361 = ((uint64_t)(arg1[0]) * (arg2[0])); + /* Step 2: add up the 19 products feeding each output limb. x362 is the sum for limb 0; x382 down to x365 are the sums for limbs 1 up to 18. */ x362 = + (x361 + + (x171 + + (x170 + + (x168 + + (x165 + + (x161 + + (x156 + + (x150 + + (x143 + + (x135 + + (x126 + + (x116 + + (x105 + + (x93 + (x80 + (x66 + (x51 + (x35 + x18)))))))))))))))))); + /* Step 3: carry propagation. Each limb keeps its low 28 or 27 bits (mask 0xfffffff or 0x7ffffff) and the shifted-out remainder is added to the next limb's sum. */ x363 = (x362 >> 28); + x364 = (uint32_t)(x362 & UINT32_C(0xfffffff)); + x365 = (x343 + + (x325 + + (x308 + + (x292 + + (x277 + + (x263 + + (x250 + + (x238 + + (x227 + + (x217 + + (x208 + + (x200 + + (x193 + + (x187 + + (x182 + (x178 + (x175 + (x173 + x172)))))))))))))))))); + x366 = (x344 + + (x326 + + (x309 + + (x293 + + (x278 + + (x264 + + (x251 + + (x239 + + (x228 + + (x218 + + (x209 + + (x201 + + (x194 + + (x188 + + (x183 + (x179 + (x176 + (x174 + x1)))))))))))))))))); + x367 = (x345 + + (x327 + + (x310 + + (x294 + + (x279 + + (x265 + + (x252 + + (x240 
+ + (x229 + + (x219 + + (x210 + + (x202 + + (x195 + + (x189 + + (x184 + (x180 + (x177 + (x19 + x2)))))))))))))))))); + x368 = + (x346 + + (x328 + + (x311 + + (x295 + + (x280 + + (x266 + + (x253 + + (x241 + + (x230 + + (x220 + + (x211 + + (x203 + + (x196 + + (x190 + (x185 + (x181 + (x36 + (x20 + x3)))))))))))))))))); + x369 = + (x347 + + (x329 + + (x312 + + (x296 + + (x281 + + (x267 + + (x254 + + (x242 + + (x231 + + (x221 + + (x212 + + (x204 + + (x197 + + (x191 + (x186 + (x52 + (x37 + (x21 + x4)))))))))))))))))); + x370 = + (x348 + + (x330 + + (x313 + + (x297 + + (x282 + + (x268 + + (x255 + + (x243 + + (x232 + + (x222 + + (x213 + + (x205 + + (x198 + + (x192 + (x67 + (x53 + (x38 + (x22 + x5)))))))))))))))))); + x371 = (x349 + + (x331 + + (x314 + + (x298 + + (x283 + + (x269 + + (x256 + + (x244 + + (x233 + + (x223 + + (x214 + + (x206 + + (x199 + + (x81 + (x68 + (x54 + (x39 + (x23 + x6)))))))))))))))))); + x372 = (x350 + + (x332 + + (x315 + + (x299 + + (x284 + + (x270 + + (x257 + + (x245 + + (x234 + + (x224 + + (x215 + + (x207 + + (x94 + + (x82 + (x69 + (x55 + (x40 + (x24 + x7)))))))))))))))))); + x373 = (x351 + + (x333 + + (x316 + + (x300 + + (x285 + + (x271 + + (x258 + + (x246 + + (x235 + + (x225 + + (x216 + + (x106 + + (x95 + + (x83 + (x70 + (x56 + (x41 + (x25 + x8)))))))))))))))))); + x374 = (x352 + + (x334 + + (x317 + + (x301 + + (x286 + + (x272 + + (x259 + + (x247 + + (x236 + + (x226 + + (x117 + + (x107 + + (x96 + + (x84 + (x71 + (x57 + (x42 + (x26 + x9)))))))))))))))))); + x375 = + (x353 + + (x335 + + (x318 + + (x302 + + (x287 + + (x273 + + (x260 + + (x248 + + (x237 + + (x127 + + (x118 + + (x108 + + (x97 + + (x85 + (x72 + (x58 + (x43 + (x27 + x10)))))))))))))))))); + x376 = + (x354 + + (x336 + + (x319 + + (x303 + + (x288 + + (x274 + + (x261 + + (x249 + + (x136 + + (x128 + + (x119 + + (x109 + + (x98 + + (x86 + (x73 + (x59 + (x44 + (x28 + x11)))))))))))))))))); + x377 = + (x355 + + (x337 + + (x320 + + (x304 + + (x289 + + (x275 + + (x262 + + (x144 + + (x137 + + 
(x129 + + (x120 + + (x110 + + (x99 + + (x87 + (x74 + (x60 + (x45 + (x29 + x12)))))))))))))))))); + x378 = + (x356 + + (x338 + + (x321 + + (x305 + + (x290 + + (x276 + + (x151 + + (x145 + + (x138 + + (x130 + + (x121 + + (x111 + + (x100 + + (x88 + (x75 + (x61 + (x46 + (x30 + x13)))))))))))))))))); + x379 = + (x357 + + (x339 + + (x322 + + (x306 + + (x291 + + (x157 + + (x152 + + (x146 + + (x139 + + (x131 + + (x122 + + (x112 + + (x101 + + (x89 + (x76 + (x62 + (x47 + (x31 + x14)))))))))))))))))); + x380 = + (x358 + + (x340 + + (x323 + + (x307 + + (x162 + + (x158 + + (x153 + + (x147 + + (x140 + + (x132 + + (x123 + + (x113 + + (x102 + + (x90 + (x77 + (x63 + (x48 + (x32 + x15)))))))))))))))))); + x381 = + (x359 + + (x341 + + (x324 + + (x166 + + (x163 + + (x159 + + (x154 + + (x148 + + (x141 + + (x133 + + (x124 + + (x114 + + (x103 + + (x91 + (x78 + (x64 + (x49 + (x33 + x16)))))))))))))))))); + x382 = + (x360 + + (x342 + + (x169 + + (x167 + + (x164 + + (x160 + + (x155 + + (x149 + + (x142 + + (x134 + + (x125 + + (x115 + + (x104 + + (x92 + (x79 + (x65 + (x50 + (x34 + x17)))))))))))))))))); + x383 = (x363 + x382); + x384 = (x383 >> 27); + x385 = (uint32_t)(x383 & UINT32_C(0x7ffffff)); + x386 = (x384 + x381); + x387 = (x386 >> 28); + x388 = (uint32_t)(x386 & UINT32_C(0xfffffff)); + x389 = (x387 + x380); + x390 = (x389 >> 27); + x391 = (uint32_t)(x389 & UINT32_C(0x7ffffff)); + x392 = (x390 + x379); + x393 = (x392 >> 28); + x394 = (uint32_t)(x392 & UINT32_C(0xfffffff)); + x395 = (x393 + x378); + x396 = (x395 >> 27); + x397 = (uint32_t)(x395 & UINT32_C(0x7ffffff)); + x398 = (x396 + x377); + x399 = (x398 >> 27); + x400 = (uint32_t)(x398 & UINT32_C(0x7ffffff)); + x401 = (x399 + x376); + x402 = (x401 >> 28); + x403 = (uint32_t)(x401 & UINT32_C(0xfffffff)); + x404 = (x402 + x375); + x405 = (x404 >> 27); + x406 = (uint32_t)(x404 & UINT32_C(0x7ffffff)); + x407 = (x405 + x374); + x408 = (x407 >> 28); + x409 = (uint32_t)(x407 & UINT32_C(0xfffffff)); + x410 = (x408 + x373); + x411 = (x410 >> 
27); + x412 = (uint32_t)(x410 & UINT32_C(0x7ffffff)); + x413 = (x411 + x372); + x414 = (x413 >> 28); + x415 = (uint32_t)(x413 & UINT32_C(0xfffffff)); + x416 = (x414 + x371); + x417 = (x416 >> 27); + x418 = (uint32_t)(x416 & UINT32_C(0x7ffffff)); + x419 = (x417 + x370); + x420 = (x419 >> 27); + x421 = (uint32_t)(x419 & UINT32_C(0x7ffffff)); + x422 = (x420 + x369); + x423 = (x422 >> 28); + x424 = (uint32_t)(x422 & UINT32_C(0xfffffff)); + x425 = (x423 + x368); + x426 = (x425 >> 27); + x427 = (uint32_t)(x425 & UINT32_C(0x7ffffff)); + x428 = (x426 + x367); + x429 = (x428 >> 28); + x430 = (uint32_t)(x428 & UINT32_C(0xfffffff)); + x431 = (x429 + x366); + x432 = (x431 >> 27); + x433 = (uint32_t)(x431 & UINT32_C(0x7ffffff)); + x434 = (x432 + x365); + x435 = (x434 >> 27); + x436 = (uint32_t)(x434 & UINT32_C(0x7ffffff)); + /* The carry out of limb 18 (x435) is added back into limb 0 (x364); two further short carries then run into limbs 1 and 2. */ x437 = (x364 + x435); + x438 = (uint32_t)(x437 >> 28); + x439 = (uint32_t)(x437 & UINT32_C(0xfffffff)); + x440 = (x438 + x385); + x441 = (fiat_secp521r1_uint1)(x440 >> 27); + x442 = (x440 & UINT32_C(0x7ffffff)); + x443 = (x441 + x388); + /* Store the 19 result limbs. */ out1[0] = x439; + out1[1] = x442; + out1[2] = x443; + out1[3] = x391; + out1[4] = x394; + out1[5] = x397; + out1[6] = x400; + out1[7] = x403; + out1[8] = x406; + out1[9] = x409; + out1[10] = x412; + out1[11] = x415; + out1[12] = x418; + out1[13] = x421; + out1[14] = x424; + out1[15] = x427; + out1[16] = x430; + out1[17] = x433; + out1[18] = x436; +} + +/* + * The function fiat_secp521r1_carry_square squares a field element and reduces the result. 
+ * + * Postconditions: + * eval out1 mod m = (eval arg1 * eval arg1) mod m + * + */ +static void +fiat_secp521r1_carry_square( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1) +{ + uint32_t x1; + uint32_t x2; + uint32_t x3; + uint32_t x4; + uint32_t x5; + uint32_t x6; + uint32_t x7; + uint32_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint32_t x16; + uint32_t x17; + uint32_t x18; + uint32_t x19; + uint32_t x20; + uint32_t x21; + uint32_t x22; + uint32_t x23; + uint32_t x24; + uint32_t x25; + uint32_t x26; + uint32_t x27; + uint32_t x28; + uint32_t x29; + uint32_t x30; + uint32_t x31; + uint32_t x32; + uint32_t x33; + uint32_t x34; + uint32_t x35; + uint32_t x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + uint64_t x45; + uint64_t x46; + uint64_t x47; + uint64_t x48; + uint64_t x49; + uint64_t x50; + uint64_t x51; + uint64_t x52; + uint64_t x53; + uint64_t x54; + uint64_t x55; + uint64_t x56; + uint64_t x57; + uint64_t x58; + uint64_t x59; + uint64_t x60; + uint64_t x61; + uint64_t x62; + uint64_t x63; + uint64_t x64; + uint64_t x65; + uint64_t x66; + uint64_t x67; + uint64_t x68; + uint64_t x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + uint64_t x74; + uint64_t x75; + uint64_t x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + uint64_t x81; + uint64_t x82; + uint64_t x83; + uint64_t x84; + uint64_t x85; + uint64_t x86; + uint64_t x87; + uint64_t x88; + uint64_t x89; + uint64_t x90; + uint64_t x91; + uint64_t x92; + uint64_t x93; + uint64_t x94; + uint64_t x95; + uint64_t x96; + uint64_t x97; + uint64_t x98; + uint64_t x99; + uint64_t x100; + uint64_t x101; + uint64_t x102; + uint64_t x103; + uint64_t x104; + uint64_t x105; + uint64_t x106; + uint64_t x107; + uint64_t x108; + uint64_t x109; + uint64_t x110; + uint64_t 
x111; + uint64_t x112; + uint64_t x113; + uint64_t x114; + uint64_t x115; + uint64_t x116; + uint64_t x117; + uint64_t x118; + uint64_t x119; + uint64_t x120; + uint64_t x121; + uint64_t x122; + uint64_t x123; + uint64_t x124; + uint64_t x125; + uint64_t x126; + uint64_t x127; + uint64_t x128; + uint64_t x129; + uint64_t x130; + uint64_t x131; + uint64_t x132; + uint64_t x133; + uint64_t x134; + uint64_t x135; + uint64_t x136; + uint64_t x137; + uint64_t x138; + uint64_t x139; + uint64_t x140; + uint64_t x141; + uint64_t x142; + uint64_t x143; + uint64_t x144; + uint64_t x145; + uint64_t x146; + uint64_t x147; + uint64_t x148; + uint64_t x149; + uint64_t x150; + uint64_t x151; + uint64_t x152; + uint64_t x153; + uint64_t x154; + uint64_t x155; + uint64_t x156; + uint64_t x157; + uint64_t x158; + uint64_t x159; + uint64_t x160; + uint64_t x161; + uint64_t x162; + uint64_t x163; + uint64_t x164; + uint64_t x165; + uint64_t x166; + uint64_t x167; + uint64_t x168; + uint64_t x169; + uint64_t x170; + uint64_t x171; + uint64_t x172; + uint64_t x173; + uint64_t x174; + uint64_t x175; + uint64_t x176; + uint64_t x177; + uint64_t x178; + uint64_t x179; + uint64_t x180; + uint64_t x181; + uint64_t x182; + uint64_t x183; + uint64_t x184; + uint64_t x185; + uint64_t x186; + uint64_t x187; + uint64_t x188; + uint64_t x189; + uint64_t x190; + uint64_t x191; + uint64_t x192; + uint64_t x193; + uint64_t x194; + uint64_t x195; + uint64_t x196; + uint64_t x197; + uint64_t x198; + uint64_t x199; + uint64_t x200; + uint64_t x201; + uint64_t x202; + uint64_t x203; + uint64_t x204; + uint64_t x205; + uint64_t x206; + uint64_t x207; + uint64_t x208; + uint64_t x209; + uint64_t x210; + uint64_t x211; + uint64_t x212; + uint64_t x213; + uint64_t x214; + uint64_t x215; + uint64_t x216; + uint64_t x217; + uint64_t x218; + uint64_t x219; + uint64_t x220; + uint64_t x221; + uint64_t x222; + uint64_t x223; + uint64_t x224; + uint64_t x225; + uint64_t x226; + uint64_t x227; + uint64_t x228; + 
uint32_t x229; + uint64_t x230; + uint64_t x231; + uint64_t x232; + uint64_t x233; + uint64_t x234; + uint64_t x235; + uint64_t x236; + uint64_t x237; + uint64_t x238; + uint64_t x239; + uint64_t x240; + uint64_t x241; + uint64_t x242; + uint64_t x243; + uint64_t x244; + uint64_t x245; + uint64_t x246; + uint64_t x247; + uint64_t x248; + uint64_t x249; + uint32_t x250; + uint64_t x251; + uint64_t x252; + uint32_t x253; + uint64_t x254; + uint64_t x255; + uint32_t x256; + uint64_t x257; + uint64_t x258; + uint32_t x259; + uint64_t x260; + uint64_t x261; + uint32_t x262; + uint64_t x263; + uint64_t x264; + uint32_t x265; + uint64_t x266; + uint64_t x267; + uint32_t x268; + uint64_t x269; + uint64_t x270; + uint32_t x271; + uint64_t x272; + uint64_t x273; + uint32_t x274; + uint64_t x275; + uint64_t x276; + uint32_t x277; + uint64_t x278; + uint64_t x279; + uint32_t x280; + uint64_t x281; + uint64_t x282; + uint32_t x283; + uint64_t x284; + uint64_t x285; + uint32_t x286; + uint64_t x287; + uint64_t x288; + uint32_t x289; + uint64_t x290; + uint64_t x291; + uint32_t x292; + uint64_t x293; + uint64_t x294; + uint32_t x295; + uint64_t x296; + uint64_t x297; + uint32_t x298; + uint64_t x299; + uint64_t x300; + uint32_t x301; + uint64_t x302; + uint32_t x303; + uint32_t x304; + uint32_t x305; + fiat_secp521r1_uint1 x306; + uint32_t x307; + uint32_t x308; + x1 = (arg1[18]); + x2 = (x1 * 0x2); + x3 = ((arg1[18]) * 0x2); + x4 = (arg1[17]); + x5 = (x4 * 0x2); + x6 = ((arg1[17]) * 0x2); + x7 = (arg1[16]); + x8 = (x7 * 0x2); + x9 = ((arg1[16]) * 0x2); + x10 = (arg1[15]); + x11 = (x10 * 0x2); + x12 = ((arg1[15]) * 0x2); + x13 = (arg1[14]); + x14 = (x13 * 0x2); + x15 = ((arg1[14]) * 0x2); + x16 = (arg1[13]); + x17 = (x16 * 0x2); + x18 = ((arg1[13]) * 0x2); + x19 = (arg1[12]); + x20 = (x19 * 0x2); + x21 = ((arg1[12]) * 0x2); + x22 = (arg1[11]); + x23 = (x22 * 0x2); + x24 = ((arg1[11]) * 0x2); + x25 = (arg1[10]); + x26 = (x25 * 0x2); + x27 = ((arg1[10]) * 0x2); + x28 = ((arg1[9]) * 
0x2); + x29 = ((arg1[8]) * 0x2); + x30 = ((arg1[7]) * 0x2); + x31 = ((arg1[6]) * 0x2); + x32 = ((arg1[5]) * 0x2); + x33 = ((arg1[4]) * 0x2); + x34 = ((arg1[3]) * 0x2); + x35 = ((arg1[2]) * 0x2); + x36 = ((arg1[1]) * 0x2); + x37 = ((uint64_t)(arg1[18]) * x1); + x38 = ((uint64_t)(arg1[17]) * (x2 * 0x2)); + x39 = ((uint64_t)(arg1[17]) * (x4 * 0x2)); + x40 = ((uint64_t)(arg1[16]) * x2); + x41 = ((uint64_t)(arg1[16]) * (x5 * 0x2)); + x42 = ((uint64_t)(arg1[16]) * x7); + x43 = ((uint64_t)(arg1[15]) * (x2 * 0x2)); + x44 = ((uint64_t)(arg1[15]) * (x5 * 0x2)); + x45 = ((uint64_t)(arg1[15]) * x8); + x46 = ((uint64_t)(arg1[15]) * (x10 * 0x2)); + x47 = ((uint64_t)(arg1[14]) * x2); + x48 = ((uint64_t)(arg1[14]) * x5); + x49 = ((uint64_t)(arg1[14]) * x8); + x50 = ((uint64_t)(arg1[14]) * x11); + x51 = ((uint64_t)(arg1[14]) * x13); + x52 = ((uint64_t)(arg1[13]) * x2); + x53 = ((uint64_t)(arg1[13]) * (x5 * 0x2)); + x54 = ((uint64_t)(arg1[13]) * x8); + x55 = ((uint64_t)(arg1[13]) * (x11 * 0x2)); + x56 = ((uint64_t)(arg1[13]) * x14); + x57 = ((uint64_t)(arg1[13]) * (x16 * 0x2)); + x58 = ((uint64_t)(arg1[12]) * (x2 * 0x2)); + x59 = ((uint64_t)(arg1[12]) * (x5 * 0x2)); + x60 = ((uint64_t)(arg1[12]) * (x8 * 0x2)); + x61 = ((uint64_t)(arg1[12]) * (x11 * 0x2)); + x62 = ((uint64_t)(arg1[12]) * (x14 * 0x2)); + x63 = ((uint64_t)(arg1[12]) * (x17 * 0x2)); + x64 = ((uint64_t)(arg1[12]) * (x19 * 0x2)); + x65 = ((uint64_t)(arg1[11]) * x2); + x66 = ((uint64_t)(arg1[11]) * (x5 * 0x2)); + x67 = ((uint64_t)(arg1[11]) * x8); + x68 = ((uint64_t)(arg1[11]) * (x11 * 0x2)); + x69 = ((uint64_t)(arg1[11]) * x14); + x70 = ((uint64_t)(arg1[11]) * x17); + x71 = ((uint64_t)(arg1[11]) * (x20 * 0x2)); + x72 = ((uint64_t)(arg1[11]) * x22); + x73 = ((uint64_t)(arg1[10]) * (x2 * 0x2)); + x74 = ((uint64_t)(arg1[10]) * (x5 * 0x2)); + x75 = ((uint64_t)(arg1[10]) * (x8 * 0x2)); + x76 = ((uint64_t)(arg1[10]) * (x11 * 0x2)); + x77 = ((uint64_t)(arg1[10]) * x14); + x78 = ((uint64_t)(arg1[10]) * (x17 * 0x2)); + x79 = 
((uint64_t)(arg1[10]) * (x20 * 0x2)); + x80 = ((uint64_t)(arg1[10]) * (x23 * 0x2)); + x81 = ((uint64_t)(arg1[10]) * (x25 * 0x2)); + x82 = ((uint64_t)(arg1[9]) * x2); + x83 = ((uint64_t)(arg1[9]) * (x5 * 0x2)); + x84 = ((uint64_t)(arg1[9]) * x8); + x85 = ((uint64_t)(arg1[9]) * x11); + x86 = ((uint64_t)(arg1[9]) * x14); + x87 = ((uint64_t)(arg1[9]) * x17); + x88 = ((uint64_t)(arg1[9]) * (x20 * 0x2)); + x89 = ((uint64_t)(arg1[9]) * x23); + x90 = ((uint64_t)(arg1[9]) * (x26 * 0x2)); + x91 = ((uint64_t)(arg1[9]) * (arg1[9])); + x92 = ((uint64_t)(arg1[8]) * (x2 * 0x2)); + x93 = ((uint64_t)(arg1[8]) * (x5 * 0x2)); + x94 = ((uint64_t)(arg1[8]) * x8); + x95 = ((uint64_t)(arg1[8]) * (x11 * 0x2)); + x96 = ((uint64_t)(arg1[8]) * x14); + x97 = ((uint64_t)(arg1[8]) * (x17 * 0x2)); + x98 = ((uint64_t)(arg1[8]) * (x20 * 0x2)); + x99 = ((uint64_t)(arg1[8]) * (x23 * 0x2)); + x100 = ((uint64_t)(arg1[8]) * (x27 * 0x2)); + x101 = ((uint64_t)(arg1[8]) * x28); + x102 = ((uint64_t)(arg1[8]) * ((arg1[8]) * 0x2)); + x103 = ((uint64_t)(arg1[7]) * x2); + x104 = ((uint64_t)(arg1[7]) * x5); + x105 = ((uint64_t)(arg1[7]) * x8); + x106 = ((uint64_t)(arg1[7]) * x11); + x107 = ((uint64_t)(arg1[7]) * x14); + x108 = ((uint64_t)(arg1[7]) * x17); + x109 = ((uint64_t)(arg1[7]) * (x20 * 0x2)); + x110 = ((uint64_t)(arg1[7]) * x24); + x111 = ((uint64_t)(arg1[7]) * x27); + x112 = ((uint64_t)(arg1[7]) * x28); + x113 = ((uint64_t)(arg1[7]) * x29); + x114 = ((uint64_t)(arg1[7]) * (arg1[7])); + x115 = ((uint64_t)(arg1[6]) * x2); + x116 = ((uint64_t)(arg1[6]) * (x5 * 0x2)); + x117 = ((uint64_t)(arg1[6]) * x8); + x118 = ((uint64_t)(arg1[6]) * (x11 * 0x2)); + x119 = ((uint64_t)(arg1[6]) * x14); + x120 = ((uint64_t)(arg1[6]) * (x17 * 0x2)); + x121 = ((uint64_t)(arg1[6]) * (x21 * 0x2)); + x122 = ((uint64_t)(arg1[6]) * x24); + x123 = ((uint64_t)(arg1[6]) * (x27 * 0x2)); + x124 = ((uint64_t)(arg1[6]) * x28); + x125 = ((uint64_t)(arg1[6]) * (x29 * 0x2)); + x126 = ((uint64_t)(arg1[6]) * x30); + x127 = 
((uint64_t)(arg1[6]) * (arg1[6])); + x128 = ((uint64_t)(arg1[5]) * (x2 * 0x2)); + x129 = ((uint64_t)(arg1[5]) * (x5 * 0x2)); + x130 = ((uint64_t)(arg1[5]) * (x8 * 0x2)); + x131 = ((uint64_t)(arg1[5]) * (x11 * 0x2)); + x132 = ((uint64_t)(arg1[5]) * (x14 * 0x2)); + x133 = ((uint64_t)(arg1[5]) * (x18 * 0x2)); + x134 = ((uint64_t)(arg1[5]) * (x21 * 0x2)); + x135 = ((uint64_t)(arg1[5]) * (x24 * 0x2)); + x136 = ((uint64_t)(arg1[5]) * (x27 * 0x2)); + x137 = ((uint64_t)(arg1[5]) * (x28 * 0x2)); + x138 = ((uint64_t)(arg1[5]) * (x29 * 0x2)); + x139 = ((uint64_t)(arg1[5]) * x30); + x140 = ((uint64_t)(arg1[5]) * (x31 * 0x2)); + x141 = ((uint64_t)(arg1[5]) * ((arg1[5]) * 0x2)); + x142 = ((uint64_t)(arg1[4]) * x2); + x143 = ((uint64_t)(arg1[4]) * (x5 * 0x2)); + x144 = ((uint64_t)(arg1[4]) * x8); + x145 = ((uint64_t)(arg1[4]) * (x11 * 0x2)); + x146 = ((uint64_t)(arg1[4]) * x15); + x147 = ((uint64_t)(arg1[4]) * x18); + x148 = ((uint64_t)(arg1[4]) * (x21 * 0x2)); + x149 = ((uint64_t)(arg1[4]) * x24); + x150 = ((uint64_t)(arg1[4]) * (x27 * 0x2)); + x151 = ((uint64_t)(arg1[4]) * x28); + x152 = ((uint64_t)(arg1[4]) * x29); + x153 = ((uint64_t)(arg1[4]) * x30); + x154 = ((uint64_t)(arg1[4]) * x31); + x155 = ((uint64_t)(arg1[4]) * (x32 * 0x2)); + x156 = ((uint64_t)(arg1[4]) * (arg1[4])); + x157 = ((uint64_t)(arg1[3]) * (x2 * 0x2)); + x158 = ((uint64_t)(arg1[3]) * (x5 * 0x2)); + x159 = ((uint64_t)(arg1[3]) * (x8 * 0x2)); + x160 = ((uint64_t)(arg1[3]) * (x12 * 0x2)); + x161 = ((uint64_t)(arg1[3]) * x15); + x162 = ((uint64_t)(arg1[3]) * (x18 * 0x2)); + x163 = ((uint64_t)(arg1[3]) * (x21 * 0x2)); + x164 = ((uint64_t)(arg1[3]) * (x24 * 0x2)); + x165 = ((uint64_t)(arg1[3]) * (x27 * 0x2)); + x166 = ((uint64_t)(arg1[3]) * x28); + x167 = ((uint64_t)(arg1[3]) * (x29 * 0x2)); + x168 = ((uint64_t)(arg1[3]) * x30); + x169 = ((uint64_t)(arg1[3]) * (x31 * 0x2)); + x170 = ((uint64_t)(arg1[3]) * (x32 * 0x2)); + x171 = ((uint64_t)(arg1[3]) * (x33 * 0x2)); + x172 = ((uint64_t)(arg1[3]) * ((arg1[3]) * 
0x2)); + x173 = ((uint64_t)(arg1[2]) * x2); + x174 = ((uint64_t)(arg1[2]) * (x5 * 0x2)); + x175 = ((uint64_t)(arg1[2]) * x9); + x176 = ((uint64_t)(arg1[2]) * x12); + x177 = ((uint64_t)(arg1[2]) * x15); + x178 = ((uint64_t)(arg1[2]) * x18); + x179 = ((uint64_t)(arg1[2]) * (x21 * 0x2)); + x180 = ((uint64_t)(arg1[2]) * x24); + x181 = ((uint64_t)(arg1[2]) * x27); + x182 = ((uint64_t)(arg1[2]) * x28); + x183 = ((uint64_t)(arg1[2]) * x29); + x184 = ((uint64_t)(arg1[2]) * x30); + x185 = ((uint64_t)(arg1[2]) * x31); + x186 = ((uint64_t)(arg1[2]) * (x32 * 0x2)); + x187 = ((uint64_t)(arg1[2]) * x33); + x188 = ((uint64_t)(arg1[2]) * x34); + x189 = ((uint64_t)(arg1[2]) * (arg1[2])); + x190 = ((uint64_t)(arg1[1]) * (x2 * 0x2)); + x191 = ((uint64_t)(arg1[1]) * (x6 * 0x2)); + x192 = ((uint64_t)(arg1[1]) * x9); + x193 = ((uint64_t)(arg1[1]) * (x12 * 0x2)); + x194 = ((uint64_t)(arg1[1]) * x15); + x195 = ((uint64_t)(arg1[1]) * (x18 * 0x2)); + x196 = ((uint64_t)(arg1[1]) * (x21 * 0x2)); + x197 = ((uint64_t)(arg1[1]) * x24); + x198 = ((uint64_t)(arg1[1]) * (x27 * 0x2)); + x199 = ((uint64_t)(arg1[1]) * x28); + x200 = ((uint64_t)(arg1[1]) * (x29 * 0x2)); + x201 = ((uint64_t)(arg1[1]) * x30); + x202 = ((uint64_t)(arg1[1]) * (x31 * 0x2)); + x203 = ((uint64_t)(arg1[1]) * (x32 * 0x2)); + x204 = ((uint64_t)(arg1[1]) * x33); + x205 = ((uint64_t)(arg1[1]) * (x34 * 0x2)); + x206 = ((uint64_t)(arg1[1]) * x35); + x207 = ((uint64_t)(arg1[1]) * ((arg1[1]) * 0x2)); + x208 = ((uint64_t)(arg1[0]) * x3); + x209 = ((uint64_t)(arg1[0]) * x6); + x210 = ((uint64_t)(arg1[0]) * x9); + x211 = ((uint64_t)(arg1[0]) * x12); + x212 = ((uint64_t)(arg1[0]) * x15); + x213 = ((uint64_t)(arg1[0]) * x18); + x214 = ((uint64_t)(arg1[0]) * x21); + x215 = ((uint64_t)(arg1[0]) * x24); + x216 = ((uint64_t)(arg1[0]) * x27); + x217 = ((uint64_t)(arg1[0]) * x28); + x218 = ((uint64_t)(arg1[0]) * x29); + x219 = ((uint64_t)(arg1[0]) * x30); + x220 = ((uint64_t)(arg1[0]) * x31); + x221 = ((uint64_t)(arg1[0]) * x32); + x222 = 
((uint64_t)(arg1[0]) * x33); + x223 = ((uint64_t)(arg1[0]) * x34); + x224 = ((uint64_t)(arg1[0]) * x35); + x225 = ((uint64_t)(arg1[0]) * x36); + x226 = ((uint64_t)(arg1[0]) * (arg1[0])); + x227 = + (x226 + + (x190 + + (x174 + (x159 + (x145 + (x132 + (x120 + (x109 + (x99 + x90))))))))); + x228 = (x227 >> 28); + x229 = (uint32_t)(x227 & UINT32_C(0xfffffff)); + x230 = + (x208 + + (x191 + + (x175 + (x160 + (x146 + (x133 + (x121 + (x110 + (x100 + x91))))))))); + x231 = + (x209 + + (x192 + + (x176 + (x161 + (x147 + (x134 + (x122 + (x111 + (x101 + x37))))))))); + x232 = + (x210 + + (x193 + + (x177 + (x162 + (x148 + (x135 + (x123 + (x112 + (x102 + x38))))))))); + x233 = + (x211 + + (x194 + + (x178 + (x163 + (x149 + (x136 + (x124 + (x113 + (x40 + x39))))))))); + x234 = + (x212 + + (x195 + + (x179 + (x164 + (x150 + (x137 + (x125 + (x114 + (x43 + x41))))))))); + x235 = (x213 + + (x196 + + (x180 + (x165 + (x151 + (x138 + (x126 + (x47 + (x44 + x42))))))))); + x236 = (x214 + + (x197 + + (x181 + (x166 + (x152 + (x139 + (x127 + (x52 + (x48 + x45))))))))); + x237 = (x215 + + (x198 + + (x182 + (x167 + (x153 + (x140 + (x58 + (x53 + (x49 + x46))))))))); + x238 = (x216 + + (x199 + + (x183 + (x168 + (x154 + (x141 + (x65 + (x59 + (x54 + x50))))))))); + x239 = (x217 + + (x200 + + (x184 + (x169 + (x155 + (x73 + (x66 + (x60 + (x55 + x51))))))))); + x240 = (x218 + + (x201 + + (x185 + (x170 + (x156 + (x82 + (x74 + (x67 + (x61 + x56))))))))); + x241 = (x219 + + (x202 + + (x186 + (x171 + (x92 + (x83 + (x75 + (x68 + (x62 + x57))))))))); + x242 = (x220 + + (x203 + + (x187 + (x172 + (x103 + (x93 + (x84 + (x76 + (x69 + x63))))))))); + x243 = (x221 + + (x204 + + (x188 + (x115 + (x104 + (x94 + (x85 + (x77 + (x70 + x64))))))))); + x244 = (x222 + + (x205 + + (x189 + (x128 + (x116 + (x105 + (x95 + (x86 + (x78 + x71))))))))); + x245 = (x223 + + (x206 + + (x142 + (x129 + (x117 + (x106 + (x96 + (x87 + (x79 + x72))))))))); + x246 = (x224 + + (x207 + + (x157 + (x143 + (x130 + (x118 + (x107 + (x97 + (x88 + 
x80))))))))); + x247 = (x225 + + (x173 + + (x158 + (x144 + (x131 + (x119 + (x108 + (x98 + (x89 + x81))))))))); + x248 = (x228 + x247); + x249 = (x248 >> 27); + x250 = (uint32_t)(x248 & UINT32_C(0x7ffffff)); + x251 = (x249 + x246); + x252 = (x251 >> 28); + x253 = (uint32_t)(x251 & UINT32_C(0xfffffff)); + x254 = (x252 + x245); + x255 = (x254 >> 27); + x256 = (uint32_t)(x254 & UINT32_C(0x7ffffff)); + x257 = (x255 + x244); + x258 = (x257 >> 28); + x259 = (uint32_t)(x257 & UINT32_C(0xfffffff)); + x260 = (x258 + x243); + x261 = (x260 >> 27); + x262 = (uint32_t)(x260 & UINT32_C(0x7ffffff)); + x263 = (x261 + x242); + x264 = (x263 >> 27); + x265 = (uint32_t)(x263 & UINT32_C(0x7ffffff)); + x266 = (x264 + x241); + x267 = (x266 >> 28); + x268 = (uint32_t)(x266 & UINT32_C(0xfffffff)); + x269 = (x267 + x240); + x270 = (x269 >> 27); + x271 = (uint32_t)(x269 & UINT32_C(0x7ffffff)); + x272 = (x270 + x239); + x273 = (x272 >> 28); + x274 = (uint32_t)(x272 & UINT32_C(0xfffffff)); + x275 = (x273 + x238); + x276 = (x275 >> 27); + x277 = (uint32_t)(x275 & UINT32_C(0x7ffffff)); + x278 = (x276 + x237); + x279 = (x278 >> 28); + x280 = (uint32_t)(x278 & UINT32_C(0xfffffff)); + x281 = (x279 + x236); + x282 = (x281 >> 27); + x283 = (uint32_t)(x281 & UINT32_C(0x7ffffff)); + x284 = (x282 + x235); + x285 = (x284 >> 27); + x286 = (uint32_t)(x284 & UINT32_C(0x7ffffff)); + x287 = (x285 + x234); + x288 = (x287 >> 28); + x289 = (uint32_t)(x287 & UINT32_C(0xfffffff)); + x290 = (x288 + x233); + x291 = (x290 >> 27); + x292 = (uint32_t)(x290 & UINT32_C(0x7ffffff)); + x293 = (x291 + x232); + x294 = (x293 >> 28); + x295 = (uint32_t)(x293 & UINT32_C(0xfffffff)); + x296 = (x294 + x231); + x297 = (x296 >> 27); + x298 = (uint32_t)(x296 & UINT32_C(0x7ffffff)); + x299 = (x297 + x230); + x300 = (x299 >> 27); + x301 = (uint32_t)(x299 & UINT32_C(0x7ffffff)); + x302 = (x229 + x300); + x303 = (uint32_t)(x302 >> 28); + x304 = (uint32_t)(x302 & UINT32_C(0xfffffff)); + x305 = (x303 + x250); + x306 = 
(fiat_secp521r1_uint1)(x305 >> 27);
+    x307 = (x305 & UINT32_C(0x7ffffff));
+    x308 = (x306 + x253);
+    out1[0] = x304;
+    out1[1] = x307;
+    out1[2] = x308;
+    out1[3] = x256;
+    out1[4] = x259;
+    out1[5] = x262;
+    out1[6] = x265;
+    out1[7] = x268;
+    out1[8] = x271;
+    out1[9] = x274;
+    out1[10] = x277;
+    out1[11] = x280;
+    out1[12] = x283;
+    out1[13] = x286;
+    out1[14] = x289;
+    out1[15] = x292;
+    out1[16] = x295;
+    out1[17] = x298;
+    out1[18] = x301;
+}
+
+/*
+ * The function fiat_secp521r1_carry_add adds two field elements.
+ *
+ * Postconditions:
+ *   eval out1 mod m = (eval arg1 + eval arg2) mod m
+ *
+ * Implementation notes:
+ *   - An element is 19 limbs stored in uint32_t. Reading the shifts and
+ *     masks below, limbs 0, 2, 4, 7, 9, 11, 14 and 16 are 28 bits wide and
+ *     the other eleven limbs are 27 bits wide (8 * 28 + 11 * 27 = 521).
+ *   - x1..x19 are the limb-wise sums of arg1 and arg2; each one also absorbs
+ *     the carry shifted out of the limb before it.
+ *   - The carry out of limb 18 (x19 >> 27) is added back into limb 0 with
+ *     weight 1 (x20). That is the reduction one expects for
+ *     m = 2^521 - 1; m itself is not defined in this part of the file
+ *     (TODO confirm).
+ *   - The wrap-around can push limb 0 past 28 bits, so a carry is passed on
+ *     into limb 1 (x21) and from there into limb 2 (x24). Limb 2 is stored
+ *     without a final mask.
+ *   - The body contains no branches or loops; the statements form one
+ *     dependent carry chain, so their order matters.
+ *
+ */
+static void
+fiat_secp521r1_carry_add(
+    fiat_secp521r1_tight_field_element out1,
+    const fiat_secp521r1_tight_field_element arg1,
+    const fiat_secp521r1_tight_field_element arg2)
+{
+    uint32_t x1;
+    uint32_t x2;
+    uint32_t x3;
+    uint32_t x4;
+    uint32_t x5;
+    uint32_t x6;
+    uint32_t x7;
+    uint32_t x8;
+    uint32_t x9;
+    uint32_t x10;
+    uint32_t x11;
+    uint32_t x12;
+    uint32_t x13;
+    uint32_t x14;
+    uint32_t x15;
+    uint32_t x16;
+    uint32_t x17;
+    uint32_t x18;
+    uint32_t x19;
+    uint32_t x20;
+    uint32_t x21;
+    uint32_t x22;
+    uint32_t x23;
+    uint32_t x24;
+    uint32_t x25;
+    uint32_t x26;
+    uint32_t x27;
+    uint32_t x28;
+    uint32_t x29;
+    uint32_t x30;
+    uint32_t x31;
+    uint32_t x32;
+    uint32_t x33;
+    uint32_t x34;
+    uint32_t x35;
+    uint32_t x36;
+    uint32_t x37;
+    uint32_t x38;
+    uint32_t x39;
+    uint32_t x40;
+    x1 = ((arg1[0]) + (arg2[0])); /* limb 0 sum; its carry is x1 >> 28 */
+    x2 = ((x1 >> 28) + ((arg1[1]) + (arg2[1])));
+    x3 = ((x2 >> 27) + ((arg1[2]) + (arg2[2])));
+    x4 = ((x3 >> 28) + ((arg1[3]) + (arg2[3])));
+    x5 = ((x4 >> 27) + ((arg1[4]) + (arg2[4])));
+    x6 = ((x5 >> 28) + ((arg1[5]) + (arg2[5])));
+    x7 = ((x6 >> 27) + ((arg1[6]) + (arg2[6])));
+    x8 = ((x7 >> 27) + ((arg1[7]) + (arg2[7])));
+    x9 = ((x8 >> 28) + ((arg1[8]) + (arg2[8])));
+    x10 = ((x9 >> 27) + ((arg1[9]) + (arg2[9])));
+    x11 = ((x10 >> 28) + ((arg1[10]) + (arg2[10])));
+    x12 = ((x11 >> 27) + ((arg1[11]) + (arg2[11])));
+    x13 = ((x12 >> 28) + ((arg1[12]) + (arg2[12])));
+    x14 = ((x13 >> 27) + ((arg1[13]) + (arg2[13])));
+    x15 = ((x14 >> 27) + ((arg1[14]) + (arg2[14])));
+    x16 = ((x15 >> 28) + ((arg1[15]) + (arg2[15])));
+    x17 = ((x16 >> 27) + ((arg1[16]) + (arg2[16])));
+    x18 = ((x17 >> 28) + ((arg1[17]) + (arg2[17])));
+    x19 = ((x18 >> 27) + ((arg1[18]) + (arg2[18]))); /* top limb (index 18) */
+    x20 = ((x1 & UINT32_C(0xfffffff)) + (x19 >> 27)); /* top carry wraps into limb 0 */
+    x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); /* carry from x20 into limb 1 */
+    x22 = (x20 & UINT32_C(0xfffffff));
+    x23 = (x21 & UINT32_C(0x7ffffff));
+    x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); /* carry from x21 into limb 2; not re-masked */
+    x25 = (x4 & UINT32_C(0x7ffffff));
+    x26 = (x5 & UINT32_C(0xfffffff));
+    x27 = (x6 & UINT32_C(0x7ffffff));
+    x28 = (x7 & UINT32_C(0x7ffffff));
+    x29 = (x8 & UINT32_C(0xfffffff));
+    x30 = (x9 & UINT32_C(0x7ffffff));
+    x31 = (x10 & UINT32_C(0xfffffff));
+    x32 = (x11 & UINT32_C(0x7ffffff));
+    x33 = (x12 & UINT32_C(0xfffffff));
+    x34 = (x13 & UINT32_C(0x7ffffff));
+    x35 = (x14 & UINT32_C(0x7ffffff));
+    x36 = (x15 & UINT32_C(0xfffffff));
+    x37 = (x16 & UINT32_C(0x7ffffff));
+    x38 = (x17 & UINT32_C(0xfffffff));
+    x39 = (x18 & UINT32_C(0x7ffffff));
+    x40 = (x19 & UINT32_C(0x7ffffff));
+    out1[0] = x22;
+    out1[1] = x23;
+    out1[2] = x24;
+    out1[3] = x25;
+    out1[4] = x26;
+    out1[5] = x27;
+    out1[6] = x28;
+    out1[7] = x29;
+    out1[8] = x30;
+    out1[9] = x31;
+    out1[10] = x32;
+    out1[11] = x33;
+    out1[12] = x34;
+    out1[13] = x35;
+    out1[14] = x36;
+    out1[15] = x37;
+    out1[16] = x38;
+    out1[17] = x39;
+    out1[18] = x40;
+}
+
+/*
+ * The function fiat_secp521r1_carry_sub subtracts two field elements.
+ *
+ * Postconditions:
+ *   eval out1 mod m = (eval arg1 - eval arg2) mod m
+ *
+ */
+static void
+fiat_secp521r1_carry_sub(
+    fiat_secp521r1_tight_field_element out1,
+    const fiat_secp521r1_tight_field_element arg1,
+    const fiat_secp521r1_tight_field_element arg2)
+{ /*
+   * Implementation notes. Limb widths are read off the shifts and masks
+   * below: limbs 0, 2, 4, 7, 9, 11, 14 and 16 are 28 bits wide, the other
+   * eleven limbs are 27 bits wide.
+   *  - Before arg2 is subtracted, each limb of arg1 is biased by 0x1ffffffe
+   *    (= 2 * 0xfffffff) for a 28-bit limb or by 0xffffffe (= 2 * 0x7ffffff)
+   *    for a 27-bit limb, i.e. twice the all-ones value of that limb.
+   *    Presumably the biases together represent 2 * m, which would leave the
+   *    result unchanged mod m while keeping each unsigned limb difference
+   *    from wrapping below zero -- TODO confirm against the tight-element
+   *    bounds, which are declared elsewhere.
+   *  - x1..x19 chain the carries upward limb by limb. The carry out of
+   *    limb 18 (x19 >> 27) is added back into limb 0 (x20); a further carry
+   *    is then passed into limb 1 (x21) and limb 2 (x24).
+   *  - Limb 2 (x24) is stored without a final mask.
+   *  - The body contains no branches or loops; the statements form one
+   *    dependent carry chain, so their order matters.
+   */
+    uint32_t x1;
+    uint32_t x2;
+    uint32_t x3;
+    uint32_t x4;
+    uint32_t x5;
+    uint32_t x6;
+    uint32_t x7;
+    uint32_t x8;
+    uint32_t x9;
+    uint32_t x10;
+    uint32_t x11;
+    uint32_t x12;
+    uint32_t x13;
+    uint32_t x14;
+    uint32_t x15;
+    uint32_t x16;
+    uint32_t x17;
+    uint32_t x18;
+    uint32_t x19;
+    uint32_t x20;
+    uint32_t x21;
+    uint32_t x22;
+    uint32_t x23;
+    uint32_t x24;
+    uint32_t x25;
+    uint32_t x26;
+    uint32_t x27;
+    uint32_t x28;
+    uint32_t x29;
+    uint32_t x30;
+    uint32_t x31;
+    uint32_t x32;
+    uint32_t x33;
+    uint32_t x34;
+    uint32_t x35;
+    uint32_t x36;
+    uint32_t x37;
+    uint32_t x38;
+    uint32_t x39;
+    uint32_t x40;
+    x1 = ((UINT32_C(0x1ffffffe) + (arg1[0])) - (arg2[0])); /* limb 0: bias, then subtract */
+    x2 = ((x1 >> 28) + ((UINT32_C(0xffffffe) + (arg1[1])) - (arg2[1])));
+    x3 = ((x2 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[2])) - (arg2[2])));
+    x4 = ((x3 >> 28) + ((UINT32_C(0xffffffe) + (arg1[3])) - (arg2[3])));
+    x5 = ((x4 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[4])) - (arg2[4])));
+    x6 = ((x5 >> 28) + ((UINT32_C(0xffffffe) + (arg1[5])) - (arg2[5])));
+    x7 = ((x6 >> 27) + ((UINT32_C(0xffffffe) + (arg1[6])) - (arg2[6])));
+    x8 = ((x7 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[7])) - (arg2[7])));
+    x9 = ((x8 >> 28) + ((UINT32_C(0xffffffe) + (arg1[8])) - (arg2[8])));
+    x10 = ((x9 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[9])) - (arg2[9])));
+    x11 = ((x10 >> 28) + ((UINT32_C(0xffffffe) + (arg1[10])) - (arg2[10])));
+    x12 = ((x11 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[11])) - (arg2[11])));
+    x13 = ((x12 >> 28) + ((UINT32_C(0xffffffe) + (arg1[12])) - (arg2[12])));
+    x14 = ((x13 >> 27) + ((UINT32_C(0xffffffe) + (arg1[13])) - (arg2[13])));
+    x15 = ((x14 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[14])) - (arg2[14])));
+    x16 = ((x15 >> 28) + ((UINT32_C(0xffffffe) + (arg1[15])) - (arg2[15])));
+    x17 = ((x16 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[16])) - (arg2[16])));
+    x18 = ((x17 >> 28) + ((UINT32_C(0xffffffe) + (arg1[17])) - (arg2[17])));
+    x19 = ((x18 >> 27) + ((UINT32_C(0xffffffe) + (arg1[18])) - (arg2[18])));
+    x20 = ((x1 & UINT32_C(0xfffffff)) + (x19 >> 27)); /* top carry wraps into limb 0 */
+    x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); /* carry from x20 into limb 1 */
+    x22 = (x20 & UINT32_C(0xfffffff));
+    x23 = (x21 & UINT32_C(0x7ffffff));
+    x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); /* carry from x21 into limb 2; not re-masked */
+    x25 = (x4 & UINT32_C(0x7ffffff));
+    x26 = (x5 & UINT32_C(0xfffffff));
+    x27 = (x6 & UINT32_C(0x7ffffff));
+    x28 = (x7 & UINT32_C(0x7ffffff));
+    x29 = (x8 & UINT32_C(0xfffffff));
+    x30 = (x9 & UINT32_C(0x7ffffff));
+    x31 = (x10 & UINT32_C(0xfffffff));
+    x32 = (x11 & UINT32_C(0x7ffffff));
+    x33 = (x12 & UINT32_C(0xfffffff));
+    x34 = (x13 & UINT32_C(0x7ffffff));
+    x35 = (x14 & UINT32_C(0x7ffffff));
+    x36 = (x15 & UINT32_C(0xfffffff));
+    x37 = (x16 & UINT32_C(0x7ffffff));
+    x38 = (x17 & UINT32_C(0xfffffff));
+    x39 = (x18 & UINT32_C(0x7ffffff));
+    x40 = (x19 & UINT32_C(0x7ffffff));
+    out1[0] = x22;
+    out1[1] = x23;
+    out1[2] = x24;
+    out1[3] = x25;
+    out1[4] = x26;
+    out1[5] = x27;
+    out1[6] = x28;
+    out1[7] = x29;
+    out1[8] = x30;
+    out1[9] = x31;
+    out1[10] = x32;
+    out1[11] = x33;
+    out1[12] = x34;
+    out1[13] = x35;
+    out1[14] = x36;
+    out1[15] = x37;
+    out1[16] = x38;
+    out1[17] = x39;
+    out1[18] = x40;
+}
+
+/*
+ * The function fiat_secp521r1_carry_opp negates a field element.
+ *
+ * Postconditions:
+ *   eval out1 mod m = -eval arg1 mod m
+ *
+ */
+static void
+fiat_secp521r1_carry_opp(
+    fiat_secp521r1_tight_field_element out1,
+    const fiat_secp521r1_tight_field_element arg1)
+{ /*
+   * Implementation notes. Limb widths are read off the shifts and masks
+   * below: limbs 0, 2, 4, 7, 9, 11, 14 and 16 are 28 bits wide, the other
+   * eleven limbs are 27 bits wide.
+   *  - Each limb of arg1 is subtracted from a per-limb constant: 0x1ffffffe
+   *    (= 2 * 0xfffffff) for a 28-bit limb, 0xffffffe (= 2 * 0x7ffffff) for
+   *    a 27-bit limb. Presumably the constants together represent 2 * m, so
+   *    the result is -arg1 mod m with no limb wrapping below zero -- TODO
+   *    confirm against the tight-element bounds, which are declared
+   *    elsewhere.
+   *  - Every inter-limb carry is cast to fiat_secp521r1_uint1 (declared
+   *    elsewhere) before it is added to the next limb.
+   *  - The carry out of limb 18 (x19 >> 27) is added back into limb 0 (x20);
+   *    a further carry is then passed into limb 1 (x21) and limb 2 (x24).
+   *    Limb 2 (x24) is stored without a final mask.
+   *  - The body contains no branches or loops; the statements form one
+   *    dependent carry chain, so their order matters.
+   */
+    uint32_t x1;
+    uint32_t x2;
+    uint32_t x3;
+    uint32_t x4;
+    uint32_t x5;
+    uint32_t x6;
+    uint32_t x7;
+    uint32_t x8;
+    uint32_t x9;
+    uint32_t x10;
+    uint32_t x11;
+    uint32_t x12;
+    uint32_t x13;
+    uint32_t x14;
+    uint32_t x15;
+    uint32_t x16;
+    uint32_t x17;
+    uint32_t x18;
+    uint32_t x19;
+    uint32_t x20;
+    uint32_t x21;
+    uint32_t x22;
+    uint32_t x23;
+    uint32_t x24;
+    uint32_t x25;
+    uint32_t x26;
+    uint32_t x27;
+    uint32_t x28;
+    uint32_t x29;
+    uint32_t x30;
+    uint32_t x31;
+    uint32_t x32;
+    uint32_t x33;
+    uint32_t x34;
+    uint32_t x35;
+    uint32_t x36;
+    uint32_t x37;
+    uint32_t x38;
+    uint32_t x39;
+    uint32_t x40;
+    x1 = (UINT32_C(0x1ffffffe) - (arg1[0])); /* limb 0: constant minus limb */
+    x2 = ((fiat_secp521r1_uint1)(x1 >> 28) + (UINT32_C(0xffffffe) - (arg1[1])));
+    x3 =
+        ((fiat_secp521r1_uint1)(x2 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[2])));
+    x4 = ((fiat_secp521r1_uint1)(x3 >> 28) + (UINT32_C(0xffffffe) - (arg1[3])));
+    x5 =
+        ((fiat_secp521r1_uint1)(x4 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[4])));
+    x6 = ((fiat_secp521r1_uint1)(x5 >> 28) + (UINT32_C(0xffffffe) - (arg1[5])));
+    x7 = ((fiat_secp521r1_uint1)(x6 >> 27) + (UINT32_C(0xffffffe) - (arg1[6])));
+    x8 =
+        ((fiat_secp521r1_uint1)(x7 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[7])));
+    x9 = ((fiat_secp521r1_uint1)(x8 >> 28) + (UINT32_C(0xffffffe) - (arg1[8])));
+    x10 =
+        ((fiat_secp521r1_uint1)(x9 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[9])));
+    x11 = ((fiat_secp521r1_uint1)(x10 >> 28) +
+           (UINT32_C(0xffffffe) - (arg1[10])));
+    x12 = ((fiat_secp521r1_uint1)(x11 >> 27) +
+           (UINT32_C(0x1ffffffe) - (arg1[11])));
+    x13 = ((fiat_secp521r1_uint1)(x12 >> 28) +
+           (UINT32_C(0xffffffe) - (arg1[12])));
+    x14 = ((fiat_secp521r1_uint1)(x13 >> 27) +
+           (UINT32_C(0xffffffe) - (arg1[13])));
+    x15 = ((fiat_secp521r1_uint1)(x14 >> 27) +
+           (UINT32_C(0x1ffffffe) - (arg1[14])));
+    x16 = ((fiat_secp521r1_uint1)(x15 >> 28) +
+           (UINT32_C(0xffffffe) - (arg1[15])));
+    x17 = ((fiat_secp521r1_uint1)(x16 >> 27) +
+           (UINT32_C(0x1ffffffe) - (arg1[16])));
+    x18 = ((fiat_secp521r1_uint1)(x17 >> 28) +
+           (UINT32_C(0xffffffe) - (arg1[17])));
+    x19 = ((fiat_secp521r1_uint1)(x18 >> 27) +
+           (UINT32_C(0xffffffe) - (arg1[18])));
+    x20 = ((x1 & UINT32_C(0xfffffff)) +
+           (uint32_t)(fiat_secp521r1_uint1)(x19 >> 27)); /* top carry wraps into limb 0 */
+    x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); /* carry from x20 into limb 1 */
+    x22 = (x20 & UINT32_C(0xfffffff));
+    x23 = (x21 & UINT32_C(0x7ffffff));
+    x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); /* carry from x21 into limb 2; not re-masked */
+    x25 = (x4 & UINT32_C(0x7ffffff));
+    x26 = (x5 & UINT32_C(0xfffffff));
+    x27 = (x6 & UINT32_C(0x7ffffff));
+    x28 = (x7 & UINT32_C(0x7ffffff));
+    x29 = (x8 & UINT32_C(0xfffffff));
+    x30 = (x9 & UINT32_C(0x7ffffff));
+    x31 = (x10 & UINT32_C(0xfffffff));
+    x32 = (x11 & UINT32_C(0x7ffffff));
+    x33 = (x12 & UINT32_C(0xfffffff));
+    x34 = (x13 & UINT32_C(0x7ffffff));
+    x35 = (x14 & UINT32_C(0x7ffffff));
+    x36 = (x15 & UINT32_C(0xfffffff));
+    x37 = (x16 & UINT32_C(0x7ffffff));
+    x38 = (x17 & UINT32_C(0xfffffff));
+    x39 = (x18 & UINT32_C(0x7ffffff));
+    x40 = (x19 & UINT32_C(0x7ffffff));
+    out1[0] = x22;
+    out1[1] = x23;
+    out1[2] = x24;
+    out1[3] = x25;
+    out1[4] = x26;
+    out1[5] = x27;
+    out1[6] = x28;
+    out1[7] = x29;
+    out1[8] = x30;
+    out1[9] = x31;
+    out1[10] = x32;
+    out1[11] = x33;
+    out1[12] = x34;
+    out1[13] = x35;
+    out1[14] = x36;
+    out1[15] = x37;
+    out1[16] = x38;
+    out1[17] = x39;
+    out1[18] = x40;
+}
+
+/*
+ * The function fiat_secp521r1_selectznz is a multi-limb conditional select.
+ *
+ * Postconditions:
+ *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+ *
+ * Input Bounds:
+ *   arg1: [0x0 ~> 0x1]
+ *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static void
+fiat_secp521r1_selectznz(uint32_t out1[19],
+                         fiat_secp521r1_uint1 arg1,
+                         const uint32_t arg2[19],
+                         const uint32_t arg3[19])
+{ /*
+   * Implementation notes:
+   *  - The selection is done one limb at a time by calling
+   *    fiat_secp521r1_cmovznz_u32 (defined elsewhere in this file) with the
+   *    same condition arg1 for each of the 19 limbs.
+   *  - All 19 results are first collected in the temporaries x1..x19 and
+   *    only then copied to out1, so every read of arg2 and arg3 happens
+   *    before the first write to out1.
+   *  - There is no branch on arg1 in this function; whether the per-limb
+   *    helper is branch-free as well has to be checked at its definition.
+   */
+    uint32_t x1;
+    uint32_t x2;
+    uint32_t x3;
+    uint32_t x4;
+    uint32_t x5;
+    uint32_t x6;
+    uint32_t x7;
+    uint32_t x8;
+    uint32_t x9;
+    uint32_t x10;
+    uint32_t x11;
+    uint32_t x12;
+    uint32_t x13;
+    uint32_t x14;
+    uint32_t x15;
+    uint32_t x16;
+    uint32_t x17;
+    uint32_t x18;
+    uint32_t x19;
+    fiat_secp521r1_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0]));
+    fiat_secp521r1_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
+    fiat_secp521r1_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
+    fiat_secp521r1_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
+    fiat_secp521r1_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
+    fiat_secp521r1_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
+    fiat_secp521r1_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
+    fiat_secp521r1_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
+    fiat_secp521r1_cmovznz_u32(&x9, arg1, (arg2[8]), (arg3[8]));
+    fiat_secp521r1_cmovznz_u32(&x10, arg1, (arg2[9]), (arg3[9]));
+    fiat_secp521r1_cmovznz_u32(&x11, arg1, (arg2[10]), (arg3[10]));
+    fiat_secp521r1_cmovznz_u32(&x12, arg1, (arg2[11]), (arg3[11]));
+    fiat_secp521r1_cmovznz_u32(&x13, arg1, (arg2[12]), (arg3[12]));
+    fiat_secp521r1_cmovznz_u32(&x14, arg1, (arg2[13]), (arg3[13]));
+    fiat_secp521r1_cmovznz_u32(&x15, arg1, (arg2[14]), (arg3[14]));
+    fiat_secp521r1_cmovznz_u32(&x16, arg1, (arg2[15]), (arg3[15]));
+    fiat_secp521r1_cmovznz_u32(&x17, arg1, (arg2[16]), (arg3[16]));
+    fiat_secp521r1_cmovznz_u32(&x18, arg1, (arg2[17]), (arg3[17]));
+    fiat_secp521r1_cmovznz_u32(&x19, arg1, (arg2[18]), (arg3[18]));
+    out1[0] = x1;
+    out1[1] = x2;
+    out1[2] = x3;
+    out1[3] = x4;
+    out1[4] = x5;
+    out1[5] = x6;
+    out1[6] = x7;
+    out1[7] = x8;
+    out1[8] = x9;
+    out1[9] = x10;
+    out1[10] = x11;
+    out1[11] = x12;
+    out1[12] = x13;
+    out1[13] = x14;
+    out1[14] = x15;
+    out1[15] = x16;
+    out1[16] = x17;
+    out1[17] = x18;
+    out1[18] = x19;
+}
+
+/*
+ * The function fiat_secp521r1_to_bytes serializes a field element to bytes in little-endian order.
+ * + * Postconditions: + * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65] + * + * Output Bounds: + * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] + */ +static void +fiat_secp521r1_to_bytes( + uint8_t out1[66], const fiat_secp521r1_tight_field_element arg1) +{ + uint32_t x1; + fiat_secp521r1_uint1 x2; + uint32_t x3; + fiat_secp521r1_uint1 x4; + uint32_t x5; + fiat_secp521r1_uint1 x6; + uint32_t x7; + fiat_secp521r1_uint1 x8; + uint32_t x9; + fiat_secp521r1_uint1 x10; + uint32_t x11; + fiat_secp521r1_uint1 x12; + uint32_t x13; + fiat_secp521r1_uint1 x14; + uint32_t x15; + fiat_secp521r1_uint1 x16; + uint32_t x17; + fiat_secp521r1_uint1 x18; + uint32_t x19; + fiat_secp521r1_uint1 x20; + uint32_t x21; + fiat_secp521r1_uint1 x22; + uint32_t x23; + fiat_secp521r1_uint1 x24; + uint32_t x25; + fiat_secp521r1_uint1 x26; + uint32_t x27; + fiat_secp521r1_uint1 x28; + uint32_t x29; + fiat_secp521r1_uint1 x30; + uint32_t x31; + fiat_secp521r1_uint1 x32; + uint32_t x33; + fiat_secp521r1_uint1 x34; + uint32_t x35; 
+ fiat_secp521r1_uint1 x36; + uint32_t x37; + fiat_secp521r1_uint1 x38; + uint32_t x39; + uint32_t x40; + fiat_secp521r1_uint1 x41; + uint32_t x42; + fiat_secp521r1_uint1 x43; + uint32_t x44; + fiat_secp521r1_uint1 x45; + uint32_t x46; + fiat_secp521r1_uint1 x47; + uint32_t x48; + fiat_secp521r1_uint1 x49; + uint32_t x50; + fiat_secp521r1_uint1 x51; + uint32_t x52; + fiat_secp521r1_uint1 x53; + uint32_t x54; + fiat_secp521r1_uint1 x55; + uint32_t x56; + fiat_secp521r1_uint1 x57; + uint32_t x58; + fiat_secp521r1_uint1 x59; + uint32_t x60; + fiat_secp521r1_uint1 x61; + uint32_t x62; + fiat_secp521r1_uint1 x63; + uint32_t x64; + fiat_secp521r1_uint1 x65; + uint32_t x66; + fiat_secp521r1_uint1 x67; + uint32_t x68; + fiat_secp521r1_uint1 x69; + uint32_t x70; + fiat_secp521r1_uint1 x71; + uint32_t x72; + fiat_secp521r1_uint1 x73; + uint32_t x74; + fiat_secp521r1_uint1 x75; + uint32_t x76; + fiat_secp521r1_uint1 x77; + uint64_t x78; + uint32_t x79; + uint64_t x80; + uint32_t x81; + uint32_t x82; + uint32_t x83; + uint64_t x84; + uint32_t x85; + uint64_t x86; + uint32_t x87; + uint32_t x88; + uint32_t x89; + uint64_t x90; + uint32_t x91; + uint64_t x92; + uint32_t x93; + uint8_t x94; + uint32_t x95; + uint8_t x96; + uint32_t x97; + uint8_t x98; + uint8_t x99; + uint32_t x100; + uint8_t x101; + uint32_t x102; + uint8_t x103; + uint32_t x104; + uint8_t x105; + uint8_t x106; + uint64_t x107; + uint8_t x108; + uint32_t x109; + uint8_t x110; + uint32_t x111; + uint8_t x112; + uint32_t x113; + uint8_t x114; + uint8_t x115; + uint32_t x116; + uint8_t x117; + uint32_t x118; + uint8_t x119; + uint32_t x120; + uint8_t x121; + uint8_t x122; + uint64_t x123; + uint8_t x124; + uint32_t x125; + uint8_t x126; + uint32_t x127; + uint8_t x128; + uint32_t x129; + uint8_t x130; + uint8_t x131; + uint32_t x132; + uint8_t x133; + uint32_t x134; + uint8_t x135; + uint32_t x136; + uint8_t x137; + uint8_t x138; + uint32_t x139; + uint8_t x140; + uint32_t x141; + uint8_t x142; + uint32_t x143; + 
uint8_t x144; + uint8_t x145; + uint8_t x146; + uint32_t x147; + uint8_t x148; + uint32_t x149; + uint8_t x150; + uint8_t x151; + uint32_t x152; + uint8_t x153; + uint32_t x154; + uint8_t x155; + uint32_t x156; + uint8_t x157; + uint8_t x158; + uint64_t x159; + uint8_t x160; + uint32_t x161; + uint8_t x162; + uint32_t x163; + uint8_t x164; + uint32_t x165; + uint8_t x166; + uint8_t x167; + uint32_t x168; + uint8_t x169; + uint32_t x170; + uint8_t x171; + uint32_t x172; + uint8_t x173; + uint8_t x174; + uint64_t x175; + uint8_t x176; + uint32_t x177; + uint8_t x178; + uint32_t x179; + uint8_t x180; + uint32_t x181; + uint8_t x182; + uint8_t x183; + uint32_t x184; + uint8_t x185; + uint32_t x186; + uint8_t x187; + uint32_t x188; + uint8_t x189; + uint8_t x190; + uint32_t x191; + uint8_t x192; + uint32_t x193; + uint8_t x194; + uint32_t x195; + uint8_t x196; + uint8_t x197; + uint8_t x198; + uint32_t x199; + uint8_t x200; + uint32_t x201; + uint8_t x202; + uint8_t x203; + uint32_t x204; + uint8_t x205; + uint32_t x206; + uint8_t x207; + uint32_t x208; + uint8_t x209; + uint8_t x210; + uint64_t x211; + uint8_t x212; + uint32_t x213; + uint8_t x214; + uint32_t x215; + uint8_t x216; + uint32_t x217; + uint8_t x218; + uint8_t x219; + uint32_t x220; + uint8_t x221; + uint32_t x222; + uint8_t x223; + uint32_t x224; + uint8_t x225; + uint8_t x226; + uint64_t x227; + uint8_t x228; + uint32_t x229; + uint8_t x230; + uint32_t x231; + uint8_t x232; + uint32_t x233; + uint8_t x234; + fiat_secp521r1_uint1 x235; + fiat_secp521r1_subborrowx_u28(&x1, &x2, 0x0, (arg1[0]), + UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x3, &x4, x2, (arg1[1]), UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u28(&x5, &x6, x4, (arg1[2]), UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x7, &x8, x6, (arg1[3]), UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u28(&x9, &x10, x8, (arg1[4]), + UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x11, &x12, x10, (arg1[5]), + 
UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u27(&x13, &x14, x12, (arg1[6]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u28(&x15, &x16, x14, (arg1[7]), + UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x17, &x18, x16, (arg1[8]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u28(&x19, &x20, x18, (arg1[9]), + UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x21, &x22, x20, (arg1[10]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u28(&x23, &x24, x22, (arg1[11]), + UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x25, &x26, x24, (arg1[12]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u27(&x27, &x28, x26, (arg1[13]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u28(&x29, &x30, x28, (arg1[14]), + UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x31, &x32, x30, (arg1[15]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u28(&x33, &x34, x32, (arg1[16]), + UINT32_C(0xfffffff)); + fiat_secp521r1_subborrowx_u27(&x35, &x36, x34, (arg1[17]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_subborrowx_u27(&x37, &x38, x36, (arg1[18]), + UINT32_C(0x7ffffff)); + fiat_secp521r1_cmovznz_u32(&x39, x38, 0x0, UINT32_C(0xffffffff)); + fiat_secp521r1_addcarryx_u28(&x40, &x41, 0x0, x1, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x42, &x43, x41, x3, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u28(&x44, &x45, x43, x5, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x46, &x47, x45, x7, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u28(&x48, &x49, x47, x9, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x50, &x51, x49, x11, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u27(&x52, &x53, x51, x13, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u28(&x54, &x55, x53, x15, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x56, &x57, x55, x17, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u28(&x58, &x59, x57, 
x19, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x60, &x61, x59, x21, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u28(&x62, &x63, x61, x23, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x64, &x65, x63, x25, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u27(&x66, &x67, x65, x27, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u28(&x68, &x69, x67, x29, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x70, &x71, x69, x31, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u28(&x72, &x73, x71, x33, + (x39 & UINT32_C(0xfffffff))); + fiat_secp521r1_addcarryx_u27(&x74, &x75, x73, x35, + (x39 & UINT32_C(0x7ffffff))); + fiat_secp521r1_addcarryx_u27(&x76, &x77, x75, x37, + (x39 & UINT32_C(0x7ffffff))); + x78 = ((uint64_t)x76 << 6); + x79 = (x74 << 3); + x80 = ((uint64_t)x72 << 7); + x81 = (x70 << 4); + x82 = (x66 << 5); + x83 = (x64 << 2); + x84 = ((uint64_t)x62 << 6); + x85 = (x60 << 3); + x86 = ((uint64_t)x58 << 7); + x87 = (x56 << 4); + x88 = (x52 << 5); + x89 = (x50 << 2); + x90 = ((uint64_t)x48 << 6); + x91 = (x46 << 3); + x92 = ((uint64_t)x44 << 7); + x93 = (x42 << 4); + x94 = (uint8_t)(x40 & UINT8_C(0xff)); + x95 = (x40 >> 8); + x96 = (uint8_t)(x95 & UINT8_C(0xff)); + x97 = (x95 >> 8); + x98 = (uint8_t)(x97 & UINT8_C(0xff)); + x99 = (uint8_t)(x97 >> 8); + x100 = (x93 + (uint32_t)x99); + x101 = (uint8_t)(x100 & UINT8_C(0xff)); + x102 = (x100 >> 8); + x103 = (uint8_t)(x102 & UINT8_C(0xff)); + x104 = (x102 >> 8); + x105 = (uint8_t)(x104 & UINT8_C(0xff)); + x106 = (uint8_t)(x104 >> 8); + x107 = (x92 + (uint64_t)x106); + x108 = (uint8_t)(x107 & UINT8_C(0xff)); + x109 = (uint32_t)(x107 >> 8); + x110 = (uint8_t)(x109 & UINT8_C(0xff)); + x111 = (x109 >> 8); + x112 = (uint8_t)(x111 & UINT8_C(0xff)); + x113 = (x111 >> 8); + x114 = (uint8_t)(x113 & UINT8_C(0xff)); + x115 = (uint8_t)(x113 >> 8); + x116 = (x91 + (uint32_t)x115); + x117 = (uint8_t)(x116 & UINT8_C(0xff)); + x118 = 
(x116 >> 8); + x119 = (uint8_t)(x118 & UINT8_C(0xff)); + x120 = (x118 >> 8); + x121 = (uint8_t)(x120 & UINT8_C(0xff)); + x122 = (uint8_t)(x120 >> 8); + x123 = (x90 + (uint64_t)x122); + x124 = (uint8_t)(x123 & UINT8_C(0xff)); + x125 = (uint32_t)(x123 >> 8); + x126 = (uint8_t)(x125 & UINT8_C(0xff)); + x127 = (x125 >> 8); + x128 = (uint8_t)(x127 & UINT8_C(0xff)); + x129 = (x127 >> 8); + x130 = (uint8_t)(x129 & UINT8_C(0xff)); + x131 = (uint8_t)(x129 >> 8); + x132 = (x89 + (uint32_t)x131); + x133 = (uint8_t)(x132 & UINT8_C(0xff)); + x134 = (x132 >> 8); + x135 = (uint8_t)(x134 & UINT8_C(0xff)); + x136 = (x134 >> 8); + x137 = (uint8_t)(x136 & UINT8_C(0xff)); + x138 = (uint8_t)(x136 >> 8); + x139 = (x88 + (uint32_t)x138); + x140 = (uint8_t)(x139 & UINT8_C(0xff)); + x141 = (x139 >> 8); + x142 = (uint8_t)(x141 & UINT8_C(0xff)); + x143 = (x141 >> 8); + x144 = (uint8_t)(x143 & UINT8_C(0xff)); + x145 = (uint8_t)(x143 >> 8); + x146 = (uint8_t)(x54 & UINT8_C(0xff)); + x147 = (x54 >> 8); + x148 = (uint8_t)(x147 & UINT8_C(0xff)); + x149 = (x147 >> 8); + x150 = (uint8_t)(x149 & UINT8_C(0xff)); + x151 = (uint8_t)(x149 >> 8); + x152 = (x87 + (uint32_t)x151); + x153 = (uint8_t)(x152 & UINT8_C(0xff)); + x154 = (x152 >> 8); + x155 = (uint8_t)(x154 & UINT8_C(0xff)); + x156 = (x154 >> 8); + x157 = (uint8_t)(x156 & UINT8_C(0xff)); + x158 = (uint8_t)(x156 >> 8); + x159 = (x86 + (uint64_t)x158); + x160 = (uint8_t)(x159 & UINT8_C(0xff)); + x161 = (uint32_t)(x159 >> 8); + x162 = (uint8_t)(x161 & UINT8_C(0xff)); + x163 = (x161 >> 8); + x164 = (uint8_t)(x163 & UINT8_C(0xff)); + x165 = (x163 >> 8); + x166 = (uint8_t)(x165 & UINT8_C(0xff)); + x167 = (uint8_t)(x165 >> 8); + x168 = (x85 + (uint32_t)x167); + x169 = (uint8_t)(x168 & UINT8_C(0xff)); + x170 = (x168 >> 8); + x171 = (uint8_t)(x170 & UINT8_C(0xff)); + x172 = (x170 >> 8); + x173 = (uint8_t)(x172 & UINT8_C(0xff)); + x174 = (uint8_t)(x172 >> 8); + x175 = (x84 + (uint64_t)x174); + x176 = (uint8_t)(x175 & UINT8_C(0xff)); + x177 = 
(uint32_t)(x175 >> 8); + x178 = (uint8_t)(x177 & UINT8_C(0xff)); + x179 = (x177 >> 8); + x180 = (uint8_t)(x179 & UINT8_C(0xff)); + x181 = (x179 >> 8); + x182 = (uint8_t)(x181 & UINT8_C(0xff)); + x183 = (uint8_t)(x181 >> 8); + x184 = (x83 + (uint32_t)x183); + x185 = (uint8_t)(x184 & UINT8_C(0xff)); + x186 = (x184 >> 8); + x187 = (uint8_t)(x186 & UINT8_C(0xff)); + x188 = (x186 >> 8); + x189 = (uint8_t)(x188 & UINT8_C(0xff)); + x190 = (uint8_t)(x188 >> 8); + x191 = (x82 + (uint32_t)x190); + x192 = (uint8_t)(x191 & UINT8_C(0xff)); + x193 = (x191 >> 8); + x194 = (uint8_t)(x193 & UINT8_C(0xff)); + x195 = (x193 >> 8); + x196 = (uint8_t)(x195 & UINT8_C(0xff)); + x197 = (uint8_t)(x195 >> 8); + x198 = (uint8_t)(x68 & UINT8_C(0xff)); + x199 = (x68 >> 8); + x200 = (uint8_t)(x199 & UINT8_C(0xff)); + x201 = (x199 >> 8); + x202 = (uint8_t)(x201 & UINT8_C(0xff)); + x203 = (uint8_t)(x201 >> 8); + x204 = (x81 + (uint32_t)x203); + x205 = (uint8_t)(x204 & UINT8_C(0xff)); + x206 = (x204 >> 8); + x207 = (uint8_t)(x206 & UINT8_C(0xff)); + x208 = (x206 >> 8); + x209 = (uint8_t)(x208 & UINT8_C(0xff)); + x210 = (uint8_t)(x208 >> 8); + x211 = (x80 + (uint64_t)x210); + x212 = (uint8_t)(x211 & UINT8_C(0xff)); + x213 = (uint32_t)(x211 >> 8); + x214 = (uint8_t)(x213 & UINT8_C(0xff)); + x215 = (x213 >> 8); + x216 = (uint8_t)(x215 & UINT8_C(0xff)); + x217 = (x215 >> 8); + x218 = (uint8_t)(x217 & UINT8_C(0xff)); + x219 = (uint8_t)(x217 >> 8); + x220 = (x79 + (uint32_t)x219); + x221 = (uint8_t)(x220 & UINT8_C(0xff)); + x222 = (x220 >> 8); + x223 = (uint8_t)(x222 & UINT8_C(0xff)); + x224 = (x222 >> 8); + x225 = (uint8_t)(x224 & UINT8_C(0xff)); + x226 = (uint8_t)(x224 >> 8); + x227 = (x78 + (uint64_t)x226); + x228 = (uint8_t)(x227 & UINT8_C(0xff)); + x229 = (uint32_t)(x227 >> 8); + x230 = (uint8_t)(x229 & UINT8_C(0xff)); + x231 = (x229 >> 8); + x232 = (uint8_t)(x231 & UINT8_C(0xff)); + x233 = (x231 >> 8); + x234 = (uint8_t)(x233 & UINT8_C(0xff)); + x235 = (fiat_secp521r1_uint1)(x233 >> 8); + out1[0] = 
x94; + out1[1] = x96; + out1[2] = x98; + out1[3] = x101; + out1[4] = x103; + out1[5] = x105; + out1[6] = x108; + out1[7] = x110; + out1[8] = x112; + out1[9] = x114; + out1[10] = x117; + out1[11] = x119; + out1[12] = x121; + out1[13] = x124; + out1[14] = x126; + out1[15] = x128; + out1[16] = x130; + out1[17] = x133; + out1[18] = x135; + out1[19] = x137; + out1[20] = x140; + out1[21] = x142; + out1[22] = x144; + out1[23] = x145; + out1[24] = x146; + out1[25] = x148; + out1[26] = x150; + out1[27] = x153; + out1[28] = x155; + out1[29] = x157; + out1[30] = x160; + out1[31] = x162; + out1[32] = x164; + out1[33] = x166; + out1[34] = x169; + out1[35] = x171; + out1[36] = x173; + out1[37] = x176; + out1[38] = x178; + out1[39] = x180; + out1[40] = x182; + out1[41] = x185; + out1[42] = x187; + out1[43] = x189; + out1[44] = x192; + out1[45] = x194; + out1[46] = x196; + out1[47] = x197; + out1[48] = x198; + out1[49] = x200; + out1[50] = x202; + out1[51] = x205; + out1[52] = x207; + out1[53] = x209; + out1[54] = x212; + out1[55] = x214; + out1[56] = x216; + out1[57] = x218; + out1[58] = x221; + out1[59] = x223; + out1[60] = x225; + out1[61] = x228; + out1[62] = x230; + out1[63] = x232; + out1[64] = x234; + out1[65] = x235; +} + +/* + * The function fiat_secp521r1_from_bytes deserializes a field element from bytes in little-endian order. 
+ * + * Postconditions: + * eval out1 mod m = bytes_eval arg1 mod m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] + */ +static void +fiat_secp521r1_from_bytes(fiat_secp521r1_tight_field_element out1, + const uint8_t arg1[66]) +{ + uint32_t x1; + uint32_t x2; + uint32_t x3; + uint32_t x4; + uint32_t x5; + uint32_t x6; + uint32_t x7; + uint64_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint32_t x16; + uint32_t x17; + uint8_t x18; + uint32_t x19; + uint32_t x20; + uint32_t x21; + uint32_t x22; + uint32_t x23; + uint32_t x24; + uint64_t x25; + uint32_t x26; + uint32_t x27; + uint32_t x28; + uint32_t x29; + uint32_t x30; + uint32_t x31; + uint64_t x32; + uint32_t x33; + uint32_t x34; + uint32_t x35; + uint32_t x36; + uint32_t x37; + uint32_t x38; + uint32_t x39; + uint32_t x40; + uint32_t x41; + uint8_t x42; + uint32_t x43; + uint32_t x44; + uint32_t x45; + uint32_t x46; + uint32_t x47; + uint32_t x48; + uint64_t x49; + uint32_t x50; + 
uint32_t x51; + uint32_t x52; + uint32_t x53; + uint32_t x54; + uint32_t x55; + uint64_t x56; + uint32_t x57; + uint32_t x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + uint32_t x63; + uint32_t x64; + uint32_t x65; + uint8_t x66; + uint32_t x67; + uint32_t x68; + uint32_t x69; + uint32_t x70; + uint8_t x71; + uint32_t x72; + uint32_t x73; + uint32_t x74; + uint32_t x75; + fiat_secp521r1_uint1 x76; + uint32_t x77; + uint32_t x78; + uint32_t x79; + uint64_t x80; + uint32_t x81; + uint8_t x82; + uint32_t x83; + uint32_t x84; + uint32_t x85; + uint32_t x86; + uint8_t x87; + uint32_t x88; + uint32_t x89; + uint32_t x90; + uint64_t x91; + uint32_t x92; + uint8_t x93; + uint32_t x94; + uint32_t x95; + uint32_t x96; + uint32_t x97; + uint8_t x98; + uint32_t x99; + uint32_t x100; + uint32_t x101; + uint32_t x102; + uint32_t x103; + uint32_t x104; + uint32_t x105; + uint8_t x106; + uint32_t x107; + uint32_t x108; + uint32_t x109; + uint32_t x110; + fiat_secp521r1_uint1 x111; + uint32_t x112; + uint32_t x113; + uint32_t x114; + uint64_t x115; + uint32_t x116; + uint8_t x117; + uint32_t x118; + uint32_t x119; + uint32_t x120; + uint32_t x121; + uint8_t x122; + uint32_t x123; + uint32_t x124; + uint32_t x125; + uint64_t x126; + uint32_t x127; + uint8_t x128; + uint32_t x129; + uint32_t x130; + uint32_t x131; + uint32_t x132; + uint8_t x133; + uint32_t x134; + uint32_t x135; + uint32_t x136; + uint32_t x137; + uint32_t x138; + uint32_t x139; + uint32_t x140; + uint8_t x141; + uint32_t x142; + uint32_t x143; + uint32_t x144; + uint32_t x145; + fiat_secp521r1_uint1 x146; + uint32_t x147; + uint32_t x148; + uint32_t x149; + uint64_t x150; + uint32_t x151; + uint8_t x152; + uint32_t x153; + uint32_t x154; + uint32_t x155; + uint32_t x156; + uint8_t x157; + uint32_t x158; + uint32_t x159; + uint32_t x160; + uint32_t x161; + x1 = ((uint32_t)(fiat_secp521r1_uint1)(arg1[65]) << 26); + x2 = ((uint32_t)(arg1[64]) << 18); + x3 = ((uint32_t)(arg1[63]) << 10); + x4 = 
((uint32_t)(arg1[62]) << 2); + x5 = ((uint32_t)(arg1[61]) << 21); + x6 = ((uint32_t)(arg1[60]) << 13); + x7 = ((uint32_t)(arg1[59]) << 5); + x8 = ((uint64_t)(arg1[58]) << 25); + x9 = ((uint32_t)(arg1[57]) << 17); + x10 = ((uint32_t)(arg1[56]) << 9); + x11 = ((uint32_t)(arg1[55]) * 0x2); + x12 = ((uint32_t)(arg1[54]) << 20); + x13 = ((uint32_t)(arg1[53]) << 12); + x14 = ((uint32_t)(arg1[52]) << 4); + x15 = ((uint32_t)(arg1[51]) << 24); + x16 = ((uint32_t)(arg1[50]) << 16); + x17 = ((uint32_t)(arg1[49]) << 8); + x18 = (arg1[48]); + x19 = ((uint32_t)(arg1[47]) << 19); + x20 = ((uint32_t)(arg1[46]) << 11); + x21 = ((uint32_t)(arg1[45]) << 3); + x22 = ((uint32_t)(arg1[44]) << 22); + x23 = ((uint32_t)(arg1[43]) << 14); + x24 = ((uint32_t)(arg1[42]) << 6); + x25 = ((uint64_t)(arg1[41]) << 26); + x26 = ((uint32_t)(arg1[40]) << 18); + x27 = ((uint32_t)(arg1[39]) << 10); + x28 = ((uint32_t)(arg1[38]) << 2); + x29 = ((uint32_t)(arg1[37]) << 21); + x30 = ((uint32_t)(arg1[36]) << 13); + x31 = ((uint32_t)(arg1[35]) << 5); + x32 = ((uint64_t)(arg1[34]) << 25); + x33 = ((uint32_t)(arg1[33]) << 17); + x34 = ((uint32_t)(arg1[32]) << 9); + x35 = ((uint32_t)(arg1[31]) * 0x2); + x36 = ((uint32_t)(arg1[30]) << 20); + x37 = ((uint32_t)(arg1[29]) << 12); + x38 = ((uint32_t)(arg1[28]) << 4); + x39 = ((uint32_t)(arg1[27]) << 24); + x40 = ((uint32_t)(arg1[26]) << 16); + x41 = ((uint32_t)(arg1[25]) << 8); + x42 = (arg1[24]); + x43 = ((uint32_t)(arg1[23]) << 19); + x44 = ((uint32_t)(arg1[22]) << 11); + x45 = ((uint32_t)(arg1[21]) << 3); + x46 = ((uint32_t)(arg1[20]) << 22); + x47 = ((uint32_t)(arg1[19]) << 14); + x48 = ((uint32_t)(arg1[18]) << 6); + x49 = ((uint64_t)(arg1[17]) << 26); + x50 = ((uint32_t)(arg1[16]) << 18); + x51 = ((uint32_t)(arg1[15]) << 10); + x52 = ((uint32_t)(arg1[14]) << 2); + x53 = ((uint32_t)(arg1[13]) << 21); + x54 = ((uint32_t)(arg1[12]) << 13); + x55 = ((uint32_t)(arg1[11]) << 5); + x56 = ((uint64_t)(arg1[10]) << 25); + x57 = ((uint32_t)(arg1[9]) << 17); + x58 = 
((uint32_t)(arg1[8]) << 9); + x59 = ((uint32_t)(arg1[7]) * 0x2); + x60 = ((uint32_t)(arg1[6]) << 20); + x61 = ((uint32_t)(arg1[5]) << 12); + x62 = ((uint32_t)(arg1[4]) << 4); + x63 = ((uint32_t)(arg1[3]) << 24); + x64 = ((uint32_t)(arg1[2]) << 16); + x65 = ((uint32_t)(arg1[1]) << 8); + x66 = (arg1[0]); + x67 = (x65 + (uint32_t)x66); + x68 = (x64 + x67); + x69 = (x63 + x68); + x70 = (x69 & UINT32_C(0xfffffff)); + x71 = (uint8_t)(x69 >> 28); + x72 = (x62 + (uint32_t)x71); + x73 = (x61 + x72); + x74 = (x60 + x73); + x75 = (x74 & UINT32_C(0x7ffffff)); + x76 = (fiat_secp521r1_uint1)(x74 >> 27); + x77 = (x59 + (uint32_t)x76); + x78 = (x58 + x77); + x79 = (x57 + x78); + x80 = (x56 + x79); + x81 = (uint32_t)(x80 & UINT32_C(0xfffffff)); + x82 = (uint8_t)(x80 >> 28); + x83 = (x55 + (uint32_t)x82); + x84 = (x54 + x83); + x85 = (x53 + x84); + x86 = (x85 & UINT32_C(0x7ffffff)); + x87 = (uint8_t)(x85 >> 27); + x88 = (x52 + (uint32_t)x87); + x89 = (x51 + x88); + x90 = (x50 + x89); + x91 = (x49 + x90); + x92 = (uint32_t)(x91 & UINT32_C(0xfffffff)); + x93 = (uint8_t)(x91 >> 28); + x94 = (x48 + (uint32_t)x93); + x95 = (x47 + x94); + x96 = (x46 + x95); + x97 = (x96 & UINT32_C(0x7ffffff)); + x98 = (uint8_t)(x96 >> 27); + x99 = (x45 + (uint32_t)x98); + x100 = (x44 + x99); + x101 = (x43 + x100); + x102 = (x41 + (uint32_t)x42); + x103 = (x40 + x102); + x104 = (x39 + x103); + x105 = (x104 & UINT32_C(0xfffffff)); + x106 = (uint8_t)(x104 >> 28); + x107 = (x38 + (uint32_t)x106); + x108 = (x37 + x107); + x109 = (x36 + x108); + x110 = (x109 & UINT32_C(0x7ffffff)); + x111 = (fiat_secp521r1_uint1)(x109 >> 27); + x112 = (x35 + (uint32_t)x111); + x113 = (x34 + x112); + x114 = (x33 + x113); + x115 = (x32 + x114); + x116 = (uint32_t)(x115 & UINT32_C(0xfffffff)); + x117 = (uint8_t)(x115 >> 28); + x118 = (x31 + (uint32_t)x117); + x119 = (x30 + x118); + x120 = (x29 + x119); + x121 = (x120 & UINT32_C(0x7ffffff)); + x122 = (uint8_t)(x120 >> 27); + x123 = (x28 + (uint32_t)x122); + x124 = (x27 + x123); + 
x125 = (x26 + x124); + x126 = (x25 + x125); + x127 = (uint32_t)(x126 & UINT32_C(0xfffffff)); + x128 = (uint8_t)(x126 >> 28); + x129 = (x24 + (uint32_t)x128); + x130 = (x23 + x129); + x131 = (x22 + x130); + x132 = (x131 & UINT32_C(0x7ffffff)); + x133 = (uint8_t)(x131 >> 27); + x134 = (x21 + (uint32_t)x133); + x135 = (x20 + x134); + x136 = (x19 + x135); + x137 = (x17 + (uint32_t)x18); + x138 = (x16 + x137); + x139 = (x15 + x138); + x140 = (x139 & UINT32_C(0xfffffff)); + x141 = (uint8_t)(x139 >> 28); + x142 = (x14 + (uint32_t)x141); + x143 = (x13 + x142); + x144 = (x12 + x143); + x145 = (x144 & UINT32_C(0x7ffffff)); + x146 = (fiat_secp521r1_uint1)(x144 >> 27); + x147 = (x11 + (uint32_t)x146); + x148 = (x10 + x147); + x149 = (x9 + x148); + x150 = (x8 + x149); + x151 = (uint32_t)(x150 & UINT32_C(0xfffffff)); + x152 = (uint8_t)(x150 >> 28); + x153 = (x7 + (uint32_t)x152); + x154 = (x6 + x153); + x155 = (x5 + x154); + x156 = (x155 & UINT32_C(0x7ffffff)); + x157 = (uint8_t)(x155 >> 27); + x158 = (x4 + (uint32_t)x157); + x159 = (x3 + x158); + x160 = (x2 + x159); + x161 = (x1 + x160); + out1[0] = x70; + out1[1] = x75; + out1[2] = x81; + out1[3] = x86; + out1[4] = x92; + out1[5] = x97; + out1[6] = x101; + out1[7] = x105; + out1[8] = x110; + out1[9] = x116; + out1[10] = x121; + out1[11] = x127; + out1[12] = x132; + out1[13] = x136; + out1[14] = x140; + out1[15] = x145; + out1[16] = x151; + out1[17] = x156; + out1[18] = x161; +} + +/* END verbatim fiat code */ + +/* curve-related constants */ + +static const limb_t const_one[19] = { + UINT32_C(0x00000001), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0x00000000) +}; + +static const 
limb_t const_b[19] = { + UINT32_C(0x0B503F00), UINT32_C(0x0451FD46), UINT32_C(0x0869E3DE), + UINT32_C(0x03F107A5), UINT32_C(0x0C1CD5CF), UINT32_C(0x074EEC6F), + UINT32_C(0x00B29605), UINT32_C(0x0C7E937B), UINT32_C(0x0193951E), + UINT32_C(0x0213C2AC), UINT32_C(0x013231DE), UINT32_C(0x07CEE2D2), + UINT32_C(0x06E66CC5), UINT32_C(0x0516D392), UINT32_C(0x068540EE), + UINT32_C(0x01A21A0B), UINT32_C(0x09343F25), UINT32_C(0x072C31C3), + UINT32_C(0x014654FA) +}; + +/* LUT for scalar multiplication by comb interleaving */ +static const pt_aff_t lut_cmb[13][16] = { + { + { { UINT32_C(0x02E5BD66), UINT32_C(0x07E7E31C), UINT32_C(0x048537F2), + UINT32_C(0x067830AD), UINT32_C(0x0378CD22), UINT32_C(0x01E8BFEA), + UINT32_C(0x07F0EE09), UINT32_C(0x0FE75928), UINT32_C(0x04B5E77E), + UINT32_C(0x0A7B7542), UINT32_C(0x05EC0D69), UINT32_C(0x0487E0A2), + UINT32_C(0x06414FED), UINT32_C(0x04E32409), UINT32_C(0x0395B442), + UINT32_C(0x03ECB662), UINT32_C(0x09D39B3C), UINT32_C(0x00D6E080), + UINT32_C(0x031A1638) }, + { UINT32_C(0x0FD16650), UINT32_C(0x03E94769), UINT32_C(0x05848111), + UINT32_C(0x0610D44E), UINT32_C(0x0D84D4F1), UINT32_C(0x004FEB41), + UINT32_C(0x062A85C8), UINT32_C(0x0EF42640), UINT32_C(0x06E72995), + UINT32_C(0x0CCC592F), UINT32_C(0x07A2E4E7), UINT32_C(0x01A05EBE), + UINT32_C(0x0255E6D1), UINT32_C(0x04C7AA22), UINT32_C(0x0C7D1BD9), + UINT32_C(0x00A5FB42), UINT32_C(0x078008B9), UINT32_C(0x054F1347), + UINT32_C(0x0460E4A5) } }, + { { UINT32_C(0x0E37AD7D), UINT32_C(0x0119D2ED), UINT32_C(0x05D40B4B), + UINT32_C(0x0210C586), UINT32_C(0x086EBAD2), UINT32_C(0x05AD67F8), + UINT32_C(0x00ED35E8), UINT32_C(0x0A483205), UINT32_C(0x03F164A3), + UINT32_C(0x051BA35A), UINT32_C(0x074225AF), UINT32_C(0x0AE796B5), + UINT32_C(0x06C48F66), UINT32_C(0x05A95372), UINT32_C(0x05959479), + UINT32_C(0x01D6A64B), UINT32_C(0x0232BBB2), UINT32_C(0x04887BC5), + UINT32_C(0x069CF4D4) }, + { UINT32_C(0x0E86C0E5), UINT32_C(0x0588CA1E), UINT32_C(0x0B2084BE), + UINT32_C(0x01379274), UINT32_C(0x0C33C417), 
UINT32_C(0x0477B0F1), + UINT32_C(0x016AD676), UINT32_C(0x0DC575B0), UINT32_C(0x02DD4CF8), + UINT32_C(0x0B9DD85C), UINT32_C(0x0563F46A), UINT32_C(0x0C5F4BE2), + UINT32_C(0x020AA740), UINT32_C(0x078AABFD), UINT32_C(0x0AB814F2), + UINT32_C(0x01F86C6C), UINT32_C(0x05BBB32F), UINT32_C(0x072FBF4C), + UINT32_C(0x04FA6C0E) } }, + { { UINT32_C(0x0C8F3078), UINT32_C(0x02B5096E), UINT32_C(0x062E71AB), + UINT32_C(0x043CDB12), UINT32_C(0x068CA75F), UINT32_C(0x03C4DF9E), + UINT32_C(0x038897F5), UINT32_C(0x0E301423), UINT32_C(0x03C0C6D5), + UINT32_C(0x0F59C870), UINT32_C(0x03571E2E), UINT32_C(0x04933C0F), + UINT32_C(0x076D4FC3), UINT32_C(0x03D2CB77), UINT32_C(0x004EB0BF), + UINT32_C(0x03C3391C), UINT32_C(0x08658E7B), UINT32_C(0x00A524F4), + UINT32_C(0x0194AFCF) }, + { UINT32_C(0x0EB090CB), UINT32_C(0x03CC3E8D), UINT32_C(0x09EFF02E), + UINT32_C(0x00E4AE6A), UINT32_C(0x0DE747C0), UINT32_C(0x00473D7F), + UINT32_C(0x0188AA01), UINT32_C(0x072CF374), UINT32_C(0x06897C90), + UINT32_C(0x08E10F76), UINT32_C(0x02F93406), UINT32_C(0x0147B760), + UINT32_C(0x03A1CB80), UINT32_C(0x00E6C7F4), UINT32_C(0x0A811291), + UINT32_C(0x02B73114), UINT32_C(0x03ADD914), UINT32_C(0x037BACC0), + UINT32_C(0x056F9BBC) } }, + { { UINT32_C(0x0816ECD4), UINT32_C(0x04EAD882), UINT32_C(0x04C33403), + UINT32_C(0x07EA1FB8), UINT32_C(0x0F11BE54), UINT32_C(0x043738EE), + UINT32_C(0x064D36F9), UINT32_C(0x0FC698D8), UINT32_C(0x0308D0AB), + UINT32_C(0x0298BB18), UINT32_C(0x02585EE2), UINT32_C(0x08A3C063), + UINT32_C(0x023D520C), UINT32_C(0x02F91707), UINT32_C(0x0B073A0C), + UINT32_C(0x0365FDA0), UINT32_C(0x0EC68DDD), UINT32_C(0x0333AB6F), + UINT32_C(0x015B5747) }, + { UINT32_C(0x0525251B), UINT32_C(0x06B8BC90), UINT32_C(0x0DF8F6B8), + UINT32_C(0x06254BBB), UINT32_C(0x097E79D9), UINT32_C(0x01647386), + UINT32_C(0x04A91D1A), UINT32_C(0x0DEC9E2B), UINT32_C(0x050F293C), + UINT32_C(0x07BCAAD7), UINT32_C(0x033144D9), UINT32_C(0x0375C76F), + UINT32_C(0x040A093C), UINT32_C(0x05AE2C16), UINT32_C(0x09D68478), + 
UINT32_C(0x058317A3), UINT32_C(0x054221A3), UINT32_C(0x07B37554), + UINT32_C(0x00F4B46D) } }, + { { UINT32_C(0x07CBE207), UINT32_C(0x04562796), UINT32_C(0x0A50CC3E), + UINT32_C(0x0757B0B9), UINT32_C(0x063D3D42), UINT32_C(0x07DC968C), + UINT32_C(0x079E2AB6), UINT32_C(0x0134DA35), UINT32_C(0x029E1396), + UINT32_C(0x0D6CCAE8), UINT32_C(0x0628B718), UINT32_C(0x0A64B12A), + UINT32_C(0x06E621D1), UINT32_C(0x0769A2A0), UINT32_C(0x0156D488), + UINT32_C(0x075BF157), UINT32_C(0x04304D45), UINT32_C(0x046B3C3C), + UINT32_C(0x05614E27) }, + { UINT32_C(0x09AD2A4E), UINT32_C(0x020EA86B), UINT32_C(0x001E6875), + UINT32_C(0x055D2511), UINT32_C(0x01F5CDB0), UINT32_C(0x03D2AFF6), + UINT32_C(0x007FAB76), UINT32_C(0x0057AC84), UINT32_C(0x069E5756), + UINT32_C(0x0688DC1A), UINT32_C(0x0744C7BB), UINT32_C(0x0EDB2096), + UINT32_C(0x053B873A), UINT32_C(0x01844532), UINT32_C(0x07AE938E), + UINT32_C(0x055557A2), UINT32_C(0x0BE73E16), UINT32_C(0x0193515D), + UINT32_C(0x00A8B986) } }, + { { UINT32_C(0x0A0CDB9A), UINT32_C(0x040E02DD), UINT32_C(0x035205D9), + UINT32_C(0x0049F499), UINT32_C(0x02140570), UINT32_C(0x02F8C644), + UINT32_C(0x068CD8D7), UINT32_C(0x0663DA1B), UINT32_C(0x05BC5332), + UINT32_C(0x022CA5E7), UINT32_C(0x058A9E53), UINT32_C(0x02550FBC), + UINT32_C(0x035F05E1), UINT32_C(0x076AEE3F), UINT32_C(0x0B4315CF), + UINT32_C(0x01A39573), UINT32_C(0x0BFEA8DE), UINT32_C(0x024B3FBD), + UINT32_C(0x0229D610) }, + { UINT32_C(0x0E48C808), UINT32_C(0x0074F92C), UINT32_C(0x0336BAB1), + UINT32_C(0x001C7E90), UINT32_C(0x0CDB72B2), UINT32_C(0x06452A54), + UINT32_C(0x01C49198), UINT32_C(0x0B42A4AB), UINT32_C(0x048A90E8), + UINT32_C(0x03705637), UINT32_C(0x02BA9C17), UINT32_C(0x024FB4BA), + UINT32_C(0x00842F41), UINT32_C(0x01D6EAB3), UINT32_C(0x054FB229), + UINT32_C(0x00CA8770), UINT32_C(0x0253093A), UINT32_C(0x07F97744), + UINT32_C(0x025BECC0) } }, + { { UINT32_C(0x02FBCDA7), UINT32_C(0x007848D3), UINT32_C(0x01DFF031), + UINT32_C(0x07601567), UINT32_C(0x0BA52FB0), UINT32_C(0x01E6AE23), + 
UINT32_C(0x01AA852F), UINT32_C(0x003C996A), UINT32_C(0x0445908E), + UINT32_C(0x070CC265), UINT32_C(0x0257D5EB), UINT32_C(0x08E13BB7), + UINT32_C(0x03786D30), UINT32_C(0x049FB9B6), UINT32_C(0x0924861A), + UINT32_C(0x0065D2B4), UINT32_C(0x0D5B39AF), UINT32_C(0x07309872), + UINT32_C(0x01F8FA63) }, + { UINT32_C(0x022A71C9), UINT32_C(0x01A01FB0), UINT32_C(0x0FD3EE52), + UINT32_C(0x0555F222), UINT32_C(0x0F0D8667), UINT32_C(0x05472FEE), + UINT32_C(0x0136FEE9), UINT32_C(0x08BC763F), UINT32_C(0x03D5D583), + UINT32_C(0x0C425583), UINT32_C(0x04F5CB83), UINT32_C(0x071A71E9), + UINT32_C(0x061B5508), UINT32_C(0x0676A851), UINT32_C(0x03ED5A08), + UINT32_C(0x01926DAA), UINT32_C(0x0FDB5234), UINT32_C(0x056DAF03), + UINT32_C(0x0423B963) } }, + { { UINT32_C(0x0CB8DB55), UINT32_C(0x02FE337B), UINT32_C(0x0F257BD3), + UINT32_C(0x02D303C7), UINT32_C(0x0C766E36), UINT32_C(0x0723F00C), + UINT32_C(0x03C3ADE8), UINT32_C(0x0BD00FFE), UINT32_C(0x01CCE27D), + UINT32_C(0x051C2372), UINT32_C(0x06A65BE2), UINT32_C(0x014B5A5E), + UINT32_C(0x042D0282), UINT32_C(0x05C7DE61), UINT32_C(0x06D4300F), + UINT32_C(0x0558FC54), UINT32_C(0x08CBE082), UINT32_C(0x03579724), + UINT32_C(0x01ADAB62) }, + { UINT32_C(0x01475465), UINT32_C(0x0343480A), UINT32_C(0x057BB2AC), + UINT32_C(0x0219888D), UINT32_C(0x06491BF6), UINT32_C(0x00CB25B2), + UINT32_C(0x010A4711), UINT32_C(0x09470A80), UINT32_C(0x01062C89), + UINT32_C(0x00BDAAFD), UINT32_C(0x020D32E9), UINT32_C(0x02E92D88), + UINT32_C(0x026EB483), UINT32_C(0x06F824B5), UINT32_C(0x03EDBF63), + UINT32_C(0x0664D233), UINT32_C(0x023AD4F9), UINT32_C(0x04E2AE27), + UINT32_C(0x06D1A368) } }, + { { UINT32_C(0x03110AE0), UINT32_C(0x07817A85), UINT32_C(0x034820ED), + UINT32_C(0x00855E1A), UINT32_C(0x003FE30C), UINT32_C(0x06D5A04E), + UINT32_C(0x06FA73CC), UINT32_C(0x04FE0287), UINT32_C(0x00A69E67), + UINT32_C(0x0A10B0EC), UINT32_C(0x049E4D24), UINT32_C(0x0ED35994), + UINT32_C(0x01A7E8AC), UINT32_C(0x04CF74F1), UINT32_C(0x0923906A), + UINT32_C(0x03874645), UINT32_C(0x0DB42741), 
UINT32_C(0x060FE261), + UINT32_C(0x06C0376D) }, + { UINT32_C(0x00E64647), UINT32_C(0x039CB7C7), UINT32_C(0x0EABEA6B), + UINT32_C(0x02B29856), UINT32_C(0x00839A41), UINT32_C(0x07C5AB7D), + UINT32_C(0x0697B3AB), UINT32_C(0x06DD0BF0), UINT32_C(0x05A564EF), + UINT32_C(0x02647BF3), UINT32_C(0x05856454), UINT32_C(0x02A635A2), + UINT32_C(0x033DA644), UINT32_C(0x05BCCA9A), UINT32_C(0x0EDDD106), + UINT32_C(0x011D4E4A), UINT32_C(0x0AEDB782), UINT32_C(0x03AFB62C), + UINT32_C(0x0215A0FC) } }, + { { UINT32_C(0x08D6A19B), UINT32_C(0x07F0B241), UINT32_C(0x077BC8F1), + UINT32_C(0x0063CE4B), UINT32_C(0x0C37FB3D), UINT32_C(0x075E9165), + UINT32_C(0x049192AB), UINT32_C(0x06266967), UINT32_C(0x03B30963), + UINT32_C(0x01CFE3F4), UINT32_C(0x059B66F2), UINT32_C(0x01FBFFC2), + UINT32_C(0x01D577D5), UINT32_C(0x022DBBF0), UINT32_C(0x05A1A072), + UINT32_C(0x07948C2D), UINT32_C(0x08690F81), UINT32_C(0x0490C833), + UINT32_C(0x02663733) }, + { UINT32_C(0x0BFD0575), UINT32_C(0x0091A695), UINT32_C(0x07FC8952), + UINT32_C(0x0313D53F), UINT32_C(0x0DDFD693), UINT32_C(0x06458C70), + UINT32_C(0x058761CC), UINT32_C(0x02EB8CF9), UINT32_C(0x02D963FF), + UINT32_C(0x0AEE4EE7), UINT32_C(0x05DC6CA8), UINT32_C(0x0D2B3143), + UINT32_C(0x038ADEF3), UINT32_C(0x033E9457), UINT32_C(0x035B245D), + UINT32_C(0x01424975), UINT32_C(0x03DAB987), UINT32_C(0x00C4D404), + UINT32_C(0x04DF5768) } }, + { { UINT32_C(0x03C8C9ED), UINT32_C(0x06F39969), UINT32_C(0x08DA5A85), + UINT32_C(0x02407274), UINT32_C(0x0D6CDEB2), UINT32_C(0x03B609F5), + UINT32_C(0x06CA4BF5), UINT32_C(0x0D62A309), UINT32_C(0x0257EAE4), + UINT32_C(0x0CFF528C), UINT32_C(0x07CEB388), UINT32_C(0x0A606548), + UINT32_C(0x030BB457), UINT32_C(0x01345DCC), UINT32_C(0x09ED3B10), + UINT32_C(0x04855085), UINT32_C(0x07A5F679), UINT32_C(0x00234E85), + UINT32_C(0x06872ECB) }, + { UINT32_C(0x0CBA4DF5), UINT32_C(0x00BC43C9), UINT32_C(0x0996C3CC), + UINT32_C(0x01E2EC93), UINT32_C(0x0B15F26C), UINT32_C(0x05CB18FB), + UINT32_C(0x05F5A1D1), UINT32_C(0x0A483295), 
UINT32_C(0x0741A53D), + UINT32_C(0x0F4FEFBE), UINT32_C(0x052DED75), UINT32_C(0x09B06028), + UINT32_C(0x0671464F), UINT32_C(0x0741E002), UINT32_C(0x0E40CE62), + UINT32_C(0x012DA7C5), UINT32_C(0x067A9058), UINT32_C(0x07A9F1DD), + UINT32_C(0x04688275) } }, + { { UINT32_C(0x02AF535C), UINT32_C(0x046A5ECE), UINT32_C(0x0CB00D43), + UINT32_C(0x063584D5), UINT32_C(0x0F881F87), UINT32_C(0x02697B14), + UINT32_C(0x074F1FC7), UINT32_C(0x0AF5B0AF), UINT32_C(0x06F83FC9), + UINT32_C(0x0A8A203E), UINT32_C(0x0469A19B), UINT32_C(0x0A092434), + UINT32_C(0x069E17EC), UINT32_C(0x0773D1CD), UINT32_C(0x0F547B8E), + UINT32_C(0x01CACEC5), UINT32_C(0x0B26EDB6), UINT32_C(0x03AE5202), + UINT32_C(0x06B82C9D) }, + { UINT32_C(0x0FA0D000), UINT32_C(0x015C3536), UINT32_C(0x0470ADB0), + UINT32_C(0x008A151A), UINT32_C(0x030884ED), UINT32_C(0x06EC1F74), + UINT32_C(0x01E13D93), UINT32_C(0x0E97FCF4), UINT32_C(0x0043361E), + UINT32_C(0x05B81C21), UINT32_C(0x048F0898), UINT32_C(0x00CAD0C5), + UINT32_C(0x06243416), UINT32_C(0x03EBACFF), UINT32_C(0x0068471C), + UINT32_C(0x022858FC), UINT32_C(0x0A700CD1), UINT32_C(0x004BCA70), + UINT32_C(0x03CB25EA) } }, + { { UINT32_C(0x0F70ACE0), UINT32_C(0x00C2460B), UINT32_C(0x0A7F627F), + UINT32_C(0x01D6384B), UINT32_C(0x0C9F9078), UINT32_C(0x02A9923F), + UINT32_C(0x02B743F1), UINT32_C(0x0C36EE4D), UINT32_C(0x01856917), + UINT32_C(0x03329552), UINT32_C(0x05918A93), UINT32_C(0x0EC471DC), + UINT32_C(0x01946C41), UINT32_C(0x00039881), UINT32_C(0x05DFF9D2), + UINT32_C(0x05874A6F), UINT32_C(0x04306946), UINT32_C(0x05AB8B53), + UINT32_C(0x0553A131) }, + { UINT32_C(0x04C78230), UINT32_C(0x025BCE40), UINT32_C(0x0CD6DA86), + UINT32_C(0x054A8CE5), UINT32_C(0x0BD7BB78), UINT32_C(0x029A965C), + UINT32_C(0x068F11B8), UINT32_C(0x02FBC1A0), UINT32_C(0x06354357), + UINT32_C(0x0CCD4DBD), UINT32_C(0x051102A2), UINT32_C(0x031FD9B0), + UINT32_C(0x02C008A8), UINT32_C(0x00AD491F), UINT32_C(0x0BB60D3F), + UINT32_C(0x02A28F80), UINT32_C(0x008E75C4), UINT32_C(0x0522E322), + 
UINT32_C(0x03343F73) } }, + { { UINT32_C(0x0002D68B), UINT32_C(0x07643017), UINT32_C(0x088AD06A), + UINT32_C(0x0408925D), UINT32_C(0x08F2C855), UINT32_C(0x036834C5), + UINT32_C(0x0289A9D7), UINT32_C(0x0719D483), UINT32_C(0x032123DA), + UINT32_C(0x0B0A9B01), UINT32_C(0x0230FC26), UINT32_C(0x08B0CFCD), + UINT32_C(0x074393E1), UINT32_C(0x0439CA9A), UINT32_C(0x089E646F), + UINT32_C(0x024D4EB8), UINT32_C(0x036D4EC5), UINT32_C(0x03F0431F), + UINT32_C(0x0580DCFB) }, + { UINT32_C(0x0D90B740), UINT32_C(0x066AECA5), UINT32_C(0x0B5967E7), + UINT32_C(0x07CE13A8), UINT32_C(0x0CB918FF), UINT32_C(0x052A2ED5), + UINT32_C(0x009DC3A7), UINT32_C(0x092EBC54), UINT32_C(0x07A491ED), + UINT32_C(0x0644023D), UINT32_C(0x06F1C343), UINT32_C(0x0EED295B), + UINT32_C(0x0173D4B0), UINT32_C(0x04FE8C9E), UINT32_C(0x0C06A3FA), + UINT32_C(0x0028401A), UINT32_C(0x0FC38BCB), UINT32_C(0x020029B9), + UINT32_C(0x03C565C1) } }, + { { UINT32_C(0x0EDA25DC), UINT32_C(0x03927618), UINT32_C(0x0EDB2C58), + UINT32_C(0x00B2BAA3), UINT32_C(0x0E7BCCF6), UINT32_C(0x03A11FFE), + UINT32_C(0x02001D5C), UINT32_C(0x076D7291), UINT32_C(0x029BC068), + UINT32_C(0x094260B9), UINT32_C(0x0671EECC), UINT32_C(0x07B0A2FB), + UINT32_C(0x047A1899), UINT32_C(0x07CFA289), UINT32_C(0x065A085F), + UINT32_C(0x041FBFCB), UINT32_C(0x0050FB67), UINT32_C(0x02D9296D), + UINT32_C(0x05D31913) }, + { UINT32_C(0x021A0C30), UINT32_C(0x07BBBC48), UINT32_C(0x077F7A30), + UINT32_C(0x024F84DD), UINT32_C(0x00FC19E6), UINT32_C(0x035C1B4C), + UINT32_C(0x02861399), UINT32_C(0x0CE0D90B), UINT32_C(0x00E21952), + UINT32_C(0x0A696F7C), UINT32_C(0x03D6F2B5), UINT32_C(0x07F2D73D), + UINT32_C(0x03F2D910), UINT32_C(0x00119F7C), UINT32_C(0x01B7B782), + UINT32_C(0x02CC95B4), UINT32_C(0x033CD00B), UINT32_C(0x005F0FE8), + UINT32_C(0x046BCE9F) } }, + { { UINT32_C(0x016A8803), UINT32_C(0x057D0E0C), UINT32_C(0x04902444), + UINT32_C(0x06BC911C), UINT32_C(0x0C88373E), UINT32_C(0x0302735A), + UINT32_C(0x07E0A60D), UINT32_C(0x04C9D429), UINT32_C(0x05543A90), + 
UINT32_C(0x0EE4D9AC), UINT32_C(0x050794BC), UINT32_C(0x0985C982), + UINT32_C(0x0595F0A9), UINT32_C(0x05ABA2C4), UINT32_C(0x07307B7D), + UINT32_C(0x06A58CDB), UINT32_C(0x08CC2A00), UINT32_C(0x019E61E1), + UINT32_C(0x0363A648) }, + { UINT32_C(0x09792D19), UINT32_C(0x04677C73), UINT32_C(0x08631594), + UINT32_C(0x032F8F6A), UINT32_C(0x098EA86F), UINT32_C(0x032B9330), + UINT32_C(0x009CD434), UINT32_C(0x04D14790), UINT32_C(0x06B8C324), + UINT32_C(0x035461EE), UINT32_C(0x06E597DA), UINT32_C(0x00182BBE), + UINT32_C(0x04A3C432), UINT32_C(0x045AA031), UINT32_C(0x014A30EC), + UINT32_C(0x009C13A2), UINT32_C(0x0C730FBE), UINT32_C(0x06A8A94C), + UINT32_C(0x049EC08E) } }, + }, + { + { { UINT32_C(0x043C6A8B), UINT32_C(0x069E114E), UINT32_C(0x02D17119), + UINT32_C(0x07161008), UINT32_C(0x04253BA7), UINT32_C(0x06D7E9D1), + UINT32_C(0x07AFFFEA), UINT32_C(0x0C20088E), UINT32_C(0x009D84CD), + UINT32_C(0x094B5A8B), UINT32_C(0x070C9B19), UINT32_C(0x0A140336), + UINT32_C(0x059D32DC), UINT32_C(0x07D5C770), UINT32_C(0x0B702098), + UINT32_C(0x0646FC6A), UINT32_C(0x06312DAB), UINT32_C(0x05DEF39B), + UINT32_C(0x07B32BAC) }, + { UINT32_C(0x06B04438), UINT32_C(0x0086BBC2), UINT32_C(0x0CE331EB), + UINT32_C(0x07A1DB2A), UINT32_C(0x04798584), UINT32_C(0x0632A66E), + UINT32_C(0x03A4F5AE), UINT32_C(0x03B41996), UINT32_C(0x061944D5), + UINT32_C(0x0E8ECAB0), UINT32_C(0x00E38A9B), UINT32_C(0x0BBF7088), + UINT32_C(0x022E1052), UINT32_C(0x00FB1445), UINT32_C(0x0FF1C5EA), + UINT32_C(0x034DB2F7), UINT32_C(0x04C560D6), UINT32_C(0x050E7FEA), + UINT32_C(0x00B97B7C) } }, + { { UINT32_C(0x004ED5E3), UINT32_C(0x012DA268), UINT32_C(0x08C92EF3), + UINT32_C(0x06F60BF9), UINT32_C(0x0656B119), UINT32_C(0x014823AF), + UINT32_C(0x058D04AC), UINT32_C(0x099D3419), UINT32_C(0x00CFAE71), + UINT32_C(0x0B423A38), UINT32_C(0x05EA80E2), UINT32_C(0x06C1F218), + UINT32_C(0x03E72AD5), UINT32_C(0x0691F49A), UINT32_C(0x04310FAB), + UINT32_C(0x05D250AD), UINT32_C(0x084D7BFA), UINT32_C(0x070595DE), + UINT32_C(0x017825D9) }, + { 
UINT32_C(0x0A7D5B37), UINT32_C(0x00B0A7A2), UINT32_C(0x0ED3BDEF), + UINT32_C(0x02B29FDB), UINT32_C(0x085BCC71), UINT32_C(0x0455FDD9), + UINT32_C(0x0595CF1F), UINT32_C(0x0040CCA6), UINT32_C(0x04FA2F23), + UINT32_C(0x04A05DD3), UINT32_C(0x07E18B4E), UINT32_C(0x045A2A46), + UINT32_C(0x058F2043), UINT32_C(0x038FC52D), UINT32_C(0x0A7666DC), + UINT32_C(0x0701CE42), UINT32_C(0x04B38B92), UINT32_C(0x01AD842D), + UINT32_C(0x07A0B6A0) } }, + { { UINT32_C(0x029D2024), UINT32_C(0x0728395A), UINT32_C(0x04DB516D), + UINT32_C(0x0504C2CE), UINT32_C(0x03C5DEB1), UINT32_C(0x041CFF48), + UINT32_C(0x014AE223), UINT32_C(0x0856531F), UINT32_C(0x02EC3F65), + UINT32_C(0x0A46F536), UINT32_C(0x04ECB2AA), UINT32_C(0x0FB7289E), + UINT32_C(0x03DE9EFF), UINT32_C(0x0724BAA3), UINT32_C(0x0508D541), + UINT32_C(0x051B73BA), UINT32_C(0x0B38749E), UINT32_C(0x044097DF), + UINT32_C(0x00E5AC8E) }, + { UINT32_C(0x0DDD93A9), UINT32_C(0x04295052), UINT32_C(0x0E03B84C), + UINT32_C(0x00B38799), UINT32_C(0x037F6A48), UINT32_C(0x07614753), + UINT32_C(0x05765258), UINT32_C(0x0E0CA450), UINT32_C(0x07CFB537), + UINT32_C(0x07342BEF), UINT32_C(0x05C319BB), UINT32_C(0x04F3A1F5), + UINT32_C(0x04762545), UINT32_C(0x0589360C), UINT32_C(0x0E5A46C8), + UINT32_C(0x02744137), UINT32_C(0x05E9E991), UINT32_C(0x01523BC2), + UINT32_C(0x062CDAB6) } }, + { { UINT32_C(0x090E92D6), UINT32_C(0x00FA75A5), UINT32_C(0x040D6969), + UINT32_C(0x011D7DDB), UINT32_C(0x0B02AC62), UINT32_C(0x07679C7F), + UINT32_C(0x07FD8A06), UINT32_C(0x0A623D2A), UINT32_C(0x034C8ED2), + UINT32_C(0x07FB351F), UINT32_C(0x008857BA), UINT32_C(0x09AD9171), + UINT32_C(0x03CB7A5B), UINT32_C(0x01A56DB4), UINT32_C(0x09225D29), + UINT32_C(0x07819EC5), UINT32_C(0x0645D37A), UINT32_C(0x0618AED1), + UINT32_C(0x053A82A2) }, + { UINT32_C(0x0662F537), UINT32_C(0x00AB8407), UINT32_C(0x0FF98DF8), + UINT32_C(0x03C0F116), UINT32_C(0x0C87DD6F), UINT32_C(0x00995A87), + UINT32_C(0x036E7BF1), UINT32_C(0x0318B15E), UINT32_C(0x01116415), + UINT32_C(0x00A53CD8), UINT32_C(0x0237AEF5), 
UINT32_C(0x065DCC5D), + UINT32_C(0x048F2118), UINT32_C(0x011F3E13), UINT32_C(0x0AD27061), + UINT32_C(0x02B7B666), UINT32_C(0x01CB618D), UINT32_C(0x02EC555A), + UINT32_C(0x058DF8C5) } }, + { { UINT32_C(0x0B9839DA), UINT32_C(0x0047D336), UINT32_C(0x09E93377), + UINT32_C(0x00074C09), UINT32_C(0x08B5F722), UINT32_C(0x06A0986D), + UINT32_C(0x03ABD41C), UINT32_C(0x057C1CAA), UINT32_C(0x02B2ACCA), + UINT32_C(0x0FC9B996), UINT32_C(0x05488187), UINT32_C(0x07861011), + UINT32_C(0x0163907B), UINT32_C(0x07F6DAF7), UINT32_C(0x0363BC0E), + UINT32_C(0x058EF00F), UINT32_C(0x05446B66), UINT32_C(0x0514AA79), + UINT32_C(0x04A03953) }, + { UINT32_C(0x0C1962CE), UINT32_C(0x06493BB1), UINT32_C(0x086D6126), + UINT32_C(0x00FCE569), UINT32_C(0x0DC92336), UINT32_C(0x015B8163), + UINT32_C(0x0432A31C), UINT32_C(0x0133A6EE), UINT32_C(0x0578D7AF), + UINT32_C(0x0840A2D3), UINT32_C(0x064C1FC2), UINT32_C(0x085837C8), + UINT32_C(0x0641237D), UINT32_C(0x054AF205), UINT32_C(0x0657C4E2), + UINT32_C(0x04B8B1E0), UINT32_C(0x00272237), UINT32_C(0x05B53E59), + UINT32_C(0x001FEA03) } }, + { { UINT32_C(0x0D2BF9A7), UINT32_C(0x01A65815), UINT32_C(0x06FC3341), + UINT32_C(0x065823F4), UINT32_C(0x01599DE7), UINT32_C(0x070CA981), + UINT32_C(0x067E13C8), UINT32_C(0x009A9A6A), UINT32_C(0x0229B72F), + UINT32_C(0x09B1BC4A), UINT32_C(0x06BCE69A), UINT32_C(0x0FA69B0D), + UINT32_C(0x078B83C0), UINT32_C(0x06E62A5C), UINT32_C(0x021D206C), + UINT32_C(0x04E0CE16), UINT32_C(0x0F728EF3), UINT32_C(0x0453D52E), + UINT32_C(0x01844B54) }, + { UINT32_C(0x020C30CB), UINT32_C(0x04E85BEE), UINT32_C(0x095E4EAF), + UINT32_C(0x075E0168), UINT32_C(0x039C14AF), UINT32_C(0x0370EA5A), + UINT32_C(0x05B0F157), UINT32_C(0x02E11B96), UINT32_C(0x042E3824), + UINT32_C(0x0D5DC5BB), UINT32_C(0x00451C96), UINT32_C(0x0E911392), + UINT32_C(0x0724269B), UINT32_C(0x04003692), UINT32_C(0x076FEA68), + UINT32_C(0x033CBDE1), UINT32_C(0x0417AF7D), UINT32_C(0x00B9592D), + UINT32_C(0x027FA0B4) } }, + { { UINT32_C(0x0B2E6D92), UINT32_C(0x06E8F69A), 
UINT32_C(0x0DCD1AA5), + UINT32_C(0x01FB27B9), UINT32_C(0x04974F21), UINT32_C(0x027768BA), + UINT32_C(0x02769E05), UINT32_C(0x08C4A5CC), UINT32_C(0x047AF64B), + UINT32_C(0x08B89BB2), UINT32_C(0x02ED5662), UINT32_C(0x03939461), + UINT32_C(0x01F7401B), UINT32_C(0x06FDF357), UINT32_C(0x019C98D9), + UINT32_C(0x07B1E9DD), UINT32_C(0x075DC034), UINT32_C(0x01E0054F), + UINT32_C(0x02A2F727) }, + { UINT32_C(0x0EB71C5F), UINT32_C(0x023BF702), UINT32_C(0x02236711), + UINT32_C(0x012F6D73), UINT32_C(0x0CA22E0A), UINT32_C(0x02359757), + UINT32_C(0x0157DA08), UINT32_C(0x05CB0525), UINT32_C(0x0102CBFE), + UINT32_C(0x0854B694), UINT32_C(0x07F9F306), UINT32_C(0x0A6E3855), + UINT32_C(0x024CCD83), UINT32_C(0x0220CC0E), UINT32_C(0x0AAD6848), + UINT32_C(0x0783A366), UINT32_C(0x0B9AD104), UINT32_C(0x02844B14), + UINT32_C(0x07B5BC13) } }, + { { UINT32_C(0x01490429), UINT32_C(0x07C3B47C), UINT32_C(0x0DB7A58B), + UINT32_C(0x04D10D93), UINT32_C(0x08CA405B), UINT32_C(0x07FD087B), + UINT32_C(0x07C88AC9), UINT32_C(0x07D54451), UINT32_C(0x07010F32), + UINT32_C(0x06D62976), UINT32_C(0x03752EE7), UINT32_C(0x0A2326FD), + UINT32_C(0x00445040), UINT32_C(0x03605DB9), UINT32_C(0x03194920), + UINT32_C(0x01F8F0DF), UINT32_C(0x0F321EF5), UINT32_C(0x0297EC47), + UINT32_C(0x05C97D9A) }, + { UINT32_C(0x087CA374), UINT32_C(0x04D9BD85), UINT32_C(0x09E4C1E2), + UINT32_C(0x05C6B60F), UINT32_C(0x03338BE0), UINT32_C(0x06C38E9F), + UINT32_C(0x030527CA), UINT32_C(0x0F28850A), UINT32_C(0x039421C7), + UINT32_C(0x02DE48C5), UINT32_C(0x0652719F), UINT32_C(0x097E2E6B), + UINT32_C(0x0758DD1C), UINT32_C(0x06788A64), UINT32_C(0x01CDEC4A), + UINT32_C(0x0314A216), UINT32_C(0x022EE734), UINT32_C(0x023BD455), + UINT32_C(0x05EC7716) } }, + { { UINT32_C(0x03ACF0F9), UINT32_C(0x0203D95A), UINT32_C(0x0286435B), + UINT32_C(0x01818DC4), UINT32_C(0x02821B92), UINT32_C(0x06AE5102), + UINT32_C(0x07066934), UINT32_C(0x07BC9150), UINT32_C(0x07BA5607), + UINT32_C(0x0EC5981C), UINT32_C(0x04C69569), UINT32_C(0x03CC0C2A), + 
UINT32_C(0x07DA94A0), UINT32_C(0x07E65511), UINT32_C(0x086234FB), + UINT32_C(0x05407465), UINT32_C(0x0F825CD7), UINT32_C(0x03F370CC), + UINT32_C(0x00DC963A) }, + { UINT32_C(0x09436D81), UINT32_C(0x04465793), UINT32_C(0x041DBE76), + UINT32_C(0x0384C090), UINT32_C(0x005C5350), UINT32_C(0x07296D6A), + UINT32_C(0x04712C6D), UINT32_C(0x0B8974CF), UINT32_C(0x07A230E5), + UINT32_C(0x0CBF52A8), UINT32_C(0x016C1814), UINT32_C(0x06EDC3F7), + UINT32_C(0x0627F679), UINT32_C(0x0750029A), UINT32_C(0x06E2AA55), + UINT32_C(0x0245FF68), UINT32_C(0x0F8F41C6), UINT32_C(0x00A2BB27), + UINT32_C(0x052BDC1F) } }, + { { UINT32_C(0x06C8D427), UINT32_C(0x0648C043), UINT32_C(0x045E9C01), + UINT32_C(0x042CC909), UINT32_C(0x089A90AA), UINT32_C(0x007114E3), + UINT32_C(0x0085B7C3), UINT32_C(0x0B9DE134), UINT32_C(0x06B0A9E9), + UINT32_C(0x0AAAEBCC), UINT32_C(0x0092A52A), UINT32_C(0x0D6E2713), + UINT32_C(0x05857362), UINT32_C(0x0118376C), UINT32_C(0x000A08F8), + UINT32_C(0x003DE32F), UINT32_C(0x0E3FE6ED), UINT32_C(0x06CFB412), + UINT32_C(0x043D1662) }, + { UINT32_C(0x0D400463), UINT32_C(0x0448C05A), UINT32_C(0x0AE67E6E), + UINT32_C(0x059369CB), UINT32_C(0x0A23C77C), UINT32_C(0x06E7F666), + UINT32_C(0x05BB8233), UINT32_C(0x095E95B6), UINT32_C(0x0284C07C), + UINT32_C(0x0F6C7097), UINT32_C(0x0443F5D5), UINT32_C(0x0301FE7F), + UINT32_C(0x023010C9), UINT32_C(0x009D2363), UINT32_C(0x07BD65C2), + UINT32_C(0x07E297A0), UINT32_C(0x034DDA50), UINT32_C(0x07ADC7E7), + UINT32_C(0x03060E2B) } }, + { { UINT32_C(0x0924C15F), UINT32_C(0x04E07505), UINT32_C(0x08D0DCCF), + UINT32_C(0x01D04769), UINT32_C(0x02E2E204), UINT32_C(0x0713097A), + UINT32_C(0x07E9B59C), UINT32_C(0x07FDCF7A), UINT32_C(0x03E60E03), + UINT32_C(0x0423C6CD), UINT32_C(0x06A163F7), UINT32_C(0x07C0FA8B), + UINT32_C(0x01341D2B), UINT32_C(0x06745C51), UINT32_C(0x03C9DE3A), + UINT32_C(0x06D6D6F5), UINT32_C(0x0F5AF83F), UINT32_C(0x02698DEF), + UINT32_C(0x06091F29) }, + { UINT32_C(0x0DBEEE78), UINT32_C(0x060A02B3), UINT32_C(0x0558AE6B), + 
UINT32_C(0x07100333), UINT32_C(0x0A312381), UINT32_C(0x02FA9A13), + UINT32_C(0x06D1C0C3), UINT32_C(0x0C625336), UINT32_C(0x03B853CF), + UINT32_C(0x08B3BE37), UINT32_C(0x0104E5D9), UINT32_C(0x053B9B53), + UINT32_C(0x02A2D06C), UINT32_C(0x01CDC864), UINT32_C(0x0F04A867), + UINT32_C(0x07663226), UINT32_C(0x0FD6C54B), UINT32_C(0x040943C5), + UINT32_C(0x03C04D10) } }, + { { UINT32_C(0x090F8C80), UINT32_C(0x0582A686), UINT32_C(0x0BA42ED6), + UINT32_C(0x070A8F1E), UINT32_C(0x0AB02D12), UINT32_C(0x01EB5C3D), + UINT32_C(0x07479B29), UINT32_C(0x04D72C41), UINT32_C(0x0362562E), + UINT32_C(0x06FAF4FC), UINT32_C(0x033FED54), UINT32_C(0x0229578C), + UINT32_C(0x005B4CFB), UINT32_C(0x03BA05BF), UINT32_C(0x0B4A3FBC), + UINT32_C(0x07DBD5D5), UINT32_C(0x05E8639D), UINT32_C(0x07D5867F), + UINT32_C(0x027FE947) }, + { UINT32_C(0x01982847), UINT32_C(0x008A8D79), UINT32_C(0x0B215B64), + UINT32_C(0x06EDECCB), UINT32_C(0x045309BE), UINT32_C(0x055465DE), + UINT32_C(0x0426ED2E), UINT32_C(0x0D49D672), UINT32_C(0x01000B74), + UINT32_C(0x01206E3C), UINT32_C(0x061A0CA8), UINT32_C(0x020BEC03), + UINT32_C(0x02104AC7), UINT32_C(0x03FB64AC), UINT32_C(0x097C06BE), + UINT32_C(0x05DF7C1D), UINT32_C(0x0EFD23AB), UINT32_C(0x042BC8D8), + UINT32_C(0x02A649D7) } }, + { { UINT32_C(0x0643409F), UINT32_C(0x06A50E0A), UINT32_C(0x00C269C2), + UINT32_C(0x0130B8C0), UINT32_C(0x0B25EAD2), UINT32_C(0x07A4A516), + UINT32_C(0x0375B082), UINT32_C(0x0E197F8C), UINT32_C(0x0546B686), + UINT32_C(0x0B8287C5), UINT32_C(0x04A367C1), UINT32_C(0x07DF58A1), + UINT32_C(0x05B7DD15), UINT32_C(0x061763FD), UINT32_C(0x0E2DF8E8), + UINT32_C(0x05ABFC51), UINT32_C(0x087018C8), UINT32_C(0x05935143), + UINT32_C(0x05E9EFA4) }, + { UINT32_C(0x0AF2F29D), UINT32_C(0x0063F9B1), UINT32_C(0x0FB11A34), + UINT32_C(0x02D7C22E), UINT32_C(0x08AF67E7), UINT32_C(0x005AC16C), + UINT32_C(0x047EE080), UINT32_C(0x0B7677A2), UINT32_C(0x04500DDC), + UINT32_C(0x0137CD80), UINT32_C(0x01CF2369), UINT32_C(0x0DE177B8), + UINT32_C(0x018122DE), UINT32_C(0x00EDFC0C), 
UINT32_C(0x0048B9ED), + UINT32_C(0x043633B7), UINT32_C(0x0666D33E), UINT32_C(0x00317E10), + UINT32_C(0x066100C3) } }, + { { UINT32_C(0x037B93A2), UINT32_C(0x07917621), UINT32_C(0x048F411C), + UINT32_C(0x04EF1E2A), UINT32_C(0x0FC8F91F), UINT32_C(0x04090E1D), + UINT32_C(0x066F78F2), UINT32_C(0x0C2C0207), UINT32_C(0x065E2513), + UINT32_C(0x0F03BADB), UINT32_C(0x03689AF4), UINT32_C(0x0FE959E2), + UINT32_C(0x028B6A5E), UINT32_C(0x0101C577), UINT32_C(0x0C3A5192), + UINT32_C(0x03042F53), UINT32_C(0x0E2A6A29), UINT32_C(0x0231095D), + UINT32_C(0x06E29445) }, + { UINT32_C(0x07A00331), UINT32_C(0x041D85F7), UINT32_C(0x0D189E24), + UINT32_C(0x0294578C), UINT32_C(0x04A9E7A3), UINT32_C(0x037F260A), + UINT32_C(0x060D62BB), UINT32_C(0x07AED3DE), UINT32_C(0x0727FEAB), + UINT32_C(0x0283C99C), UINT32_C(0x05A11B56), UINT32_C(0x08953348), + UINT32_C(0x01A388E1), UINT32_C(0x028932F2), UINT32_C(0x0AFFD5A7), + UINT32_C(0x042CF6C6), UINT32_C(0x072339BA), UINT32_C(0x06344724), + UINT32_C(0x0395F757) } }, + { { UINT32_C(0x01328CE4), UINT32_C(0x01D69A89), UINT32_C(0x03D3B2E3), + UINT32_C(0x0780829F), UINT32_C(0x0848A488), UINT32_C(0x057B85BD), + UINT32_C(0x02051385), UINT32_C(0x06706AD6), UINT32_C(0x02D6482A), + UINT32_C(0x0A8717D0), UINT32_C(0x05383AC5), UINT32_C(0x03250B87), + UINT32_C(0x05C77D8D), UINT32_C(0x05198B6D), UINT32_C(0x03FACF90), + UINT32_C(0x062058A1), UINT32_C(0x008F96B1), UINT32_C(0x01F29CAF), + UINT32_C(0x00358EC7) }, + { UINT32_C(0x0B620A88), UINT32_C(0x06288694), UINT32_C(0x05B21FAC), + UINT32_C(0x03F64B44), UINT32_C(0x0DBD251D), UINT32_C(0x06B0D130), + UINT32_C(0x04314394), UINT32_C(0x02479C97), UINT32_C(0x003417DF), + UINT32_C(0x0318B1D4), UINT32_C(0x0762DFD7), UINT32_C(0x0DDA6BF1), + UINT32_C(0x0214A508), UINT32_C(0x0231DEBD), UINT32_C(0x0D8733B2), + UINT32_C(0x02ACA66C), UINT32_C(0x05C275E4), UINT32_C(0x07A8A625), + UINT32_C(0x001D2426) } }, + { { UINT32_C(0x0C95FF29), UINT32_C(0x0608C2C5), UINT32_C(0x0404108F), + UINT32_C(0x03383226), UINT32_C(0x07F8CE0C), 
UINT32_C(0x0600859C), + UINT32_C(0x04899A96), UINT32_C(0x00CCD8EA), UINT32_C(0x02796E7C), + UINT32_C(0x0FB706CC), UINT32_C(0x0111E6FC), UINT32_C(0x027E2706), + UINT32_C(0x03EBDDF3), UINT32_C(0x02838065), UINT32_C(0x0585FBC0), + UINT32_C(0x07572ED5), UINT32_C(0x0907E1E4), UINT32_C(0x017E67B8), + UINT32_C(0x041786F0) }, + { UINT32_C(0x04519732), UINT32_C(0x073D0689), UINT32_C(0x0DF32FF7), + UINT32_C(0x01246800), UINT32_C(0x068478E9), UINT32_C(0x031DEA3C), + UINT32_C(0x03E71E8F), UINT32_C(0x08C6C89E), UINT32_C(0x012CDD96), + UINT32_C(0x0AEEE8F4), UINT32_C(0x0121A9C4), UINT32_C(0x01F73DAA), + UINT32_C(0x033160E0), UINT32_C(0x062B3F6E), UINT32_C(0x081E3B9C), + UINT32_C(0x029ED0A5), UINT32_C(0x05F0DBFB), UINT32_C(0x0765E7EB), + UINT32_C(0x06026E18) } }, + }, + { + { { UINT32_C(0x0ED2EB86), UINT32_C(0x073B24CD), UINT32_C(0x01308B7E), + UINT32_C(0x001667D5), UINT32_C(0x06D840A4), UINT32_C(0x01CE15F3), + UINT32_C(0x00EC4628), UINT32_C(0x0BE255D7), UINT32_C(0x039A76B9), + UINT32_C(0x0CA76752), UINT32_C(0x02EA45FE), UINT32_C(0x0CB0A354), + UINT32_C(0x019D90B7), UINT32_C(0x036C0B82), UINT32_C(0x07E353B2), + UINT32_C(0x00B45E15), UINT32_C(0x0E1E3229), UINT32_C(0x06EED669), + UINT32_C(0x07975597) }, + { UINT32_C(0x04B5DE1E), UINT32_C(0x05185A2C), UINT32_C(0x0F1C1594), + UINT32_C(0x01D7FD5B), UINT32_C(0x0CD949EB), UINT32_C(0x02E191E5), + UINT32_C(0x03295CCA), UINT32_C(0x02F97A05), UINT32_C(0x052209AD), + UINT32_C(0x0C0AF1C4), UINT32_C(0x07F93AD2), UINT32_C(0x060F26C1), + UINT32_C(0x0274993E), UINT32_C(0x023CDD4A), UINT32_C(0x08D9B938), + UINT32_C(0x00D32B5E), UINT32_C(0x04FE5190), UINT32_C(0x01AB014D), + UINT32_C(0x05DD64A0) } }, + { { UINT32_C(0x0C2CA70B), UINT32_C(0x0346AE90), UINT32_C(0x0F8387AC), + UINT32_C(0x03ABE62A), UINT32_C(0x029DA053), UINT32_C(0x0041F61B), + UINT32_C(0x02CBC0BF), UINT32_C(0x05243AE9), UINT32_C(0x0360C16B), + UINT32_C(0x0C28A299), UINT32_C(0x0795D938), UINT32_C(0x02AC475A), + UINT32_C(0x0113BEAF), UINT32_C(0x05A671E6), UINT32_C(0x05C8C591), + 
UINT32_C(0x06924739), UINT32_C(0x02A54EEF), UINT32_C(0x02F274E3), + UINT32_C(0x0049A1CD) }, + { UINT32_C(0x0426994D), UINT32_C(0x07F97B31), UINT32_C(0x0DA0C788), + UINT32_C(0x04B6F8C8), UINT32_C(0x05463D1A), UINT32_C(0x07C155D5), + UINT32_C(0x00BA793E), UINT32_C(0x0AB08953), UINT32_C(0x042C3976), + UINT32_C(0x069C681F), UINT32_C(0x02ABCC5A), UINT32_C(0x024C8F72), + UINT32_C(0x067DF148), UINT32_C(0x0180DD65), UINT32_C(0x042A4819), + UINT32_C(0x01AFAAD4), UINT32_C(0x0334701F), UINT32_C(0x031ADC33), + UINT32_C(0x03AA0140) } }, + { { UINT32_C(0x0BEE1F7B), UINT32_C(0x07EA5E6A), UINT32_C(0x06C716A1), + UINT32_C(0x01C6DCD9), UINT32_C(0x00C62805), UINT32_C(0x06E99086), + UINT32_C(0x047E4182), UINT32_C(0x04E699EA), UINT32_C(0x017F98AF), + UINT32_C(0x0C64E476), UINT32_C(0x0464A2AE), UINT32_C(0x0AF646E7), + UINT32_C(0x0734C8DA), UINT32_C(0x069B3D13), UINT32_C(0x0BD58EFB), + UINT32_C(0x0572D3C4), UINT32_C(0x0889BAF4), UINT32_C(0x049A880A), + UINT32_C(0x01790356) }, + { UINT32_C(0x0D71A4FA), UINT32_C(0x017475C1), UINT32_C(0x0B53C845), + UINT32_C(0x00ED5EC3), UINT32_C(0x072B9DBC), UINT32_C(0x032C8366), + UINT32_C(0x02B3D21C), UINT32_C(0x0E8E8016), UINT32_C(0x04B6FF58), + UINT32_C(0x017276EC), UINT32_C(0x069855EF), UINT32_C(0x0342CFC2), + UINT32_C(0x00D109A0), UINT32_C(0x07614A72), UINT32_C(0x09DC301B), + UINT32_C(0x036B57F5), UINT32_C(0x06CB91C2), UINT32_C(0x03E8DF1A), + UINT32_C(0x070FD727) } }, + { { UINT32_C(0x032574BE), UINT32_C(0x04115A04), UINT32_C(0x0F98172F), + UINT32_C(0x04AEDED0), UINT32_C(0x02519CD4), UINT32_C(0x05A01A73), + UINT32_C(0x06EEA282), UINT32_C(0x0BBAAC38), UINT32_C(0x02CC4028), + UINT32_C(0x03AACD20), UINT32_C(0x01A067DD), UINT32_C(0x0AFED584), + UINT32_C(0x06846B34), UINT32_C(0x01F4D8B2), UINT32_C(0x00AB5080), + UINT32_C(0x02EFB0FB), UINT32_C(0x09F1C68E), UINT32_C(0x01829F05), + UINT32_C(0x008F3C67) }, + { UINT32_C(0x062EC0F0), UINT32_C(0x04CAAFE4), UINT32_C(0x08147733), + UINT32_C(0x038A422E), UINT32_C(0x0085656E), UINT32_C(0x02D1FFD4), + 
UINT32_C(0x0731016E), UINT32_C(0x022AA6C1), UINT32_C(0x04385C24), + UINT32_C(0x06B4D30A), UINT32_C(0x04FF86E3), UINT32_C(0x0540E9AE), + UINT32_C(0x039185FE), UINT32_C(0x0278D41E), UINT32_C(0x05EEE86F), + UINT32_C(0x05D399FD), UINT32_C(0x07D5B982), UINT32_C(0x0364A589), + UINT32_C(0x07E1654F) } }, + { { UINT32_C(0x0D8CB3CC), UINT32_C(0x06C254BF), UINT32_C(0x0FBC2C5D), + UINT32_C(0x07F746F2), UINT32_C(0x07E4259D), UINT32_C(0x022B49C4), + UINT32_C(0x04CE0ECE), UINT32_C(0x095F3130), UINT32_C(0x064022C7), + UINT32_C(0x076A7307), UINT32_C(0x074FEA23), UINT32_C(0x09CDD626), + UINT32_C(0x0612A401), UINT32_C(0x0562E226), UINT32_C(0x027BA2E0), + UINT32_C(0x01D98EB5), UINT32_C(0x0A54B2FF), UINT32_C(0x0345BAFC), + UINT32_C(0x05CE5083) }, + { UINT32_C(0x082FB619), UINT32_C(0x01E59C7B), UINT32_C(0x07C56C18), + UINT32_C(0x0594E677), UINT32_C(0x0EBA4C47), UINT32_C(0x01F1C6FF), + UINT32_C(0x016B9F48), UINT32_C(0x0443B057), UINT32_C(0x017930FC), + UINT32_C(0x0D94B0A6), UINT32_C(0x0501D4ED), UINT32_C(0x0EB5EA2F), + UINT32_C(0x03F2D8D0), UINT32_C(0x04A1DA92), UINT32_C(0x0A702231), + UINT32_C(0x063C2830), UINT32_C(0x06F5E127), UINT32_C(0x06BE79CE), + UINT32_C(0x06600B2F) } }, + { { UINT32_C(0x0F26ECDA), UINT32_C(0x0052168B), UINT32_C(0x0CBDB9E3), + UINT32_C(0x052FFD0A), UINT32_C(0x02FDCD7B), UINT32_C(0x05791EA2), + UINT32_C(0x03DF5472), UINT32_C(0x0544715D), UINT32_C(0x032F4FBD), + UINT32_C(0x05DA4E99), UINT32_C(0x000977D5), UINT32_C(0x0AEE5E82), + UINT32_C(0x07B5A2B7), UINT32_C(0x02494676), UINT32_C(0x0B416152), + UINT32_C(0x03AC76C7), UINT32_C(0x0B21FDC6), UINT32_C(0x04ECC50E), + UINT32_C(0x02A4E6AB) }, + { UINT32_C(0x031E0BB4), UINT32_C(0x05FC9964), UINT32_C(0x014AC466), + UINT32_C(0x038F82D0), UINT32_C(0x0C0B56B8), UINT32_C(0x0217513C), + UINT32_C(0x0498C923), UINT32_C(0x076EEC28), UINT32_C(0x03824F59), + UINT32_C(0x0B7B1382), UINT32_C(0x056FE399), UINT32_C(0x00794841), + UINT32_C(0x076FEEC8), UINT32_C(0x0219F413), UINT32_C(0x04ABDD19), + UINT32_C(0x04CE2F28), UINT32_C(0x0F2E86F7), 
UINT32_C(0x02F472AF), + UINT32_C(0x06774781) } }, + { { UINT32_C(0x0CEBC7BE), UINT32_C(0x00221686), UINT32_C(0x04E2E2B5), + UINT32_C(0x02865641), UINT32_C(0x0400F945), UINT32_C(0x01CF69C4), + UINT32_C(0x002D7B22), UINT32_C(0x04D5A98C), UINT32_C(0x075AA74B), + UINT32_C(0x0926F727), UINT32_C(0x0318AD6B), UINT32_C(0x009AE911), + UINT32_C(0x00216BA5), UINT32_C(0x0794C1D5), UINT32_C(0x047BB387), + UINT32_C(0x05890517), UINT32_C(0x0C438287), UINT32_C(0x04D6AF1C), + UINT32_C(0x010C34E7) }, + { UINT32_C(0x02E3859D), UINT32_C(0x06690EFE), UINT32_C(0x0F063DCD), + UINT32_C(0x068C490B), UINT32_C(0x06DE5321), UINT32_C(0x0225E5EC), + UINT32_C(0x0573AFDE), UINT32_C(0x0C5AD59A), UINT32_C(0x064D175A), + UINT32_C(0x09D71327), UINT32_C(0x03D7526B), UINT32_C(0x04C7D696), + UINT32_C(0x05C7C0BF), UINT32_C(0x04314949), UINT32_C(0x064EA7B0), + UINT32_C(0x008652D7), UINT32_C(0x0EA31279), UINT32_C(0x0668F188), + UINT32_C(0x035A0886) } }, + { { UINT32_C(0x02EB8133), UINT32_C(0x03EC558C), UINT32_C(0x088B2CEF), + UINT32_C(0x008352FC), UINT32_C(0x0ECF2FB1), UINT32_C(0x01F0E6BB), + UINT32_C(0x023E4A68), UINT32_C(0x0B9CC299), UINT32_C(0x02937BC1), + UINT32_C(0x0A4FE033), UINT32_C(0x03BAB078), UINT32_C(0x078C8608), + UINT32_C(0x000D53E7), UINT32_C(0x06DA1D39), UINT32_C(0x05E14C61), + UINT32_C(0x035624BE), UINT32_C(0x06669427), UINT32_C(0x079FAB65), + UINT32_C(0x0663AC20) }, + { UINT32_C(0x06835A15), UINT32_C(0x013B136D), UINT32_C(0x08DB323F), + UINT32_C(0x068809A4), UINT32_C(0x02A3957E), UINT32_C(0x0081A010), + UINT32_C(0x06B7C838), UINT32_C(0x074F156F), UINT32_C(0x00F3A4DB), + UINT32_C(0x07ADF165), UINT32_C(0x05A07A0A), UINT32_C(0x0585D310), + UINT32_C(0x02A4FAF9), UINT32_C(0x03A5C451), UINT32_C(0x00426908), + UINT32_C(0x03C76306), UINT32_C(0x0D3289C2), UINT32_C(0x04FD8A7B), + UINT32_C(0x03974EFE) } }, + { { UINT32_C(0x01D85118), UINT32_C(0x03F039A9), UINT32_C(0x0A744F66), + UINT32_C(0x00B874D3), UINT32_C(0x0AD31A3A), UINT32_C(0x07A3C5F8), + UINT32_C(0x045FFFF5), UINT32_C(0x023754A5), 
UINT32_C(0x02E38CB8), + UINT32_C(0x05910E6C), UINT32_C(0x01773ED0), UINT32_C(0x0835A72A), + UINT32_C(0x01BE848A), UINT32_C(0x07BD444B), UINT32_C(0x0B4AFA36), + UINT32_C(0x03B51CEC), UINT32_C(0x076A82F4), UINT32_C(0x049B5424), + UINT32_C(0x01EDBBC3) }, + { UINT32_C(0x0D472029), UINT32_C(0x07322E8C), UINT32_C(0x0891E31F), + UINT32_C(0x0598F9A4), UINT32_C(0x0B8A6C89), UINT32_C(0x065A918E), + UINT32_C(0x01B36F21), UINT32_C(0x05650472), UINT32_C(0x053A7D69), + UINT32_C(0x05F09FDE), UINT32_C(0x03CE6055), UINT32_C(0x017487DC), + UINT32_C(0x01B03227), UINT32_C(0x013D4913), UINT32_C(0x096CA6AE), + UINT32_C(0x000E46D4), UINT32_C(0x07F35B2C), UINT32_C(0x06FDC86A), + UINT32_C(0x0191F319) } }, + { { UINT32_C(0x0CE12393), UINT32_C(0x015F4FB3), UINT32_C(0x0C3E8E50), + UINT32_C(0x06CE6B2D), UINT32_C(0x0B3C1693), UINT32_C(0x045162F6), + UINT32_C(0x0407EFF6), UINT32_C(0x00A9135E), UINT32_C(0x047CF46F), + UINT32_C(0x04E91DC4), UINT32_C(0x036B9A3C), UINT32_C(0x0134193D), + UINT32_C(0x003E5C05), UINT32_C(0x00082BD9), UINT32_C(0x067D8D47), + UINT32_C(0x02764530), UINT32_C(0x01E6C320), UINT32_C(0x04A28C2A), + UINT32_C(0x048FBA5C) }, + { UINT32_C(0x0CE5DBF5), UINT32_C(0x0385772C), UINT32_C(0x019E313F), + UINT32_C(0x073071A7), UINT32_C(0x0F5FC824), UINT32_C(0x02D63EF3), + UINT32_C(0x02B70267), UINT32_C(0x0A6BE174), UINT32_C(0x076EA84E), + UINT32_C(0x0FA0EBFC), UINT32_C(0x06D310F3), UINT32_C(0x01962AC7), + UINT32_C(0x0209883D), UINT32_C(0x03B86C97), UINT32_C(0x00441CDD), + UINT32_C(0x0066501C), UINT32_C(0x03267C1F), UINT32_C(0x03EAC5C9), + UINT32_C(0x00069F5A) } }, + { { UINT32_C(0x01D1EEDB), UINT32_C(0x0706D366), UINT32_C(0x04DB59F7), + UINT32_C(0x03130058), UINT32_C(0x0FBF1E90), UINT32_C(0x02990341), + UINT32_C(0x052D42D0), UINT32_C(0x0D9F883C), UINT32_C(0x01C3CC5F), + UINT32_C(0x0602F8E0), UINT32_C(0x0719E908), UINT32_C(0x0152A103), + UINT32_C(0x05A33891), UINT32_C(0x0095E49C), UINT32_C(0x07DC00AE), + UINT32_C(0x00D04AA8), UINT32_C(0x034051A0), UINT32_C(0x01C589DC), + 
UINT32_C(0x044769AA) }, + { UINT32_C(0x05A4238D), UINT32_C(0x038BBADC), UINT32_C(0x024C6D7A), + UINT32_C(0x058D2A82), UINT32_C(0x0BE67DEB), UINT32_C(0x057F5E80), + UINT32_C(0x055D31EA), UINT32_C(0x0DB49C5A), UINT32_C(0x070BEC2C), + UINT32_C(0x0F3322C2), UINT32_C(0x06C3108C), UINT32_C(0x0A1130EB), + UINT32_C(0x01DE1843), UINT32_C(0x002476B9), UINT32_C(0x0C1602A0), + UINT32_C(0x020FD705), UINT32_C(0x0E87B144), UINT32_C(0x00271FD2), + UINT32_C(0x02A1E7C8) } }, + { { UINT32_C(0x0BB71E17), UINT32_C(0x00B697E6), UINT32_C(0x027C50D2), + UINT32_C(0x02FF8F72), UINT32_C(0x052B77CA), UINT32_C(0x02997C16), + UINT32_C(0x013C0178), UINT32_C(0x0F7FCEE6), UINT32_C(0x040B66E5), + UINT32_C(0x03A69C37), UINT32_C(0x02E55D76), UINT32_C(0x00F908D4), + UINT32_C(0x052718AB), UINT32_C(0x0076528F), UINT32_C(0x0306D84E), + UINT32_C(0x07EBCA7C), UINT32_C(0x01165F7E), UINT32_C(0x01DB45A9), + UINT32_C(0x067FCC94) }, + { UINT32_C(0x0791633D), UINT32_C(0x047BD9A1), UINT32_C(0x0A26D9CC), + UINT32_C(0x000BE536), UINT32_C(0x0F022B81), UINT32_C(0x064B6F3C), + UINT32_C(0x03B7DA09), UINT32_C(0x0F632491), UINT32_C(0x02A9B2EF), + UINT32_C(0x029A6C74), UINT32_C(0x039178C1), UINT32_C(0x06C1B980), + UINT32_C(0x025426C4), UINT32_C(0x00AC18E2), UINT32_C(0x0854C009), + UINT32_C(0x07A990A9), UINT32_C(0x0BA40528), UINT32_C(0x05C4D8A8), + UINT32_C(0x0628B343) } }, + { { UINT32_C(0x07812A25), UINT32_C(0x0179F4F9), UINT32_C(0x09DE2C08), + UINT32_C(0x02F4F1F9), UINT32_C(0x04F48E6A), UINT32_C(0x07549212), + UINT32_C(0x016DCA05), UINT32_C(0x07A3A534), UINT32_C(0x0359AADF), + UINT32_C(0x0E969384), UINT32_C(0x061DBB0C), UINT32_C(0x0E368BE3), + UINT32_C(0x07060163), UINT32_C(0x07CA82E3), UINT32_C(0x07332717), + UINT32_C(0x0002DFB2), UINT32_C(0x03AD0A18), UINT32_C(0x0417995E), + UINT32_C(0x0326668F) }, + { UINT32_C(0x09EF75E3), UINT32_C(0x07B04772), UINT32_C(0x0852DCD8), + UINT32_C(0x06097708), UINT32_C(0x0B957C2C), UINT32_C(0x038B98A1), + UINT32_C(0x02B82598), UINT32_C(0x0F132C73), UINT32_C(0x04CE431B), + 
UINT32_C(0x07D4CBE1), UINT32_C(0x049BA972), UINT32_C(0x00D3788D), + UINT32_C(0x07EDE5A2), UINT32_C(0x0635F8BD), UINT32_C(0x0EB9AB1A), + UINT32_C(0x02C621B4), UINT32_C(0x0BCBFF41), UINT32_C(0x0439D1F9), + UINT32_C(0x003044A8) } }, + { { UINT32_C(0x0CF8D334), UINT32_C(0x037C1C48), UINT32_C(0x05CD52D5), + UINT32_C(0x047578F0), UINT32_C(0x0BE7BC07), UINT32_C(0x06E68827), + UINT32_C(0x076445CB), UINT32_C(0x0FEBF611), UINT32_C(0x00142073), + UINT32_C(0x029F031E), UINT32_C(0x076C6434), UINT32_C(0x0F98F9D0), + UINT32_C(0x034E14D3), UINT32_C(0x038E0268), UINT32_C(0x0191305B), + UINT32_C(0x032A0200), UINT32_C(0x05EF4C75), UINT32_C(0x02826331), + UINT32_C(0x04D82A88) }, + { UINT32_C(0x0D51E170), UINT32_C(0x00D3F07F), UINT32_C(0x08365D15), + UINT32_C(0x0781A3A1), UINT32_C(0x0D4BE663), UINT32_C(0x00175259), + UINT32_C(0x000C1FA1), UINT32_C(0x0F00FCE0), UINT32_C(0x00299B52), + UINT32_C(0x0C7D7E01), UINT32_C(0x052A3C59), UINT32_C(0x07C9CF44), + UINT32_C(0x05E7EE2B), UINT32_C(0x035E7031), UINT32_C(0x0FE2CB7C), + UINT32_C(0x0403D2B4), UINT32_C(0x0FC9A748), UINT32_C(0x07D461AF), + UINT32_C(0x006E35B5) } }, + { { UINT32_C(0x0594D02E), UINT32_C(0x075E6F14), UINT32_C(0x03360822), + UINT32_C(0x03E7DDDB), UINT32_C(0x0F1C6110), UINT32_C(0x072483CF), + UINT32_C(0x03ECF221), UINT32_C(0x0D658C87), UINT32_C(0x060AC74F), + UINT32_C(0x0F51CC4C), UINT32_C(0x03EB69F7), UINT32_C(0x07B2F64B), + UINT32_C(0x0242F07B), UINT32_C(0x058E5984), UINT32_C(0x03A0B7A4), + UINT32_C(0x03CE806B), UINT32_C(0x06139B85), UINT32_C(0x01DAAFE3), + UINT32_C(0x0130F7E5) }, + { UINT32_C(0x020891BB), UINT32_C(0x077E28D4), UINT32_C(0x0AAEAA8D), + UINT32_C(0x00B2D799), UINT32_C(0x0E10388A), UINT32_C(0x001DFD31), + UINT32_C(0x059F85F1), UINT32_C(0x00BC7E55), UINT32_C(0x05309429), + UINT32_C(0x0FEDF8A8), UINT32_C(0x06B52B0D), UINT32_C(0x0E3F8A44), + UINT32_C(0x07A8E2A2), UINT32_C(0x07D5866C), UINT32_C(0x02DBCD7C), + UINT32_C(0x02895FBE), UINT32_C(0x0F66BDAD), UINT32_C(0x048C3CAD), + UINT32_C(0x078587AD) } }, + { { 
UINT32_C(0x0B1B7656), UINT32_C(0x02A1E440), UINT32_C(0x04EF5EA7), + UINT32_C(0x059FA6A2), UINT32_C(0x0C68CD6D), UINT32_C(0x005E8043), + UINT32_C(0x01AE592B), UINT32_C(0x00DD5F88), UINT32_C(0x0559B430), + UINT32_C(0x0BF3DF59), UINT32_C(0x011CBD52), UINT32_C(0x0DDDE17B), + UINT32_C(0x031D26D8), UINT32_C(0x0148FB57), UINT32_C(0x04EDBF2D), + UINT32_C(0x07220D0D), UINT32_C(0x0F7B0807), UINT32_C(0x076B1F6E), + UINT32_C(0x0306320E) }, + { UINT32_C(0x07EEE80E), UINT32_C(0x0754C15A), UINT32_C(0x093487F6), + UINT32_C(0x023D5CA0), UINT32_C(0x00BD77C2), UINT32_C(0x0271EF5D), + UINT32_C(0x04FAEAB7), UINT32_C(0x07EBA560), UINT32_C(0x015A18D8), + UINT32_C(0x039861D4), UINT32_C(0x041FD3C8), UINT32_C(0x0D5863CB), + UINT32_C(0x066C5F53), UINT32_C(0x06380D15), UINT32_C(0x0E825C9F), + UINT32_C(0x00BA76BE), UINT32_C(0x0BC4E3B8), UINT32_C(0x06216B12), + UINT32_C(0x03B4F0D4) } }, + }, + { + { { UINT32_C(0x0201C48B), UINT32_C(0x073C85A8), UINT32_C(0x095DC61E), + UINT32_C(0x05F14993), UINT32_C(0x0123BD40), UINT32_C(0x05907610), + UINT32_C(0x046FBB4C), UINT32_C(0x0A0F3B82), UINT32_C(0x078A34BB), + UINT32_C(0x003DB127), UINT32_C(0x052D9AD5), UINT32_C(0x05103EE9), + UINT32_C(0x0465988A), UINT32_C(0x005F3641), UINT32_C(0x085495F9), + UINT32_C(0x069A8F20), UINT32_C(0x064AA21B), UINT32_C(0x007CCB01), + UINT32_C(0x04384B61) }, + { UINT32_C(0x051DE678), UINT32_C(0x07820FBE), UINT32_C(0x063426A0), + UINT32_C(0x01B262F0), UINT32_C(0x0B0B9013), UINT32_C(0x045C8465), + UINT32_C(0x0240C64E), UINT32_C(0x0DDA697F), UINT32_C(0x0201A64C), + UINT32_C(0x016B17DF), UINT32_C(0x065E1757), UINT32_C(0x0F6B7334), + UINT32_C(0x07ED2866), UINT32_C(0x028D6370), UINT32_C(0x0E25340A), + UINT32_C(0x002693F4), UINT32_C(0x07D889A8), UINT32_C(0x06B215F7), + UINT32_C(0x062B5959) } }, + { { UINT32_C(0x0D9C3B89), UINT32_C(0x077CC1DC), UINT32_C(0x013DDAA7), + UINT32_C(0x0111C6F8), UINT32_C(0x0577407F), UINT32_C(0x01FF52EA), + UINT32_C(0x06D56CA6), UINT32_C(0x06331227), UINT32_C(0x03AB576F), + UINT32_C(0x0CD7FD4F), 
UINT32_C(0x06AF74C0), UINT32_C(0x0AD52465), + UINT32_C(0x041865E8), UINT32_C(0x0546A928), UINT32_C(0x00FE8F9D), + UINT32_C(0x07C2CDD8), UINT32_C(0x0C0D3434), UINT32_C(0x030F8525), + UINT32_C(0x05B51E81) }, + { UINT32_C(0x08A5170B), UINT32_C(0x074FC061), UINT32_C(0x0060E606), + UINT32_C(0x017D8D1E), UINT32_C(0x0A8E0395), UINT32_C(0x0428DCF1), + UINT32_C(0x046F46B8), UINT32_C(0x05E254D7), UINT32_C(0x05D05211), + UINT32_C(0x0B46AD84), UINT32_C(0x03446BA1), UINT32_C(0x00CA5FED), + UINT32_C(0x02A8C267), UINT32_C(0x0570EC98), UINT32_C(0x0750367D), + UINT32_C(0x0362D78B), UINT32_C(0x0C84DA94), UINT32_C(0x07AF8D8F), + UINT32_C(0x0583AA8B) } }, + { { UINT32_C(0x09126FAC), UINT32_C(0x06B05898), UINT32_C(0x0872DF85), + UINT32_C(0x048C3352), UINT32_C(0x0331E5B3), UINT32_C(0x076671FB), + UINT32_C(0x02076524), UINT32_C(0x0492A4A3), UINT32_C(0x06D57C7C), + UINT32_C(0x052A5C41), UINT32_C(0x052CA0DF), UINT32_C(0x0E7D0224), + UINT32_C(0x07241BC6), UINT32_C(0x0234848A), UINT32_C(0x048CE05E), + UINT32_C(0x01B286B5), UINT32_C(0x0B054813), UINT32_C(0x02F6EDFC), + UINT32_C(0x0250A4D8) }, + { UINT32_C(0x0831CD9D), UINT32_C(0x04B04313), UINT32_C(0x0F484946), + UINT32_C(0x03B996C8), UINT32_C(0x00F547BB), UINT32_C(0x007A0AA7), + UINT32_C(0x065BBAA5), UINT32_C(0x014C49BC), UINT32_C(0x03D6CABB), + UINT32_C(0x01EF46B3), UINT32_C(0x05A5D159), UINT32_C(0x0EDE3DB4), + UINT32_C(0x00D1B3A0), UINT32_C(0x02F97DFA), UINT32_C(0x0D68EB87), + UINT32_C(0x06CE81C0), UINT32_C(0x00D73B27), UINT32_C(0x0342609A), + UINT32_C(0x019C049C) } }, + { { UINT32_C(0x08BC45E5), UINT32_C(0x015B0C25), UINT32_C(0x0B2A43B0), + UINT32_C(0x00067BBC), UINT32_C(0x07B24685), UINT32_C(0x0046140C), + UINT32_C(0x0157806B), UINT32_C(0x049AE2AD), UINT32_C(0x0113F8DF), + UINT32_C(0x06BBA162), UINT32_C(0x0534E07B), UINT32_C(0x086988E1), + UINT32_C(0x00E2C213), UINT32_C(0x0513FA95), UINT32_C(0x0EC2A78F), + UINT32_C(0x02E28447), UINT32_C(0x011B9FFF), UINT32_C(0x01506FAF), + UINT32_C(0x07B4C5A9) }, + { UINT32_C(0x0AE71753), 
UINT32_C(0x0151FA30), UINT32_C(0x091691B4), + UINT32_C(0x02ACCC22), UINT32_C(0x0BA74B18), UINT32_C(0x0073B635), + UINT32_C(0x02F0EB55), UINT32_C(0x0CC9DF51), UINT32_C(0x0784FCDA), + UINT32_C(0x0BFAD098), UINT32_C(0x03F5BFD6), UINT32_C(0x006AD5C5), + UINT32_C(0x014F12F5), UINT32_C(0x0745527A), UINT32_C(0x03A6506B), + UINT32_C(0x015CF2C8), UINT32_C(0x039A3185), UINT32_C(0x077CD12B), + UINT32_C(0x02A9BAF3) } }, + { { UINT32_C(0x00D9229F), UINT32_C(0x039D37CD), UINT32_C(0x0948ECC6), + UINT32_C(0x0072BCB0), UINT32_C(0x0A458017), UINT32_C(0x038A159B), + UINT32_C(0x0368034D), UINT32_C(0x0B0315FA), UINT32_C(0x01756900), + UINT32_C(0x04149285), UINT32_C(0x03FFBD8A), UINT32_C(0x0079E774), + UINT32_C(0x0702A2CF), UINT32_C(0x0641C3A8), UINT32_C(0x0F3751BA), + UINT32_C(0x028EDF14), UINT32_C(0x090F681A), UINT32_C(0x012CF177), + UINT32_C(0x04614034) }, + { UINT32_C(0x04E4C072), UINT32_C(0x07E207E1), UINT32_C(0x02D8F8F8), + UINT32_C(0x013BFA68), UINT32_C(0x0CC798F9), UINT32_C(0x014BAAD6), + UINT32_C(0x023BD550), UINT32_C(0x0919F8D1), UINT32_C(0x03C00ADA), + UINT32_C(0x0758236E), UINT32_C(0x058602C2), UINT32_C(0x0FA0FE24), + UINT32_C(0x01A8C5A6), UINT32_C(0x0026B4C4), UINT32_C(0x0534F014), + UINT32_C(0x02CF2A7F), UINT32_C(0x00192714), UINT32_C(0x04B51417), + UINT32_C(0x0168C607) } }, + { { UINT32_C(0x019403A6), UINT32_C(0x04E6BA92), UINT32_C(0x0065202D), + UINT32_C(0x06FDAE5F), UINT32_C(0x0AD1C130), UINT32_C(0x05C03BED), + UINT32_C(0x00D7CFCE), UINT32_C(0x02B63E74), UINT32_C(0x06CD8D97), + UINT32_C(0x00E7608A), UINT32_C(0x05009FCD), UINT32_C(0x01026095), + UINT32_C(0x058890EC), UINT32_C(0x0662F635), UINT32_C(0x0F16F3A2), + UINT32_C(0x06B88A1B), UINT32_C(0x000D681A), UINT32_C(0x05689B12), + UINT32_C(0x0620658C) }, + { UINT32_C(0x0B48EFBA), UINT32_C(0x01574FA6), UINT32_C(0x0FC77D17), + UINT32_C(0x06CDF2A2), UINT32_C(0x0DCEA8A9), UINT32_C(0x00B1DE26), + UINT32_C(0x009A7C7A), UINT32_C(0x0435CC54), UINT32_C(0x06E8AF2E), + UINT32_C(0x09AFC5BC), UINT32_C(0x05124055), UINT32_C(0x045BF6E2), 
+ UINT32_C(0x0536C8AD), UINT32_C(0x073FE4CD), UINT32_C(0x0A467A40), + UINT32_C(0x03EB6B38), UINT32_C(0x05F039C6), UINT32_C(0x00622055), + UINT32_C(0x045DF262) } }, + { { UINT32_C(0x0C5E165D), UINT32_C(0x00A8610A), UINT32_C(0x062AF616), + UINT32_C(0x055190B9), UINT32_C(0x0F988454), UINT32_C(0x0395472A), + UINT32_C(0x036DCD3E), UINT32_C(0x0FDA6187), UINT32_C(0x036EC91D), + UINT32_C(0x0E66FCFC), UINT32_C(0x077BBD1F), UINT32_C(0x0DF3E1C9), + UINT32_C(0x040454AC), UINT32_C(0x03004F37), UINT32_C(0x0CBDED62), + UINT32_C(0x03DD5570), UINT32_C(0x05724DFF), UINT32_C(0x07B6002A), + UINT32_C(0x00B93C70) }, + { UINT32_C(0x06C8A9BC), UINT32_C(0x032D8B60), UINT32_C(0x0C0850D6), + UINT32_C(0x06C94F36), UINT32_C(0x0649CD3A), UINT32_C(0x000C0E51), + UINT32_C(0x07B40760), UINT32_C(0x0BFA6092), UINT32_C(0x019FB910), + UINT32_C(0x092A27FF), UINT32_C(0x02D6F975), UINT32_C(0x0E910EDA), + UINT32_C(0x01FFB3D4), UINT32_C(0x01814FFF), UINT32_C(0x0985A6F8), + UINT32_C(0x06787CA6), UINT32_C(0x0B7B7FC6), UINT32_C(0x01532265), + UINT32_C(0x06228702) } }, + { { UINT32_C(0x0391B195), UINT32_C(0x01F1A68F), UINT32_C(0x0AB9DD28), + UINT32_C(0x000B690E), UINT32_C(0x0C4FD58F), UINT32_C(0x05292C46), + UINT32_C(0x0017D075), UINT32_C(0x010E0044), UINT32_C(0x0709FE41), + UINT32_C(0x02F0CD13), UINT32_C(0x003D99BE), UINT32_C(0x0E6F68D8), + UINT32_C(0x04608708), UINT32_C(0x05B1F159), UINT32_C(0x0A4CFC70), + UINT32_C(0x02FB2946), UINT32_C(0x076D32E5), UINT32_C(0x0482F0ED), + UINT32_C(0x06ED3305) }, + { UINT32_C(0x05C4416F), UINT32_C(0x02270E15), UINT32_C(0x073143E0), + UINT32_C(0x02F4151F), UINT32_C(0x099069A7), UINT32_C(0x05437AEB), + UINT32_C(0x027A90CA), UINT32_C(0x0A75E48C), UINT32_C(0x013FC627), + UINT32_C(0x0300361B), UINT32_C(0x072745C2), UINT32_C(0x0C9DD555), + UINT32_C(0x05D86308), UINT32_C(0x03713AF4), UINT32_C(0x01AF9EBC), + UINT32_C(0x0157F18F), UINT32_C(0x0E008EAF), UINT32_C(0x0409010B), + UINT32_C(0x074F85AA) } }, + { { UINT32_C(0x045C5FF5), UINT32_C(0x046845EE), UINT32_C(0x074B8893), + 
UINT32_C(0x036C56E2), UINT32_C(0x0CC7B43B), UINT32_C(0x030C1789), + UINT32_C(0x05916A34), UINT32_C(0x0F2AFB7C), UINT32_C(0x0154EDEB), + UINT32_C(0x0407BF3E), UINT32_C(0x05362D80), UINT32_C(0x0CCA97B1), + UINT32_C(0x041BFF6D), UINT32_C(0x05DAE466), UINT32_C(0x07D9D691), + UINT32_C(0x023DBF89), UINT32_C(0x05162F52), UINT32_C(0x000CBF57), + UINT32_C(0x0154EDFD) }, + { UINT32_C(0x08BF712A), UINT32_C(0x06009B91), UINT32_C(0x0AFFBD38), + UINT32_C(0x03FD6332), UINT32_C(0x06CD1DC8), UINT32_C(0x06C678BF), + UINT32_C(0x0040E5CE), UINT32_C(0x02743457), UINT32_C(0x060DF50E), + UINT32_C(0x0691C947), UINT32_C(0x0746D675), UINT32_C(0x0D68B325), + UINT32_C(0x0290D55C), UINT32_C(0x015B144C), UINT32_C(0x05A0332F), + UINT32_C(0x0563DB53), UINT32_C(0x04CED890), UINT32_C(0x04AC67C8), + UINT32_C(0x04387D35) } }, + { { UINT32_C(0x0A66FBB8), UINT32_C(0x05FDBF97), UINT32_C(0x0A47124E), + UINT32_C(0x03FED0AF), UINT32_C(0x082B44B9), UINT32_C(0x0244ADCE), + UINT32_C(0x05980D8A), UINT32_C(0x0687D615), UINT32_C(0x07E4662D), + UINT32_C(0x03F2180A), UINT32_C(0x04BA4DB6), UINT32_C(0x03FE8141), + UINT32_C(0x04B2BC20), UINT32_C(0x006DF40A), UINT32_C(0x0AB2698D), + UINT32_C(0x0365D173), UINT32_C(0x08DE4017), UINT32_C(0x079E6BA2), + UINT32_C(0x02C7A033) }, + { UINT32_C(0x075570A1), UINT32_C(0x06A48901), UINT32_C(0x0492AC74), + UINT32_C(0x077D2844), UINT32_C(0x0DB87BFD), UINT32_C(0x01D218B2), + UINT32_C(0x0522DA69), UINT32_C(0x0B4F7CF4), UINT32_C(0x00841BC4), + UINT32_C(0x0E420155), UINT32_C(0x00BDBB35), UINT32_C(0x0BB5E945), + UINT32_C(0x06FE4123), UINT32_C(0x0435B025), UINT32_C(0x0ACCEA16), + UINT32_C(0x00BE381C), UINT32_C(0x0C3F4D0D), UINT32_C(0x03862E1B), + UINT32_C(0x04A46652) } }, + { { UINT32_C(0x009B3F23), UINT32_C(0x00CFBD75), UINT32_C(0x069BE715), + UINT32_C(0x009C9678), UINT32_C(0x013F2EB4), UINT32_C(0x04EE1278), + UINT32_C(0x06387FDD), UINT32_C(0x0329F9F1), UINT32_C(0x048E212F), + UINT32_C(0x0F24F073), UINT32_C(0x008F0FD5), UINT32_C(0x02F3DAFE), + UINT32_C(0x039C6160), UINT32_C(0x018F4D1D), 
UINT32_C(0x0E9D0F18), + UINT32_C(0x066F0916), UINT32_C(0x09931852), UINT32_C(0x040EEBEA), + UINT32_C(0x032448BB) }, + { UINT32_C(0x0C226E2C), UINT32_C(0x07706840), UINT32_C(0x0D3C1C34), + UINT32_C(0x07E4BA61), UINT32_C(0x0A51E4A1), UINT32_C(0x038E00FB), + UINT32_C(0x06E25F2A), UINT32_C(0x0C263EC1), UINT32_C(0x078D29D8), + UINT32_C(0x07C7272D), UINT32_C(0x0572E10B), UINT32_C(0x0B83C0DC), + UINT32_C(0x02179CDB), UINT32_C(0x066C84E3), UINT32_C(0x07675170), + UINT32_C(0x00BDF2F6), UINT32_C(0x0F52477D), UINT32_C(0x00FE3151), + UINT32_C(0x05460029) } }, + { { UINT32_C(0x0DA35EBF), UINT32_C(0x066B421E), UINT32_C(0x07116B3C), + UINT32_C(0x077330D7), UINT32_C(0x0CE4D316), UINT32_C(0x027318E8), + UINT32_C(0x04CA0B0C), UINT32_C(0x06EFBBCB), UINT32_C(0x027FF80D), + UINT32_C(0x07B56250), UINT32_C(0x03FBF443), UINT32_C(0x0E5E86E3), + UINT32_C(0x01050837), UINT32_C(0x027F8C63), UINT32_C(0x0040889F), + UINT32_C(0x0233D7DC), UINT32_C(0x085C1EB3), UINT32_C(0x0190948B), + UINT32_C(0x02A42839) }, + { UINT32_C(0x046020F0), UINT32_C(0x04A9DB75), UINT32_C(0x0C1F003A), + UINT32_C(0x05C091F8), UINT32_C(0x069D2F26), UINT32_C(0x05CBE28A), + UINT32_C(0x00B98CA0), UINT32_C(0x0C44F77C), UINT32_C(0x06591FB2), + UINT32_C(0x0336AA95), UINT32_C(0x05A28AC0), UINT32_C(0x0A8AC670), + UINT32_C(0x0735C3E5), UINT32_C(0x049911B7), UINT32_C(0x04F28112), + UINT32_C(0x0532B634), UINT32_C(0x00A3E84F), UINT32_C(0x06EA385D), + UINT32_C(0x01F2A03A) } }, + { { UINT32_C(0x06A09384), UINT32_C(0x0260C3CA), UINT32_C(0x092529A6), + UINT32_C(0x016D77CF), UINT32_C(0x0B8E2D9A), UINT32_C(0x01055E02), + UINT32_C(0x055BC4FD), UINT32_C(0x0CA2C0AF), UINT32_C(0x03A4ABF9), + UINT32_C(0x0290D54C), UINT32_C(0x07B6E3EE), UINT32_C(0x07074346), + UINT32_C(0x047E1F90), UINT32_C(0x06D2B228), UINT32_C(0x064225A4), + UINT32_C(0x06F125F2), UINT32_C(0x0D66264B), UINT32_C(0x01B0F052), + UINT32_C(0x070B7573) }, + { UINT32_C(0x0B2264B8), UINT32_C(0x04D4A619), UINT32_C(0x0AC1F517), + UINT32_C(0x049FE3F8), UINT32_C(0x08BEDBF0), 
UINT32_C(0x01EB5F66), + UINT32_C(0x0145535A), UINT32_C(0x042D102F), UINT32_C(0x04447303), + UINT32_C(0x067B60A3), UINT32_C(0x043A9645), UINT32_C(0x0D502303), + UINT32_C(0x0669CEC4), UINT32_C(0x052699E3), UINT32_C(0x0E740F66), + UINT32_C(0x011DF90D), UINT32_C(0x006017A2), UINT32_C(0x03C99A89), + UINT32_C(0x069500E3) } }, + { { UINT32_C(0x0184B415), UINT32_C(0x06F26FDD), UINT32_C(0x01E5007E), + UINT32_C(0x038A2542), UINT32_C(0x0DA8A807), UINT32_C(0x078F5424), + UINT32_C(0x04D3FA96), UINT32_C(0x0A456FBD), UINT32_C(0x062853C6), + UINT32_C(0x017211A6), UINT32_C(0x049854E5), UINT32_C(0x0A8F3585), + UINT32_C(0x079A3009), UINT32_C(0x07AFB481), UINT32_C(0x081AFE37), + UINT32_C(0x031A410E), UINT32_C(0x0EADF215), UINT32_C(0x02649FCC), + UINT32_C(0x00A68E58) }, + { UINT32_C(0x0A87B468), UINT32_C(0x0744629E), UINT32_C(0x010788AE), + UINT32_C(0x00DA10EC), UINT32_C(0x07BD591B), UINT32_C(0x07BC474E), + UINT32_C(0x02AE7E4E), UINT32_C(0x074ED106), UINT32_C(0x059550A8), + UINT32_C(0x0C2FBDF7), UINT32_C(0x078A0AB0), UINT32_C(0x019D9F46), + UINT32_C(0x030FE4BE), UINT32_C(0x00DF9F6A), UINT32_C(0x04D2A38F), + UINT32_C(0x052B1469), UINT32_C(0x005AE2E6), UINT32_C(0x07E6C02D), + UINT32_C(0x0283843A) } }, + { { UINT32_C(0x0784F95B), UINT32_C(0x01616DEF), UINT32_C(0x056C696A), + UINT32_C(0x03B98963), UINT32_C(0x085F2426), UINT32_C(0x07BDAC89), + UINT32_C(0x05EAFBF9), UINT32_C(0x09A4C8CC), UINT32_C(0x0558AA78), + UINT32_C(0x0D041BCD), UINT32_C(0x04BDD0B5), UINT32_C(0x037216D5), + UINT32_C(0x06BD4C93), UINT32_C(0x0042A72A), UINT32_C(0x0B4A6F17), + UINT32_C(0x0177EE47), UINT32_C(0x028752B7), UINT32_C(0x0750D182), + UINT32_C(0x04BE36EA) }, + { UINT32_C(0x01DCCF70), UINT32_C(0x05249FC9), UINT32_C(0x063EE812), + UINT32_C(0x0362E5A3), UINT32_C(0x017DB2F0), UINT32_C(0x05508041), + UINT32_C(0x078C050C), UINT32_C(0x0C161A22), UINT32_C(0x078E338A), + UINT32_C(0x0BB9EF36), UINT32_C(0x001185AB), UINT32_C(0x09058EAD), + UINT32_C(0x00D3AF42), UINT32_C(0x02FBEDA9), UINT32_C(0x0996A3FA), + 
UINT32_C(0x02E0B934), UINT32_C(0x08F57F1A), UINT32_C(0x025EB5CE), + UINT32_C(0x0254456F) } }, + { { UINT32_C(0x08F9B528), UINT32_C(0x04174130), UINT32_C(0x013E12B3), + UINT32_C(0x022B697D), UINT32_C(0x0B0CEF11), UINT32_C(0x03A2E8E2), + UINT32_C(0x00D96F4F), UINT32_C(0x0B4B7DF9), UINT32_C(0x0056458A), + UINT32_C(0x083BA433), UINT32_C(0x068A2473), UINT32_C(0x0D586B52), + UINT32_C(0x00ACD634), UINT32_C(0x01D1EAD7), UINT32_C(0x03036203), + UINT32_C(0x000C0094), UINT32_C(0x047A01B9), UINT32_C(0x0212F1A6), + UINT32_C(0x04D19921) }, + { UINT32_C(0x0837554E), UINT32_C(0x02ECC2C4), UINT32_C(0x0B80FBFE), + UINT32_C(0x07A5E03B), UINT32_C(0x041C1C48), UINT32_C(0x043DD0D4), + UINT32_C(0x04C36416), UINT32_C(0x0869B643), UINT32_C(0x028DC568), + UINT32_C(0x0F15A5D2), UINT32_C(0x00D7FC36), UINT32_C(0x04D7306E), + UINT32_C(0x0306A221), UINT32_C(0x04950B4A), UINT32_C(0x06DC4FCA), + UINT32_C(0x048D5878), UINT32_C(0x0032B7DE), UINT32_C(0x000E5973), + UINT32_C(0x04FFCD15) } }, + }, + { + { { UINT32_C(0x051368EE), UINT32_C(0x03C182D8), UINT32_C(0x0233E580), + UINT32_C(0x0467AAF9), UINT32_C(0x038EEE52), UINT32_C(0x01F8CCEB), + UINT32_C(0x04E7863B), UINT32_C(0x0974DE7F), UINT32_C(0x07C7D47D), + UINT32_C(0x01F4B806), UINT32_C(0x0059F163), UINT32_C(0x07DFA5B8), + UINT32_C(0x0449B3CD), UINT32_C(0x0378D1F4), UINT32_C(0x03486C59), + UINT32_C(0x02FFDC03), UINT32_C(0x0854568F), UINT32_C(0x017FDD91), + UINT32_C(0x0384B0DC) }, + { UINT32_C(0x08A3F84B), UINT32_C(0x065DE2C1), UINT32_C(0x085945B9), + UINT32_C(0x04E5C55A), UINT32_C(0x06CB12ED), UINT32_C(0x07B741CC), + UINT32_C(0x05B2C0EB), UINT32_C(0x0809AC7E), UINT32_C(0x04A46CA2), + UINT32_C(0x061FF16D), UINT32_C(0x03744313), UINT32_C(0x0C777A3B), + UINT32_C(0x0207FD18), UINT32_C(0x0539771F), UINT32_C(0x01004BCB), + UINT32_C(0x04A8FC6F), UINT32_C(0x0F0A63E8), UINT32_C(0x02373910), + UINT32_C(0x072840F7) } }, + { { UINT32_C(0x0E024391), UINT32_C(0x02781D5D), UINT32_C(0x05026331), + UINT32_C(0x025635CD), UINT32_C(0x0492939D), UINT32_C(0x00222466), + 
UINT32_C(0x0456BF4C), UINT32_C(0x07C8DEE7), UINT32_C(0x000178A5), + UINT32_C(0x051D50AE), UINT32_C(0x02CE451F), UINT32_C(0x01814C6B), + UINT32_C(0x0265AE7F), UINT32_C(0x0312E044), UINT32_C(0x0848FF64), + UINT32_C(0x013BB3DA), UINT32_C(0x0C153136), UINT32_C(0x019DF825), + UINT32_C(0x0462A6B6) }, + { UINT32_C(0x0E9AB68C), UINT32_C(0x04B05DA9), UINT32_C(0x04C2481A), + UINT32_C(0x076E7298), UINT32_C(0x09F0C636), UINT32_C(0x01F7D7D4), + UINT32_C(0x00F9BB8A), UINT32_C(0x0F077B4D), UINT32_C(0x0259165A), + UINT32_C(0x0592DC29), UINT32_C(0x02303769), UINT32_C(0x0EDF23B9), + UINT32_C(0x06E3C4F3), UINT32_C(0x026481C0), UINT32_C(0x033547D1), + UINT32_C(0x04349C82), UINT32_C(0x0FB49FD0), UINT32_C(0x03D48B1E), + UINT32_C(0x00EDD6A9) } }, + { { UINT32_C(0x09496A3E), UINT32_C(0x0779CC41), UINT32_C(0x0F31204C), + UINT32_C(0x01DD9727), UINT32_C(0x0B88711D), UINT32_C(0x0531C3F2), + UINT32_C(0x04294797), UINT32_C(0x043683B3), UINT32_C(0x05DBB4CC), + UINT32_C(0x06B27F93), UINT32_C(0x04CEFE76), UINT32_C(0x02EF8CFB), + UINT32_C(0x065C5182), UINT32_C(0x051D70E4), UINT32_C(0x0B92D89E), + UINT32_C(0x015A48BA), UINT32_C(0x00689714), UINT32_C(0x02F0F899), + UINT32_C(0x03A05527) }, + { UINT32_C(0x04B88B67), UINT32_C(0x0337896D), UINT32_C(0x0AC27DF4), + UINT32_C(0x02CFE168), UINT32_C(0x003AC24A), UINT32_C(0x0287B4A1), + UINT32_C(0x04C9337D), UINT32_C(0x0480FCAA), UINT32_C(0x0385E818), + UINT32_C(0x0698332E), UINT32_C(0x00B177F0), UINT32_C(0x088F3F24), + UINT32_C(0x056A2745), UINT32_C(0x06A53116), UINT32_C(0x0101CC1F), + UINT32_C(0x013E9DBA), UINT32_C(0x06227F55), UINT32_C(0x03D027B4), + UINT32_C(0x02CD8668) } }, + { { UINT32_C(0x0076683D), UINT32_C(0x076BEE0D), UINT32_C(0x0D7D7B4C), + UINT32_C(0x0108643A), UINT32_C(0x0F993C30), UINT32_C(0x07B71D95), + UINT32_C(0x029E4008), UINT32_C(0x034C59B6), UINT32_C(0x00E01922), + UINT32_C(0x062750BC), UINT32_C(0x00DA23D4), UINT32_C(0x0BF7FFAF), + UINT32_C(0x016F2E12), UINT32_C(0x0546677C), UINT32_C(0x038327C5), + UINT32_C(0x07930C31), UINT32_C(0x03297791), 
UINT32_C(0x06E93707), + UINT32_C(0x0731AA7A) }, + { UINT32_C(0x0B99594F), UINT32_C(0x0300795B), UINT32_C(0x0C5F3D55), + UINT32_C(0x01C1DE37), UINT32_C(0x02FD7C9F), UINT32_C(0x001493C6), + UINT32_C(0x07BB523B), UINT32_C(0x08D81CF9), UINT32_C(0x000974EA), + UINT32_C(0x04B4CFBC), UINT32_C(0x04354B41), UINT32_C(0x0644AB94), + UINT32_C(0x0251A61B), UINT32_C(0x0555FAF5), UINT32_C(0x03713B98), + UINT32_C(0x0597947C), UINT32_C(0x061DDC4D), UINT32_C(0x01C1E655), + UINT32_C(0x05DDAC10) } }, + { { UINT32_C(0x02662A6A), UINT32_C(0x0721BA5B), UINT32_C(0x08BFB362), + UINT32_C(0x02A23D78), UINT32_C(0x04F666A1), UINT32_C(0x060FB317), + UINT32_C(0x0729C7ED), UINT32_C(0x09B1B389), UINT32_C(0x031F8856), + UINT32_C(0x06913D9E), UINT32_C(0x0779217C), UINT32_C(0x0A3634CD), + UINT32_C(0x06292B3F), UINT32_C(0x01E6FDE6), UINT32_C(0x0F97C1F0), + UINT32_C(0x0698999D), UINT32_C(0x0D773548), UINT32_C(0x01ED7CE9), + UINT32_C(0x00FFC55A) }, + { UINT32_C(0x0D76A58E), UINT32_C(0x0195519C), UINT32_C(0x02C2F7AB), + UINT32_C(0x061D1820), UINT32_C(0x09A1252D), UINT32_C(0x07772B8E), + UINT32_C(0x05554A30), UINT32_C(0x0687BCF0), UINT32_C(0x06CE8978), + UINT32_C(0x0961AAB6), UINT32_C(0x0611194A), UINT32_C(0x097F9E4C), + UINT32_C(0x07E8543A), UINT32_C(0x076F7FC5), UINT32_C(0x039F7F09), + UINT32_C(0x074DF751), UINT32_C(0x000B4239), UINT32_C(0x010D59A8), + UINT32_C(0x03F90438) } }, + { { UINT32_C(0x0DA09D54), UINT32_C(0x06AF7630), UINT32_C(0x02BF95A8), + UINT32_C(0x055D4226), UINT32_C(0x059FD1D0), UINT32_C(0x06B060C9), + UINT32_C(0x07D177E4), UINT32_C(0x03F4F180), UINT32_C(0x021C92CF), + UINT32_C(0x02D3DD59), UINT32_C(0x048EB409), UINT32_C(0x07E17E45), + UINT32_C(0x05EEE57B), UINT32_C(0x01B0CED0), UINT32_C(0x0E7E68AB), + UINT32_C(0x043C0C09), UINT32_C(0x0A766549), UINT32_C(0x0006D7E3), + UINT32_C(0x06CB262D) }, + { UINT32_C(0x045007F6), UINT32_C(0x077C78B0), UINT32_C(0x006040A8), + UINT32_C(0x06713C8D), UINT32_C(0x09341EBC), UINT32_C(0x0236E27C), + UINT32_C(0x055A82B4), UINT32_C(0x06F7750F), 
UINT32_C(0x0669305F), + UINT32_C(0x017EE81A), UINT32_C(0x01216750), UINT32_C(0x0ED65974), + UINT32_C(0x03FEF768), UINT32_C(0x01F1588F), UINT32_C(0x0E26B74A), + UINT32_C(0x078B116C), UINT32_C(0x0B1F0885), UINT32_C(0x05EF5659), + UINT32_C(0x02E63355) } }, + { { UINT32_C(0x0FB0D3ED), UINT32_C(0x003E5A50), UINT32_C(0x0C55AAAF), + UINT32_C(0x0289AC3D), UINT32_C(0x05EF5174), UINT32_C(0x0719E0EE), + UINT32_C(0x01A9C3D8), UINT32_C(0x0DE06CD1), UINT32_C(0x07ED918A), + UINT32_C(0x0BF6A107), UINT32_C(0x06149FAB), UINT32_C(0x0880197B), + UINT32_C(0x060CCF4B), UINT32_C(0x015F00A0), UINT32_C(0x026084C4), + UINT32_C(0x06C15B05), UINT32_C(0x04E4098B), UINT32_C(0x063ED2C8), + UINT32_C(0x058C6384) }, + { UINT32_C(0x040FA002), UINT32_C(0x01B4B412), UINT32_C(0x08A0A8F3), + UINT32_C(0x015D5274), UINT32_C(0x0B3D6C31), UINT32_C(0x0241F67E), + UINT32_C(0x0383A0C0), UINT32_C(0x0D2CCE25), UINT32_C(0x07A721DD), + UINT32_C(0x0FD7994F), UINT32_C(0x04852FC2), UINT32_C(0x0EEB0BC8), + UINT32_C(0x05CF0812), UINT32_C(0x06594895), UINT32_C(0x0F6294B1), + UINT32_C(0x047E9685), UINT32_C(0x03C1ADBF), UINT32_C(0x00B567D9), + UINT32_C(0x005C4AB1) } }, + { { UINT32_C(0x0696BA83), UINT32_C(0x06603D4F), UINT32_C(0x0885A978), + UINT32_C(0x011657F3), UINT32_C(0x0774554D), UINT32_C(0x01806495), + UINT32_C(0x01B33254), UINT32_C(0x0A1BB9D6), UINT32_C(0x03A6DF67), + UINT32_C(0x03AB9C8C), UINT32_C(0x0737480A), UINT32_C(0x00203D86), + UINT32_C(0x04CE906D), UINT32_C(0x0751DBBB), UINT32_C(0x01AB53E1), + UINT32_C(0x01405C83), UINT32_C(0x0894C75D), UINT32_C(0x02ACD3EC), + UINT32_C(0x02926ACF) }, + { UINT32_C(0x0E8C01EF), UINT32_C(0x043477F5), UINT32_C(0x068FA361), + UINT32_C(0x07FC59F7), UINT32_C(0x04967BAC), UINT32_C(0x0236FCA8), + UINT32_C(0x053E4F2C), UINT32_C(0x02BA3E65), UINT32_C(0x05F9F6F0), + UINT32_C(0x064247B4), UINT32_C(0x021B5084), UINT32_C(0x0894325C), + UINT32_C(0x04EFE79C), UINT32_C(0x0188ED3F), UINT32_C(0x0D4FE809), + UINT32_C(0x044BAE94), UINT32_C(0x0C8112AE), UINT32_C(0x05C68229), + 
UINT32_C(0x07D43896) } }, + { { UINT32_C(0x046C1FB6), UINT32_C(0x077D8036), UINT32_C(0x0295DD8C), + UINT32_C(0x04452F28), UINT32_C(0x0B23C464), UINT32_C(0x0644D5BA), + UINT32_C(0x05069E01), UINT32_C(0x090DF002), UINT32_C(0x03B40591), + UINT32_C(0x01F28172), UINT32_C(0x06905D57), UINT32_C(0x0DF1C74E), + UINT32_C(0x05CE4958), UINT32_C(0x079BDE8E), UINT32_C(0x0D3F2F1A), + UINT32_C(0x04E07C5F), UINT32_C(0x088FF1FA), UINT32_C(0x05C72030), + UINT32_C(0x03BE09B6) }, + { UINT32_C(0x0A78B572), UINT32_C(0x052D6B4B), UINT32_C(0x091101F1), + UINT32_C(0x01EB64B1), UINT32_C(0x0AA87947), UINT32_C(0x01ECBA5D), + UINT32_C(0x03E02CC6), UINT32_C(0x0FDA4839), UINT32_C(0x02FF59B8), + UINT32_C(0x0CA6ED0F), UINT32_C(0x06C0BD08), UINT32_C(0x0948203F), + UINT32_C(0x00417563), UINT32_C(0x03ED5E44), UINT32_C(0x09D9F1D1), + UINT32_C(0x043138E9), UINT32_C(0x087C76A9), UINT32_C(0x0436C464), + UINT32_C(0x065BC41C) } }, + { { UINT32_C(0x0878503F), UINT32_C(0x02F87D12), UINT32_C(0x02476646), + UINT32_C(0x0245CC6E), UINT32_C(0x0D4C90B6), UINT32_C(0x03F5323B), + UINT32_C(0x05B608C2), UINT32_C(0x0E11AA7B), UINT32_C(0x03BBF4CC), + UINT32_C(0x0E62F0E5), UINT32_C(0x03FDD83B), UINT32_C(0x01FAF12E), + UINT32_C(0x00E02D6E), UINT32_C(0x0404666D), UINT32_C(0x0A39480C), + UINT32_C(0x05904EE4), UINT32_C(0x0D422EC7), UINT32_C(0x009272AF), + UINT32_C(0x065E518B) }, + { UINT32_C(0x0947A480), UINT32_C(0x0638CCA2), UINT32_C(0x0B86EFCD), + UINT32_C(0x04C5912B), UINT32_C(0x0416F142), UINT32_C(0x066CD9A8), + UINT32_C(0x0062F342), UINT32_C(0x030CBA20), UINT32_C(0x0675D320), + UINT32_C(0x02C4F492), UINT32_C(0x04263BD8), UINT32_C(0x0B10ED23), + UINT32_C(0x00458FD7), UINT32_C(0x064D3804), UINT32_C(0x030CE729), + UINT32_C(0x055F1902), UINT32_C(0x005C9288), UINT32_C(0x05B65212), + UINT32_C(0x03463ED7) } }, + { { UINT32_C(0x0002FA40), UINT32_C(0x019C27F1), UINT32_C(0x00CBB750), + UINT32_C(0x03DB3435), UINT32_C(0x07286E98), UINT32_C(0x0279AAFF), + UINT32_C(0x06D46384), UINT32_C(0x0A49DB6A), UINT32_C(0x0137478E), + 
UINT32_C(0x07036ADC), UINT32_C(0x0156A020), UINT32_C(0x03444CA2), + UINT32_C(0x014A059C), UINT32_C(0x062920C4), UINT32_C(0x05340D48), + UINT32_C(0x07AB2B40), UINT32_C(0x060E1CBF), UINT32_C(0x06DBC3C7), + UINT32_C(0x02A6E451) }, + { UINT32_C(0x02203C97), UINT32_C(0x0318811D), UINT32_C(0x02528A1B), + UINT32_C(0x04016192), UINT32_C(0x002C3086), UINT32_C(0x031D212C), + UINT32_C(0x03FC1DA6), UINT32_C(0x0E3A234E), UINT32_C(0x048A2B44), + UINT32_C(0x046AB91A), UINT32_C(0x03F8806B), UINT32_C(0x073943DE), + UINT32_C(0x02B12570), UINT32_C(0x024DEAC9), UINT32_C(0x08C3B2AA), + UINT32_C(0x06910619), UINT32_C(0x01EBE0ED), UINT32_C(0x04FB5E82), + UINT32_C(0x068938E9) } }, + { { UINT32_C(0x06A8409F), UINT32_C(0x03819FA0), UINT32_C(0x04EBCC7D), + UINT32_C(0x05295667), UINT32_C(0x00BD47C4), UINT32_C(0x02F397A5), + UINT32_C(0x00B133A1), UINT32_C(0x073E4AFA), UINT32_C(0x0760D526), + UINT32_C(0x0D372CAA), UINT32_C(0x0068759A), UINT32_C(0x09A7813F), + UINT32_C(0x000A0F4E), UINT32_C(0x01EAF02F), UINT32_C(0x09F88085), + UINT32_C(0x0117D84A), UINT32_C(0x0B583330), UINT32_C(0x07FFDDE3), + UINT32_C(0x00C0B54F) }, + { UINT32_C(0x0593BC03), UINT32_C(0x05294489), UINT32_C(0x0C95575C), + UINT32_C(0x06A16930), UINT32_C(0x07E57953), UINT32_C(0x04258C35), + UINT32_C(0x027EF886), UINT32_C(0x09A129B5), UINT32_C(0x034A8854), + UINT32_C(0x0BB5AF8F), UINT32_C(0x0469C5BA), UINT32_C(0x000C4849), + UINT32_C(0x00CE9665), UINT32_C(0x02759E17), UINT32_C(0x087D763E), + UINT32_C(0x03FB717D), UINT32_C(0x0F3FD635), UINT32_C(0x007CA5FC), + UINT32_C(0x01D3A8B2) } }, + { { UINT32_C(0x068172DA), UINT32_C(0x05B9F788), UINT32_C(0x0612E973), + UINT32_C(0x0052E050), UINT32_C(0x099B39D0), UINT32_C(0x061F5F0F), + UINT32_C(0x0799AF1A), UINT32_C(0x0466C10B), UINT32_C(0x0680E8D3), + UINT32_C(0x04361EC0), UINT32_C(0x05210B2E), UINT32_C(0x0DF23AB3), + UINT32_C(0x02B3A0B2), UINT32_C(0x0380194E), UINT32_C(0x09D77AFB), + UINT32_C(0x06BCE4AB), UINT32_C(0x05EAD2E7), UINT32_C(0x02DD9B74), + UINT32_C(0x033D66F2) }, + { 
UINT32_C(0x0BF1C993), UINT32_C(0x04E38933), UINT32_C(0x02FC4FAF), + UINT32_C(0x0461AE62), UINT32_C(0x0F6D1B38), UINT32_C(0x021B47B4), + UINT32_C(0x01F061C9), UINT32_C(0x051CC234), UINT32_C(0x01C8E186), + UINT32_C(0x001C7EF9), UINT32_C(0x0664E0E2), UINT32_C(0x048E8CC7), + UINT32_C(0x015C9670), UINT32_C(0x0481B87A), UINT32_C(0x05BCAD05), + UINT32_C(0x003B38E6), UINT32_C(0x00886CA1), UINT32_C(0x00B0D706), + UINT32_C(0x026557A5) } }, + { { UINT32_C(0x05F0E5DA), UINT32_C(0x03682274), UINT32_C(0x0F4E352F), + UINT32_C(0x0105AE83), UINT32_C(0x0A820E71), UINT32_C(0x022C5CEC), + UINT32_C(0x03DD2CFC), UINT32_C(0x0298E61A), UINT32_C(0x00120917), + UINT32_C(0x0B0B64DF), UINT32_C(0x03C1333E), UINT32_C(0x03C5D41B), + UINT32_C(0x04B5D215), UINT32_C(0x0187971D), UINT32_C(0x0389EAD7), + UINT32_C(0x03CFCCE2), UINT32_C(0x063F13FF), UINT32_C(0x0652C165), + UINT32_C(0x07742EFC) }, + { UINT32_C(0x0931C0F0), UINT32_C(0x018F45E5), UINT32_C(0x0C4C756D), + UINT32_C(0x0537A469), UINT32_C(0x0433FB52), UINT32_C(0x0754DECC), + UINT32_C(0x04D896F7), UINT32_C(0x04335219), UINT32_C(0x073BBC0E), + UINT32_C(0x083BA2C0), UINT32_C(0x012D3B9E), UINT32_C(0x023EABD5), + UINT32_C(0x04475CF9), UINT32_C(0x07A0DA39), UINT32_C(0x088DDF48), + UINT32_C(0x002FFFDF), UINT32_C(0x0D8B7000), UINT32_C(0x06504250), + UINT32_C(0x00F1A818) } }, + { { UINT32_C(0x052228CC), UINT32_C(0x06FA4348), UINT32_C(0x0F049E30), + UINT32_C(0x0713CA99), UINT32_C(0x0E5D39FE), UINT32_C(0x0057B8DA), + UINT32_C(0x003125E1), UINT32_C(0x0CC15492), UINT32_C(0x07700BE8), + UINT32_C(0x08CFE785), UINT32_C(0x00CEB57F), UINT32_C(0x0F478327), + UINT32_C(0x05A00945), UINT32_C(0x0490F14E), UINT32_C(0x025BA378), + UINT32_C(0x060ED998), UINT32_C(0x01B249B5), UINT32_C(0x0023BC4C), + UINT32_C(0x04DEDEC8) }, + { UINT32_C(0x0BA1E090), UINT32_C(0x027EBAC8), UINT32_C(0x0DD6FE71), + UINT32_C(0x01F0ADDC), UINT32_C(0x0549F634), UINT32_C(0x06BE8416), + UINT32_C(0x02F156E2), UINT32_C(0x0A531A53), UINT32_C(0x00AFBE73), + UINT32_C(0x0FFF18EB), UINT32_C(0x0020C1DC), 
UINT32_C(0x0F409F61), + UINT32_C(0x04E3859C), UINT32_C(0x015D5ECF), UINT32_C(0x03B3F268), + UINT32_C(0x0288B503), UINT32_C(0x03A276BD), UINT32_C(0x0286EE9C), + UINT32_C(0x03166F91) } }, + { { UINT32_C(0x0F1CAC2C), UINT32_C(0x035777A8), UINT32_C(0x0AF34113), + UINT32_C(0x050DD855), UINT32_C(0x0B6BC9C1), UINT32_C(0x07010D91), + UINT32_C(0x0452008D), UINT32_C(0x0471A3DA), UINT32_C(0x05830FDC), + UINT32_C(0x0F222BBE), UINT32_C(0x04848384), UINT32_C(0x049CFD4D), + UINT32_C(0x01817D66), UINT32_C(0x0724627E), UINT32_C(0x082270B8), + UINT32_C(0x07ED5A0F), UINT32_C(0x0EEA015A), UINT32_C(0x0700F77E), + UINT32_C(0x007E36E1) }, + { UINT32_C(0x09244F78), UINT32_C(0x049DAC0A), UINT32_C(0x0573D581), + UINT32_C(0x001D1B4C), UINT32_C(0x0F0116EB), UINT32_C(0x03CFFD42), + UINT32_C(0x043FFF66), UINT32_C(0x048523A0), UINT32_C(0x0671CEF3), + UINT32_C(0x0EC2D7AF), UINT32_C(0x0049EBD0), UINT32_C(0x0F4034B6), + UINT32_C(0x05C34B54), UINT32_C(0x025E680B), UINT32_C(0x0D2C5BEA), + UINT32_C(0x06F544F6), UINT32_C(0x0B0CFA5A), UINT32_C(0x018276AE), + UINT32_C(0x077D6B16) } }, + }, + { + { { UINT32_C(0x00E10587), UINT32_C(0x01885D11), UINT32_C(0x00A74863), + UINT32_C(0x02F34C13), UINT32_C(0x0BD4B6A2), UINT32_C(0x00E26C23), + UINT32_C(0x07F483FF), UINT32_C(0x0A97D9DC), UINT32_C(0x02338A61), + UINT32_C(0x07F72547), UINT32_C(0x03535AFC), UINT32_C(0x0B8E96B4), + UINT32_C(0x001E804D), UINT32_C(0x03BD1DFE), UINT32_C(0x0A6ED29A), + UINT32_C(0x0634588A), UINT32_C(0x0F0F6D32), UINT32_C(0x0117DDE8), + UINT32_C(0x037107C5) }, + { UINT32_C(0x0BF698BD), UINT32_C(0x0671195E), UINT32_C(0x0E9DC570), + UINT32_C(0x052CBC52), UINT32_C(0x0C08C8ED), UINT32_C(0x04213081), + UINT32_C(0x00A08E33), UINT32_C(0x0A4BC1ED), UINT32_C(0x00B396EB), + UINT32_C(0x0FF34D08), UINT32_C(0x04A4BDD9), UINT32_C(0x0A6F615E), + UINT32_C(0x0534B5A0), UINT32_C(0x0057D6A7), UINT32_C(0x0F6CE02C), + UINT32_C(0x06F6315B), UINT32_C(0x0D666709), UINT32_C(0x050AF998), + UINT32_C(0x006F0E3F) } }, + { { UINT32_C(0x06965640), UINT32_C(0x0081356B), 
UINT32_C(0x0F41E038), + UINT32_C(0x06713218), UINT32_C(0x0FB9E806), UINT32_C(0x0121D001), + UINT32_C(0x07B97EDD), UINT32_C(0x0CDDEFA2), UINT32_C(0x0585D94D), + UINT32_C(0x065F4CD7), UINT32_C(0x03CFC91B), UINT32_C(0x06B603EF), + UINT32_C(0x07128C67), UINT32_C(0x030595F0), UINT32_C(0x0E51FB71), + UINT32_C(0x06217FBE), UINT32_C(0x0B730732), UINT32_C(0x06277C1D), + UINT32_C(0x04AE17C6) }, + { UINT32_C(0x0CFB1D0D), UINT32_C(0x053AA14E), UINT32_C(0x0442F9BE), + UINT32_C(0x0786EEC1), UINT32_C(0x0EF775DF), UINT32_C(0x07A66D5B), + UINT32_C(0x032CDF98), UINT32_C(0x0CA3E106), UINT32_C(0x07042EBA), + UINT32_C(0x00FD51A1), UINT32_C(0x02B743F2), UINT32_C(0x0D214308), + UINT32_C(0x03293BD7), UINT32_C(0x0635DC49), UINT32_C(0x0EB86870), + UINT32_C(0x03EB73BF), UINT32_C(0x07F02587), UINT32_C(0x0017A824), + UINT32_C(0x01F012DD) } }, + { { UINT32_C(0x0E0BF039), UINT32_C(0x003B2CD3), UINT32_C(0x0C2C0F48), + UINT32_C(0x039AED35), UINT32_C(0x044C7CCC), UINT32_C(0x0364D078), + UINT32_C(0x02C04409), UINT32_C(0x0CAEF9C4), UINT32_C(0x05C37F4A), + UINT32_C(0x0D99EE77), UINT32_C(0x0200140A), UINT32_C(0x0A3BBBDE), + UINT32_C(0x041E7C9A), UINT32_C(0x0371B744), UINT32_C(0x05A165FF), + UINT32_C(0x05A7216A), UINT32_C(0x0A9CE444), UINT32_C(0x03DD4951), + UINT32_C(0x031EC3D2) }, + { UINT32_C(0x08EAF6EB), UINT32_C(0x0703CD67), UINT32_C(0x0DEBC6FB), + UINT32_C(0x079F8F47), UINT32_C(0x090D3A5B), UINT32_C(0x05FF4EFE), + UINT32_C(0x05A2BC42), UINT32_C(0x006C3961), UINT32_C(0x00795219), + UINT32_C(0x0FF8315E), UINT32_C(0x05BD4244), UINT32_C(0x02EEA381), + UINT32_C(0x02022F89), UINT32_C(0x07878373), UINT32_C(0x084B3FA1), + UINT32_C(0x0715713B), UINT32_C(0x0EF55815), UINT32_C(0x0748BA61), + UINT32_C(0x0445AEE6) } }, + { { UINT32_C(0x0DCBF5E2), UINT32_C(0x03557A9E), UINT32_C(0x063D2A67), + UINT32_C(0x00EFE9F6), UINT32_C(0x09FA350B), UINT32_C(0x03896396), + UINT32_C(0x01F8036E), UINT32_C(0x0DC0F10D), UINT32_C(0x02B56329), + UINT32_C(0x02504A0F), UINT32_C(0x063A7100), UINT32_C(0x0FA5A9E7), + 
UINT32_C(0x07665FD9), UINT32_C(0x05DE4FB8), UINT32_C(0x00484D0C), + UINT32_C(0x03AEE4FB), UINT32_C(0x046B10E6), UINT32_C(0x04D5E0D6), + UINT32_C(0x01F835F4) }, + { UINT32_C(0x047D2B4B), UINT32_C(0x05847634), UINT32_C(0x0C0A675C), + UINT32_C(0x00120157), UINT32_C(0x07AF8F0E), UINT32_C(0x0251A99B), + UINT32_C(0x00CEE4D0), UINT32_C(0x07351889), UINT32_C(0x0621596F), + UINT32_C(0x00C5618B), UINT32_C(0x066E65D2), UINT32_C(0x049D9FBE), + UINT32_C(0x01E37BCF), UINT32_C(0x01C629C9), UINT32_C(0x0EC1F561), + UINT32_C(0x02AFE546), UINT32_C(0x0005751E), UINT32_C(0x018C42B2), + UINT32_C(0x01EAA03C) } }, + { { UINT32_C(0x0D959BD9), UINT32_C(0x038EEBBB), UINT32_C(0x08419A01), + UINT32_C(0x05F1CCBE), UINT32_C(0x03171501), UINT32_C(0x07C18C55), + UINT32_C(0x035306D9), UINT32_C(0x011DBDEA), UINT32_C(0x036E5963), + UINT32_C(0x090BCEBA), UINT32_C(0x01350854), UINT32_C(0x0BB28AF5), + UINT32_C(0x04F74928), UINT32_C(0x0330FF01), UINT32_C(0x095BA009), + UINT32_C(0x0578BFB6), UINT32_C(0x0FCF0801), UINT32_C(0x03302535), + UINT32_C(0x06BFF304) }, + { UINT32_C(0x0384E611), UINT32_C(0x00AD5348), UINT32_C(0x0E493BE6), + UINT32_C(0x03CA4CDB), UINT32_C(0x0C4D1BD5), UINT32_C(0x027B8CE4), + UINT32_C(0x02E5B4CB), UINT32_C(0x0707AF6D), UINT32_C(0x06A39971), + UINT32_C(0x0BA42E4C), UINT32_C(0x0755E74C), UINT32_C(0x04AD6360), + UINT32_C(0x068A6F0D), UINT32_C(0x023144DE), UINT32_C(0x07375993), + UINT32_C(0x02780B3A), UINT32_C(0x0E492027), UINT32_C(0x05808694), + UINT32_C(0x07431A53) } }, + { { UINT32_C(0x010FBD04), UINT32_C(0x019723AA), UINT32_C(0x025CF109), + UINT32_C(0x03F3A3A7), UINT32_C(0x0D9D8E3F), UINT32_C(0x02F7C4B0), + UINT32_C(0x03DF7DF6), UINT32_C(0x0B60F06D), UINT32_C(0x02A5D26D), + UINT32_C(0x0C5F86A4), UINT32_C(0x06E7FCD9), UINT32_C(0x0DEF388F), + UINT32_C(0x05AC83A6), UINT32_C(0x0217A751), UINT32_C(0x00401D85), + UINT32_C(0x075A320E), UINT32_C(0x01AE8195), UINT32_C(0x06F4F327), + UINT32_C(0x04C77D2F) }, + { UINT32_C(0x09493BE8), UINT32_C(0x00A14C7B), UINT32_C(0x091C8FF9), + 
UINT32_C(0x01DEAA22), UINT32_C(0x0AB4BA27), UINT32_C(0x0562E012), + UINT32_C(0x07519BAB), UINT32_C(0x062D9AAA), UINT32_C(0x058B7863), + UINT32_C(0x08A2419C), UINT32_C(0x035D8277), UINT32_C(0x0F5C3CF3), + UINT32_C(0x03527C6B), UINT32_C(0x00F3B9E0), UINT32_C(0x0EF25B4A), + UINT32_C(0x0127A8B4), UINT32_C(0x0CE17BD2), UINT32_C(0x0195E53E), + UINT32_C(0x071B9B4C) } }, + { { UINT32_C(0x0DAA2FB7), UINT32_C(0x021B0EB2), UINT32_C(0x0B55E936), + UINT32_C(0x057A20CC), UINT32_C(0x01398941), UINT32_C(0x06E0BA5C), + UINT32_C(0x07DEDA3A), UINT32_C(0x00B1377E), UINT32_C(0x008093F5), + UINT32_C(0x00F8C281), UINT32_C(0x05D4332E), UINT32_C(0x0CF54E5F), + UINT32_C(0x039D7F62), UINT32_C(0x0699AB5B), UINT32_C(0x05FE8914), + UINT32_C(0x01C38070), UINT32_C(0x0685A0AC), UINT32_C(0x0104BEEE), + UINT32_C(0x06E340C1) }, + { UINT32_C(0x0FDAA949), UINT32_C(0x02A92433), UINT32_C(0x04E882FB), + UINT32_C(0x0435EA3D), UINT32_C(0x0CFC4BD1), UINT32_C(0x065698D5), + UINT32_C(0x02B61BEC), UINT32_C(0x0A7025E9), UINT32_C(0x06C77C84), + UINT32_C(0x066340BA), UINT32_C(0x07C0B02F), UINT32_C(0x0F9B4BCA), + UINT32_C(0x0207D1CA), UINT32_C(0x061D80D9), UINT32_C(0x061524CC), + UINT32_C(0x03F6A9F8), UINT32_C(0x094B6D53), UINT32_C(0x017C53E1), + UINT32_C(0x00BC771D) } }, + { { UINT32_C(0x0C8D6167), UINT32_C(0x0171F9BD), UINT32_C(0x05943DEC), + UINT32_C(0x01837B9B), UINT32_C(0x06E46FBD), UINT32_C(0x050C893D), + UINT32_C(0x0034F50C), UINT32_C(0x0E98EEDA), UINT32_C(0x06603ADA), + UINT32_C(0x0FF3362D), UINT32_C(0x023406A4), UINT32_C(0x03DC7095), + UINT32_C(0x03BCCC93), UINT32_C(0x033BDFE7), UINT32_C(0x0AA65D81), + UINT32_C(0x0739E2AF), UINT32_C(0x03455112), UINT32_C(0x06643DC0), + UINT32_C(0x020DF18F) }, + { UINT32_C(0x084BF04E), UINT32_C(0x024B7756), UINT32_C(0x059E51F9), + UINT32_C(0x05998215), UINT32_C(0x03684ACA), UINT32_C(0x065BD6DC), + UINT32_C(0x03075ACB), UINT32_C(0x01AD9C9A), UINT32_C(0x07375334), + UINT32_C(0x01731A12), UINT32_C(0x000384D3), UINT32_C(0x02632FF6), + UINT32_C(0x0023BB3A), UINT32_C(0x0348AF93), 
UINT32_C(0x088B02BB), + UINT32_C(0x02C7DE6E), UINT32_C(0x0933F326), UINT32_C(0x00B1B61E), + UINT32_C(0x076AC60E) } }, + { { UINT32_C(0x0757C756), UINT32_C(0x05545A21), UINT32_C(0x018FFA93), + UINT32_C(0x06C9A78F), UINT32_C(0x02C61841), UINT32_C(0x040A1739), + UINT32_C(0x04441B1D), UINT32_C(0x052E0E81), UINT32_C(0x07E14C4D), + UINT32_C(0x0FFFC0D5), UINT32_C(0x03072E2E), UINT32_C(0x007584A9), + UINT32_C(0x01259E6D), UINT32_C(0x002D25F5), UINT32_C(0x0C519B94), + UINT32_C(0x01BB1C14), UINT32_C(0x02CEB824), UINT32_C(0x02BBBEA4), + UINT32_C(0x035E112A) }, + { UINT32_C(0x0288CF7B), UINT32_C(0x0045C5C7), UINT32_C(0x002D8D8C), + UINT32_C(0x03BE5B42), UINT32_C(0x0A81E4C6), UINT32_C(0x0141578F), + UINT32_C(0x033F7AC2), UINT32_C(0x0EE71541), UINT32_C(0x067EAD7B), + UINT32_C(0x07E75F23), UINT32_C(0x011AF108), UINT32_C(0x047CA170), + UINT32_C(0x05308227), UINT32_C(0x054879D4), UINT32_C(0x0A37B132), + UINT32_C(0x00E6D1CA), UINT32_C(0x0629367A), UINT32_C(0x03276C5F), + UINT32_C(0x004CBC63) } }, + { { UINT32_C(0x00CF69E7), UINT32_C(0x0584FC9D), UINT32_C(0x06952F73), + UINT32_C(0x0281D51C), UINT32_C(0x037663C6), UINT32_C(0x0537F046), + UINT32_C(0x0725FFD4), UINT32_C(0x0C66B9FC), UINT32_C(0x049A3EDF), + UINT32_C(0x0F4FB830), UINT32_C(0x06728E50), UINT32_C(0x07B188F6), + UINT32_C(0x021C067A), UINT32_C(0x06F06BE8), UINT32_C(0x00AA347B), + UINT32_C(0x031AABF8), UINT32_C(0x03347446), UINT32_C(0x04B62373), + UINT32_C(0x043D128D) }, + { UINT32_C(0x02AE7427), UINT32_C(0x00F73AC9), UINT32_C(0x0095D833), + UINT32_C(0x00E6005C), UINT32_C(0x007FD8B7), UINT32_C(0x074C2204), + UINT32_C(0x00283649), UINT32_C(0x084EDD51), UINT32_C(0x05AC7321), + UINT32_C(0x08C40328), UINT32_C(0x04BFB5EF), UINT32_C(0x0A555FE0), + UINT32_C(0x04C70C7C), UINT32_C(0x076D0055), UINT32_C(0x0425B2E6), + UINT32_C(0x029D910F), UINT32_C(0x0B0A51DB), UINT32_C(0x04B38F9B), + UINT32_C(0x01028D80) } }, + { { UINT32_C(0x0F3DE4D2), UINT32_C(0x06047E27), UINT32_C(0x03505298), + UINT32_C(0x062523ED), UINT32_C(0x0F0D4A9F), 
UINT32_C(0x0150EF42), + UINT32_C(0x056CBCAD), UINT32_C(0x0B36A628), UINT32_C(0x071A352A), + UINT32_C(0x0D7A2CB8), UINT32_C(0x050FEDFC), UINT32_C(0x02BAC823), + UINT32_C(0x010EDF77), UINT32_C(0x0459668A), UINT32_C(0x04041659), + UINT32_C(0x07432BB7), UINT32_C(0x0F9651D8), UINT32_C(0x01999DE2), + UINT32_C(0x00CBECA1) }, + { UINT32_C(0x06A2607F), UINT32_C(0x06DC83E9), UINT32_C(0x005B1A08), + UINT32_C(0x05B9405C), UINT32_C(0x091E04D3), UINT32_C(0x0546E232), + UINT32_C(0x0566FE22), UINT32_C(0x0695BB9A), UINT32_C(0x0074A612), + UINT32_C(0x0E9787A0), UINT32_C(0x077B1860), UINT32_C(0x05404661), + UINT32_C(0x00184991), UINT32_C(0x02A1C038), UINT32_C(0x0A57F0B8), + UINT32_C(0x0382A987), UINT32_C(0x0691AC01), UINT32_C(0x02D8A8A9), + UINT32_C(0x05A19B11) } }, + { { UINT32_C(0x081DC2A6), UINT32_C(0x017A4663), UINT32_C(0x0209D21F), + UINT32_C(0x06A6AA7F), UINT32_C(0x051CC44C), UINT32_C(0x000D763F), + UINT32_C(0x034EFD90), UINT32_C(0x0DEE4042), UINT32_C(0x07CBAFFB), + UINT32_C(0x082C34D9), UINT32_C(0x02EB3FE5), UINT32_C(0x0BF15295), + UINT32_C(0x027D4089), UINT32_C(0x056DBCC8), UINT32_C(0x024595A7), + UINT32_C(0x03EC08BE), UINT32_C(0x057085E2), UINT32_C(0x017E7356), + UINT32_C(0x049CE745) }, + { UINT32_C(0x0123BA29), UINT32_C(0x0045804E), UINT32_C(0x08DEDF0E), + UINT32_C(0x00CB57D1), UINT32_C(0x0F61E577), UINT32_C(0x06EB6B79), + UINT32_C(0x05E3EED1), UINT32_C(0x09CB4DCD), UINT32_C(0x05DAE17F), + UINT32_C(0x034F393E), UINT32_C(0x03F5164C), UINT32_C(0x05F3C4A2), + UINT32_C(0x0708CC05), UINT32_C(0x04F2CAC7), UINT32_C(0x0798DD7C), + UINT32_C(0x0513331D), UINT32_C(0x004B3A41), UINT32_C(0x00801443), + UINT32_C(0x0196B762) } }, + { { UINT32_C(0x0356B52C), UINT32_C(0x03557744), UINT32_C(0x050104FE), + UINT32_C(0x069B4687), UINT32_C(0x0337937D), UINT32_C(0x018C3F4F), + UINT32_C(0x00568175), UINT32_C(0x01EE408E), UINT32_C(0x04092DE8), + UINT32_C(0x05E59E83), UINT32_C(0x0299816F), UINT32_C(0x05556DCC), + UINT32_C(0x038621D8), UINT32_C(0x0278A753), UINT32_C(0x05BC9211), + 
UINT32_C(0x009E162C), UINT32_C(0x0A3409DC), UINT32_C(0x04076EA9), + UINT32_C(0x0464CEC0) }, + { UINT32_C(0x0A659158), UINT32_C(0x022396D5), UINT32_C(0x08424377), + UINT32_C(0x0054703B), UINT32_C(0x0D2722F5), UINT32_C(0x03BAEB8A), + UINT32_C(0x04B65383), UINT32_C(0x07997DDA), UINT32_C(0x07F6A3B2), + UINT32_C(0x0BAFF348), UINT32_C(0x0299F9D9), UINT32_C(0x0B97AA04), + UINT32_C(0x02BA4DB8), UINT32_C(0x0696475F), UINT32_C(0x0B68D089), + UINT32_C(0x0472CB9F), UINT32_C(0x08CACFAE), UINT32_C(0x028807A6), + UINT32_C(0x009288EF) } }, + { { UINT32_C(0x0ED9CDF5), UINT32_C(0x00B31C4E), UINT32_C(0x0C549857), + UINT32_C(0x02D7F964), UINT32_C(0x074F9F98), UINT32_C(0x0792DF5F), + UINT32_C(0x020ED722), UINT32_C(0x0AA8C982), UINT32_C(0x02A2408C), + UINT32_C(0x053CDF30), UINT32_C(0x01CF47E5), UINT32_C(0x08E3FF2F), + UINT32_C(0x0333087A), UINT32_C(0x028090D6), UINT32_C(0x032F6CA0), + UINT32_C(0x02CF642E), UINT32_C(0x0DAB4498), UINT32_C(0x04A66B66), + UINT32_C(0x07248BCE) }, + { UINT32_C(0x092B1FE6), UINT32_C(0x02AD6EEE), UINT32_C(0x0EB5963E), + UINT32_C(0x0621B6BD), UINT32_C(0x04A1A8EF), UINT32_C(0x0374D40D), + UINT32_C(0x0573791F), UINT32_C(0x0DED8513), UINT32_C(0x03AEE0F5), + UINT32_C(0x03420B85), UINT32_C(0x04366099), UINT32_C(0x087C7CA7), + UINT32_C(0x00B9ADB9), UINT32_C(0x056E8EBA), UINT32_C(0x0E532676), + UINT32_C(0x05D27A22), UINT32_C(0x0554F4E5), UINT32_C(0x0474B581), + UINT32_C(0x02A6694F) } }, + { { UINT32_C(0x080DE633), UINT32_C(0x0639306E), UINT32_C(0x0CA4F76E), + UINT32_C(0x05BB3DCB), UINT32_C(0x06DA081A), UINT32_C(0x052EA9E2), + UINT32_C(0x017AF437), UINT32_C(0x07D25D54), UINT32_C(0x0772DE75), + UINT32_C(0x05670178), UINT32_C(0x06E81696), UINT32_C(0x0D28F3A1), + UINT32_C(0x07AF022A), UINT32_C(0x07B0D67B), UINT32_C(0x04C17950), + UINT32_C(0x001B706E), UINT32_C(0x04CE5637), UINT32_C(0x04CE1F2F), + UINT32_C(0x0211C385) }, + { UINT32_C(0x0E5D0D74), UINT32_C(0x0411D39E), UINT32_C(0x06137F67), + UINT32_C(0x00487846), UINT32_C(0x01B15D1C), UINT32_C(0x02B65C31), + 
UINT32_C(0x06027C03), UINT32_C(0x01F15577), UINT32_C(0x011F0564), + UINT32_C(0x066BA415), UINT32_C(0x00520E15), UINT32_C(0x01F82222), + UINT32_C(0x07F8C048), UINT32_C(0x05A09F41), UINT32_C(0x0BBA92E8), + UINT32_C(0x017E3648), UINT32_C(0x0861CC16), UINT32_C(0x07A9DAF6), + UINT32_C(0x05F2C6E5) } }, + { { UINT32_C(0x04DA7708), UINT32_C(0x057D4066), UINT32_C(0x01F6A8A0), + UINT32_C(0x00EE18FE), UINT32_C(0x05BB3FCD), UINT32_C(0x071CB79F), + UINT32_C(0x038BBCE0), UINT32_C(0x0AAFE87E), UINT32_C(0x0245536B), + UINT32_C(0x0D0401C6), UINT32_C(0x027984FD), UINT32_C(0x0064D51F), + UINT32_C(0x04DCF2A2), UINT32_C(0x037E99AD), UINT32_C(0x03487C33), + UINT32_C(0x068353F1), UINT32_C(0x0BA863FC), UINT32_C(0x00721339), + UINT32_C(0x0754D195) }, + { UINT32_C(0x09031706), UINT32_C(0x0327DD4E), UINT32_C(0x05DDA163), + UINT32_C(0x03F893AE), UINT32_C(0x0F1F3959), UINT32_C(0x02EC658A), + UINT32_C(0x05A438AD), UINT32_C(0x0AE93F30), UINT32_C(0x01D8B56B), + UINT32_C(0x09592309), UINT32_C(0x0189BB66), UINT32_C(0x050E8D52), + UINT32_C(0x0526168D), UINT32_C(0x07FD307D), UINT32_C(0x08A4C7BC), + UINT32_C(0x03B12944), UINT32_C(0x08329BC8), UINT32_C(0x02A4A1CE), + UINT32_C(0x0087B284) } }, + }, + { + { { UINT32_C(0x01C86157), UINT32_C(0x0017ED5F), UINT32_C(0x079948D2), + UINT32_C(0x02FD6755), UINT32_C(0x0A5E2B5C), UINT32_C(0x00395EB0), + UINT32_C(0x070A6ECC), UINT32_C(0x031E307B), UINT32_C(0x070DA4B9), + UINT32_C(0x0166FB85), UINT32_C(0x02AF3210), UINT32_C(0x079379FF), + UINT32_C(0x010504D3), UINT32_C(0x022DFB7B), UINT32_C(0x0C019CF3), + UINT32_C(0x05E0727A), UINT32_C(0x0CE73CB9), UINT32_C(0x005CF0C7), + UINT32_C(0x039AD397) }, + { UINT32_C(0x08E15F36), UINT32_C(0x04E08562), UINT32_C(0x0EC12012), + UINT32_C(0x009F68C4), UINT32_C(0x0733E4B1), UINT32_C(0x014872C8), + UINT32_C(0x0490CCCC), UINT32_C(0x0E53957D), UINT32_C(0x05CD4F2D), + UINT32_C(0x082FD79D), UINT32_C(0x05F2B6D8), UINT32_C(0x0C7600B1), + UINT32_C(0x02D81D79), UINT32_C(0x007520D1), UINT32_C(0x09EEC681), + UINT32_C(0x04D6FB1B), 
UINT32_C(0x0641B032), UINT32_C(0x0283E5C0), + UINT32_C(0x072A39F3) } }, + { { UINT32_C(0x01C9C2EC), UINT32_C(0x03A87BAF), UINT32_C(0x056E06F3), + UINT32_C(0x02AA4CD5), UINT32_C(0x0D64394D), UINT32_C(0x044B2642), + UINT32_C(0x018E8ECB), UINT32_C(0x02C6B29E), UINT32_C(0x00B5D0E1), + UINT32_C(0x0795603C), UINT32_C(0x027FEAC7), UINT32_C(0x07400535), + UINT32_C(0x04BD90C2), UINT32_C(0x0212CC37), UINT32_C(0x018B9D6C), + UINT32_C(0x05FC9D53), UINT32_C(0x03C7248E), UINT32_C(0x038A1FEB), + UINT32_C(0x06C809CE) }, + { UINT32_C(0x06F1CACC), UINT32_C(0x0758DFC1), UINT32_C(0x019C0D17), + UINT32_C(0x0749CD61), UINT32_C(0x00C0724E), UINT32_C(0x0667F861), + UINT32_C(0x03CDAF01), UINT32_C(0x0DE66325), UINT32_C(0x0767BD47), + UINT32_C(0x0A1FDF93), UINT32_C(0x04E66E27), UINT32_C(0x004977BC), + UINT32_C(0x05EE6515), UINT32_C(0x018DEC59), UINT32_C(0x03B99628), + UINT32_C(0x02B69F3F), UINT32_C(0x019CC516), UINT32_C(0x07CB4623), + UINT32_C(0x0353C229) } }, + { { UINT32_C(0x05A2D6F0), UINT32_C(0x04982642), UINT32_C(0x088CE54F), + UINT32_C(0x06602A66), UINT32_C(0x0A17C84E), UINT32_C(0x02BE4DCE), + UINT32_C(0x0718C264), UINT32_C(0x0FDCB2D1), UINT32_C(0x01F7AC59), + UINT32_C(0x0E4C2C6C), UINT32_C(0x01B5B9D3), UINT32_C(0x0CCEB9E5), + UINT32_C(0x04C7FB08), UINT32_C(0x04600748), UINT32_C(0x09F19FD9), + UINT32_C(0x011C0141), UINT32_C(0x0A08392D), UINT32_C(0x07099321), + UINT32_C(0x075F26A3) }, + { UINT32_C(0x0AF35FA1), UINT32_C(0x01CA261B), UINT32_C(0x0FF7838D), + UINT32_C(0x00432E0D), UINT32_C(0x08296922), UINT32_C(0x077D0499), + UINT32_C(0x06A4988A), UINT32_C(0x0D91BD7B), UINT32_C(0x007D4895), + UINT32_C(0x01A77EB2), UINT32_C(0x0491B2C9), UINT32_C(0x07D6BB4E), + UINT32_C(0x065BB828), UINT32_C(0x05D28C77), UINT32_C(0x034C1831), + UINT32_C(0x03111000), UINT32_C(0x048A3F8F), UINT32_C(0x007D19EE), + UINT32_C(0x006FAC9D) } }, + { { UINT32_C(0x0719C87C), UINT32_C(0x07385BC9), UINT32_C(0x01F42502), + UINT32_C(0x074D4561), UINT32_C(0x02CA79B8), UINT32_C(0x01BE905A), + UINT32_C(0x044E03DC), 
UINT32_C(0x05034A1A), UINT32_C(0x012B4964), + UINT32_C(0x0BF284CE), UINT32_C(0x0080C91A), UINT32_C(0x0B4EE205), + UINT32_C(0x0121E876), UINT32_C(0x04C7D981), UINT32_C(0x09D6F0D5), + UINT32_C(0x011438CC), UINT32_C(0x0906A777), UINT32_C(0x05FD89D1), + UINT32_C(0x01D7C3AC) }, + { UINT32_C(0x0392D834), UINT32_C(0x0199066B), UINT32_C(0x0E53AECD), + UINT32_C(0x0279A7E5), UINT32_C(0x0E8B313A), UINT32_C(0x04F8A2AF), + UINT32_C(0x062A274F), UINT32_C(0x0869ED62), UINT32_C(0x01C4081F), + UINT32_C(0x0DD27618), UINT32_C(0x0093ED89), UINT32_C(0x053869B6), + UINT32_C(0x07CB8D0C), UINT32_C(0x00D79FE6), UINT32_C(0x04A20332), + UINT32_C(0x03366324), UINT32_C(0x0C0B74C3), UINT32_C(0x070C316E), + UINT32_C(0x066AD76F) } }, + { { UINT32_C(0x011FA55B), UINT32_C(0x0775F5E8), UINT32_C(0x0C7BF6F4), + UINT32_C(0x07FCBE6F), UINT32_C(0x021BE3C2), UINT32_C(0x0017D919), + UINT32_C(0x01644455), UINT32_C(0x0AEE3FD7), UINT32_C(0x0259DD5E), + UINT32_C(0x002EC22F), UINT32_C(0x00D308F5), UINT32_C(0x038F6CBC), + UINT32_C(0x04FDED85), UINT32_C(0x001A53FA), UINT32_C(0x03E09FE9), + UINT32_C(0x0312E74F), UINT32_C(0x09B20907), UINT32_C(0x078CC1DB), + UINT32_C(0x066D9E8D) }, + { UINT32_C(0x08C7A5B7), UINT32_C(0x038B0D82), UINT32_C(0x063E4030), + UINT32_C(0x06CE3A75), UINT32_C(0x0488AD55), UINT32_C(0x0054AAAA), + UINT32_C(0x044F068C), UINT32_C(0x0CCE69AA), UINT32_C(0x014EF6E0), + UINT32_C(0x068C0346), UINT32_C(0x01443327), UINT32_C(0x0A416B3D), + UINT32_C(0x04EB25A7), UINT32_C(0x00B6E80F), UINT32_C(0x0819D7FD), + UINT32_C(0x061AFFF1), UINT32_C(0x070E8C81), UINT32_C(0x061C5530), + UINT32_C(0x0473CB02) } }, + { { UINT32_C(0x08D8BE36), UINT32_C(0x057DE7D1), UINT32_C(0x06025FA9), + UINT32_C(0x0039A5D5), UINT32_C(0x00FD02EF), UINT32_C(0x02EE7913), + UINT32_C(0x04E5E224), UINT32_C(0x052DC251), UINT32_C(0x04138D66), + UINT32_C(0x09FAF17A), UINT32_C(0x030D57A1), UINT32_C(0x08B8F06A), + UINT32_C(0x01D015A2), UINT32_C(0x0153FCA9), UINT32_C(0x0C54D5DF), + UINT32_C(0x00BAAE4A), UINT32_C(0x0940A0FA), UINT32_C(0x038292EA), 
+ UINT32_C(0x02C97BC9) }, + { UINT32_C(0x024BFA00), UINT32_C(0x057378C3), UINT32_C(0x0A92C578), + UINT32_C(0x07A6310B), UINT32_C(0x0F28F901), UINT32_C(0x04ED3F57), + UINT32_C(0x037C7D8A), UINT32_C(0x00B71701), UINT32_C(0x0173A01A), + UINT32_C(0x0A9B43A3), UINT32_C(0x0196E612), UINT32_C(0x07111189), + UINT32_C(0x03F5BC1D), UINT32_C(0x05154B49), UINT32_C(0x0DD68D97), + UINT32_C(0x0220CC1D), UINT32_C(0x0895DF59), UINT32_C(0x0014717C), + UINT32_C(0x0384CEF8) } }, + { { UINT32_C(0x05F8022D), UINT32_C(0x07431A94), UINT32_C(0x0A7A9097), + UINT32_C(0x06FC555D), UINT32_C(0x0578029C), UINT32_C(0x00758DC8), + UINT32_C(0x00FDAF66), UINT32_C(0x0AE902D1), UINT32_C(0x06FDDF4D), + UINT32_C(0x056FCD2A), UINT32_C(0x0393CA27), UINT32_C(0x083EDDB9), + UINT32_C(0x071C8D5E), UINT32_C(0x02DA7EE1), UINT32_C(0x091B7578), + UINT32_C(0x022CF2B8), UINT32_C(0x08F559AF), UINT32_C(0x00F551D9), + UINT32_C(0x04CE7872) }, + { UINT32_C(0x0450FD39), UINT32_C(0x05325A33), UINT32_C(0x06D04EAD), + UINT32_C(0x0111017F), UINT32_C(0x04B7D043), UINT32_C(0x009CD030), + UINT32_C(0x02760D24), UINT32_C(0x0B333C83), UINT32_C(0x0178F799), + UINT32_C(0x06E56E99), UINT32_C(0x06AC4002), UINT32_C(0x06C6F55C), + UINT32_C(0x04212C69), UINT32_C(0x0776C549), UINT32_C(0x05AD10F2), + UINT32_C(0x07D4C443), UINT32_C(0x093443A3), UINT32_C(0x01E4DAC4), + UINT32_C(0x062304F4) } }, + { { UINT32_C(0x09FFF942), UINT32_C(0x039E7FBF), UINT32_C(0x0E4E0544), + UINT32_C(0x01C8EF03), UINT32_C(0x015953E4), UINT32_C(0x0641511A), + UINT32_C(0x0340D7DD), UINT32_C(0x04FBA207), UINT32_C(0x04DCD411), + UINT32_C(0x0CE5C435), UINT32_C(0x06C85A54), UINT32_C(0x0596F209), + UINT32_C(0x006C47CF), UINT32_C(0x039823F7), UINT32_C(0x01721D4C), + UINT32_C(0x03FE86B7), UINT32_C(0x044008FA), UINT32_C(0x05E107EC), + UINT32_C(0x0146DF75) }, + { UINT32_C(0x03BF30CF), UINT32_C(0x034E0D17), UINT32_C(0x0C6EB8E1), + UINT32_C(0x016786DE), UINT32_C(0x0B4F8D94), UINT32_C(0x01E54C18), + UINT32_C(0x0409537F), UINT32_C(0x0AD69F59), UINT32_C(0x04423A96), + 
UINT32_C(0x01427559), UINT32_C(0x0517F981), UINT32_C(0x0C655FF1), + UINT32_C(0x072A4662), UINT32_C(0x014DB58F), UINT32_C(0x09979D6E), + UINT32_C(0x05396DDB), UINT32_C(0x03E46CF7), UINT32_C(0x062B9D62), + UINT32_C(0x0334D070) } }, + { { UINT32_C(0x0C8B2AF6), UINT32_C(0x04C4030A), UINT32_C(0x03F4EA61), + UINT32_C(0x06B51CFD), UINT32_C(0x08530E96), UINT32_C(0x035106EB), + UINT32_C(0x07ACB7C9), UINT32_C(0x003FAA6D), UINT32_C(0x005AFE21), + UINT32_C(0x09C9266C), UINT32_C(0x02684731), UINT32_C(0x0745AC29), + UINT32_C(0x06162CD8), UINT32_C(0x069A0B95), UINT32_C(0x090B8391), + UINT32_C(0x0570D83A), UINT32_C(0x09AE0D06), UINT32_C(0x054A95B8), + UINT32_C(0x02CB380B) }, + { UINT32_C(0x02779E4D), UINT32_C(0x04B32E43), UINT32_C(0x0C0582B0), + UINT32_C(0x03521F35), UINT32_C(0x089A8F39), UINT32_C(0x03BF1933), + UINT32_C(0x027659AD), UINT32_C(0x0607CE4F), UINT32_C(0x072A97A4), + UINT32_C(0x0F6C2DAD), UINT32_C(0x0648C496), UINT32_C(0x02D0AF23), + UINT32_C(0x036927AF), UINT32_C(0x032E9075), UINT32_C(0x01C0AD79), + UINT32_C(0x02044936), UINT32_C(0x0DBCFEA2), UINT32_C(0x07DADFF1), + UINT32_C(0x06EDBCF7) } }, + { { UINT32_C(0x0209B80C), UINT32_C(0x01E54056), UINT32_C(0x0E397930), + UINT32_C(0x01AD9D0C), UINT32_C(0x0908F895), UINT32_C(0x02A9A26E), + UINT32_C(0x00744EB0), UINT32_C(0x0B2D7673), UINT32_C(0x00736623), + UINT32_C(0x0F9EEB98), UINT32_C(0x07E8C693), UINT32_C(0x05615D70), + UINT32_C(0x077E9858), UINT32_C(0x045C88B2), UINT32_C(0x06BA3291), + UINT32_C(0x02089363), UINT32_C(0x0D1148CA), UINT32_C(0x026B1CE4), + UINT32_C(0x0267E39A) }, + { UINT32_C(0x0E9F76E1), UINT32_C(0x0700247A), UINT32_C(0x02F5C013), + UINT32_C(0x045D6B0B), UINT32_C(0x02398752), UINT32_C(0x011414B8), + UINT32_C(0x0189B0D8), UINT32_C(0x065621BE), UINT32_C(0x07214CB5), + UINT32_C(0x0C72745E), UINT32_C(0x026E830D), UINT32_C(0x0BB5064F), + UINT32_C(0x03BD6991), UINT32_C(0x067AABA6), UINT32_C(0x03AAD9C4), + UINT32_C(0x01C748B3), UINT32_C(0x0F2AD6A8), UINT32_C(0x07B1AAD0), + UINT32_C(0x0515A45B) } }, + { { 
UINT32_C(0x0D45283F), UINT32_C(0x033F0C2B), UINT32_C(0x0EF7ECBA), + UINT32_C(0x03F31217), UINT32_C(0x0BF2BDDB), UINT32_C(0x05AE5F1D), + UINT32_C(0x015A33AE), UINT32_C(0x0B1D94AB), UINT32_C(0x00BB377A), + UINT32_C(0x077D4679), UINT32_C(0x056AF89C), UINT32_C(0x07165F99), + UINT32_C(0x046A17A3), UINT32_C(0x04CF6178), UINT32_C(0x00269B9B), + UINT32_C(0x03F1B9F6), UINT32_C(0x07453C34), UINT32_C(0x07253011), + UINT32_C(0x074559A2) }, + { UINT32_C(0x08D82B0E), UINT32_C(0x00D12F5F), UINT32_C(0x01FD52F5), + UINT32_C(0x03C4069B), UINT32_C(0x0B01B2FE), UINT32_C(0x05E81250), + UINT32_C(0x035DC621), UINT32_C(0x034EA726), UINT32_C(0x04613127), + UINT32_C(0x0B36D680), UINT32_C(0x06F52BC5), UINT32_C(0x04B16171), + UINT32_C(0x02156292), UINT32_C(0x0180583E), UINT32_C(0x0C8D5B19), + UINT32_C(0x043B9BE2), UINT32_C(0x097EF032), UINT32_C(0x0307A273), + UINT32_C(0x02ECC50D) } }, + { { UINT32_C(0x0613AC50), UINT32_C(0x01BBB9CD), UINT32_C(0x032CF181), + UINT32_C(0x04565F80), UINT32_C(0x09B00E52), UINT32_C(0x011EC5E2), + UINT32_C(0x05E7561C), UINT32_C(0x05B6572C), UINT32_C(0x072FBF3A), + UINT32_C(0x04311E38), UINT32_C(0x0350633E), UINT32_C(0x0C27E7E9), + UINT32_C(0x02DC82FC), UINT32_C(0x01DE746D), UINT32_C(0x078E3236), + UINT32_C(0x0712B6B0), UINT32_C(0x000A7E83), UINT32_C(0x0115CB1B), + UINT32_C(0x04C1103F) }, + { UINT32_C(0x0359ED2E), UINT32_C(0x065ADF64), UINT32_C(0x025E3238), + UINT32_C(0x076BEAFD), UINT32_C(0x072427F7), UINT32_C(0x05DBCD55), + UINT32_C(0x07AB37FF), UINT32_C(0x0865BFD5), UINT32_C(0x04382D44), + UINT32_C(0x0F1D5580), UINT32_C(0x06D00533), UINT32_C(0x08D6A784), + UINT32_C(0x05BB29BF), UINT32_C(0x005CEC3F), UINT32_C(0x06575E68), + UINT32_C(0x053585D5), UINT32_C(0x0403BCB0), UINT32_C(0x02F77540), + UINT32_C(0x02470C7F) } }, + { { UINT32_C(0x02C087ED), UINT32_C(0x07961B4B), UINT32_C(0x0F657FC0), + UINT32_C(0x00B16431), UINT32_C(0x01885C19), UINT32_C(0x029A3FB7), + UINT32_C(0x0721535D), UINT32_C(0x02FAD79C), UINT32_C(0x0596E385), + UINT32_C(0x02412161), UINT32_C(0x0289A97A), 
UINT32_C(0x01B54107), + UINT32_C(0x0271E7BB), UINT32_C(0x02E3D256), UINT32_C(0x07E3B820), + UINT32_C(0x07F5A8EE), UINT32_C(0x0C3BD541), UINT32_C(0x01BBC84D), + UINT32_C(0x02D55A46) }, + { UINT32_C(0x006E7D53), UINT32_C(0x07982C04), UINT32_C(0x09C948A0), + UINT32_C(0x00A62A93), UINT32_C(0x047CD945), UINT32_C(0x060F1A2B), + UINT32_C(0x05764587), UINT32_C(0x02111992), UINT32_C(0x03CD3492), + UINT32_C(0x0E5873CA), UINT32_C(0x04871D26), UINT32_C(0x0EBDD263), + UINT32_C(0x07899288), UINT32_C(0x00105962), UINT32_C(0x07975B25), + UINT32_C(0x00D6A34D), UINT32_C(0x02DF3799), UINT32_C(0x02807307), + UINT32_C(0x06FCAC54) } }, + { { UINT32_C(0x0302E505), UINT32_C(0x02CAC37A), UINT32_C(0x01A79721), + UINT32_C(0x03B2E74F), UINT32_C(0x0BE5B627), UINT32_C(0x019F58EA), + UINT32_C(0x03B18976), UINT32_C(0x0663CE37), UINT32_C(0x04C1003E), + UINT32_C(0x086DCC91), UINT32_C(0x0566BE13), UINT32_C(0x0A0C94D1), + UINT32_C(0x04A0F522), UINT32_C(0x01CBC165), UINT32_C(0x03D621C1), + UINT32_C(0x03F68C3D), UINT32_C(0x04156E0A), UINT32_C(0x04C1C807), + UINT32_C(0x002BF853) }, + { UINT32_C(0x073938D8), UINT32_C(0x076E66F8), UINT32_C(0x0251205F), + UINT32_C(0x01B82A4E), UINT32_C(0x0C9EAC88), UINT32_C(0x0736DBEE), + UINT32_C(0x028732CD), UINT32_C(0x03522855), UINT32_C(0x0343EE5A), + UINT32_C(0x053E49A4), UINT32_C(0x025D55C0), UINT32_C(0x0D4096DF), + UINT32_C(0x01108518), UINT32_C(0x02AE724F), UINT32_C(0x07514106), + UINT32_C(0x0301EB15), UINT32_C(0x0D82C2DE), UINT32_C(0x05E3A585), + UINT32_C(0x036F14AF) } }, + { { UINT32_C(0x07452267), UINT32_C(0x01E0D6D7), UINT32_C(0x04A4A896), + UINT32_C(0x06D1C7B5), UINT32_C(0x03C983EF), UINT32_C(0x017B4C4A), + UINT32_C(0x07C8F2FB), UINT32_C(0x078C2CCC), UINT32_C(0x0676C9A3), + UINT32_C(0x09CD585C), UINT32_C(0x0529FFB0), UINT32_C(0x020720BD), + UINT32_C(0x07B793B3), UINT32_C(0x07E65DA3), UINT32_C(0x0C89EDD5), + UINT32_C(0x04009C8D), UINT32_C(0x0EDC15A4), UINT32_C(0x077C8AC3), + UINT32_C(0x074868C1) }, + { UINT32_C(0x0DBC2674), UINT32_C(0x07B6C41F), 
UINT32_C(0x0B10636B), + UINT32_C(0x0607B000), UINT32_C(0x01B2C3EF), UINT32_C(0x014283CF), + UINT32_C(0x07BD944A), UINT32_C(0x016DA691), UINT32_C(0x0147454E), + UINT32_C(0x052DE117), UINT32_C(0x06E5CDC4), UINT32_C(0x0C7BE891), + UINT32_C(0x03BD94DE), UINT32_C(0x00362FA3), UINT32_C(0x0608B5DA), + UINT32_C(0x000C28A8), UINT32_C(0x06CFAD2C), UINT32_C(0x0502E5EB), + UINT32_C(0x0081DDC6) } }, + { { UINT32_C(0x0A2FCC67), UINT32_C(0x050EED2A), UINT32_C(0x0EAC3925), + UINT32_C(0x03CCFE3E), UINT32_C(0x0DC1F4E8), UINT32_C(0x012FD64C), + UINT32_C(0x02CFA2B3), UINT32_C(0x07921E80), UINT32_C(0x04F76E6D), + UINT32_C(0x090CBEA8), UINT32_C(0x00304ECF), UINT32_C(0x0933B9C8), + UINT32_C(0x01E92879), UINT32_C(0x062A922A), UINT32_C(0x03BEBB40), + UINT32_C(0x0475B5A4), UINT32_C(0x0AB9D3C2), UINT32_C(0x02845E4B), + UINT32_C(0x073D2AD6) }, + { UINT32_C(0x026C197B), UINT32_C(0x060C44B9), UINT32_C(0x07D6B2DD), + UINT32_C(0x06E7D188), UINT32_C(0x03B672A1), UINT32_C(0x0277F32F), + UINT32_C(0x011D4198), UINT32_C(0x07C178F6), UINT32_C(0x02E95A84), + UINT32_C(0x005619C7), UINT32_C(0x029B73FC), UINT32_C(0x03CAC5E3), + UINT32_C(0x068A3B5E), UINT32_C(0x07C2DFA8), UINT32_C(0x00EC9903), + UINT32_C(0x07AEED34), UINT32_C(0x08C0A0D0), UINT32_C(0x02A2FF79), + UINT32_C(0x06DBE6B8) } }, + }, + { + { { UINT32_C(0x0C3D1383), UINT32_C(0x04E126EE), UINT32_C(0x0B631DA3), + UINT32_C(0x03014900), UINT32_C(0x0D3831FE), UINT32_C(0x01BF06C7), + UINT32_C(0x032CA284), UINT32_C(0x092E0CA0), UINT32_C(0x01703AE0), + UINT32_C(0x0DCB8158), UINT32_C(0x06FF316B), UINT32_C(0x0ED60D31), + UINT32_C(0x05DB467E), UINT32_C(0x01F3917A), UINT32_C(0x06770BD1), + UINT32_C(0x00A944AF), UINT32_C(0x08E2035D), UINT32_C(0x020A054F), + UINT32_C(0x035F8744) }, + { UINT32_C(0x0A303000), UINT32_C(0x0029FD2C), UINT32_C(0x0A5D9AC4), + UINT32_C(0x06593596), UINT32_C(0x0288D9B1), UINT32_C(0x02B32376), + UINT32_C(0x067C4E0D), UINT32_C(0x0D1B984D), UINT32_C(0x04235BF5), + UINT32_C(0x001AA52B), UINT32_C(0x0221BA35), UINT32_C(0x0B74D0D3), + 
UINT32_C(0x03DDFA56), UINT32_C(0x004A6854), UINT32_C(0x01203660), + UINT32_C(0x0090027D), UINT32_C(0x02356607), UINT32_C(0x064E652F), + UINT32_C(0x01D4CBEB) } }, + { { UINT32_C(0x05CFE5E0), UINT32_C(0x04C8937C), UINT32_C(0x084C1BC9), + UINT32_C(0x0651FCA6), UINT32_C(0x0BDAC076), UINT32_C(0x079DB07C), + UINT32_C(0x01988893), UINT32_C(0x0D8E1644), UINT32_C(0x04F7CFCD), + UINT32_C(0x05727E1E), UINT32_C(0x073F0B5C), UINT32_C(0x0D975E23), + UINT32_C(0x06001F51), UINT32_C(0x07B2218F), UINT32_C(0x07159FF4), + UINT32_C(0x02D8AF28), UINT32_C(0x0F0AFF67), UINT32_C(0x0464C014), + UINT32_C(0x005A1007) }, + { UINT32_C(0x078A8DB5), UINT32_C(0x035A301E), UINT32_C(0x0E9F9693), + UINT32_C(0x07A8969A), UINT32_C(0x096A5ECF), UINT32_C(0x03467DDF), + UINT32_C(0x07AF13AA), UINT32_C(0x0BF17A6B), UINT32_C(0x00FBC9C7), + UINT32_C(0x002F3F21), UINT32_C(0x01610D30), UINT32_C(0x0A6FEF92), + UINT32_C(0x00334A31), UINT32_C(0x0619D424), UINT32_C(0x011832DC), + UINT32_C(0x04A2EBED), UINT32_C(0x092C4F4E), UINT32_C(0x03E72AFA), + UINT32_C(0x04555CAD) } }, + { { UINT32_C(0x0E8401D3), UINT32_C(0x031A9337), UINT32_C(0x0A68B915), + UINT32_C(0x006E6E9B), UINT32_C(0x0B1B6E29), UINT32_C(0x01B7F14B), + UINT32_C(0x047E0BD8), UINT32_C(0x0A8CBD43), UINT32_C(0x024528C3), + UINT32_C(0x08CA88A7), UINT32_C(0x000A1FEE), UINT32_C(0x0F21E47C), + UINT32_C(0x07D1A248), UINT32_C(0x04BE0AD5), UINT32_C(0x071E2CED), + UINT32_C(0x025521CD), UINT32_C(0x0F41E897), UINT32_C(0x0398886C), + UINT32_C(0x04779FFD) }, + { UINT32_C(0x0A828FA8), UINT32_C(0x017C8B2C), UINT32_C(0x0910B047), + UINT32_C(0x06160B77), UINT32_C(0x0B98B463), UINT32_C(0x07DF3373), + UINT32_C(0x0455763C), UINT32_C(0x0F1284BE), UINT32_C(0x00906AAE), + UINT32_C(0x01A75E0B), UINT32_C(0x07A6DA7C), UINT32_C(0x0FFCAFF1), + UINT32_C(0x050D6EE5), UINT32_C(0x024BD0BA), UINT32_C(0x08383A01), + UINT32_C(0x070AE8EA), UINT32_C(0x0CAA2B64), UINT32_C(0x06171B63), + UINT32_C(0x020CE9FD) } }, + { { UINT32_C(0x0147F509), UINT32_C(0x0074A121), UINT32_C(0x0B1C1B8D), + 
UINT32_C(0x00A39076), UINT32_C(0x0E542208), UINT32_C(0x01A08FA4), + UINT32_C(0x012AA998), UINT32_C(0x0954BE0E), UINT32_C(0x05751A97), + UINT32_C(0x09EFE174), UINT32_C(0x05C09E0D), UINT32_C(0x0DEE1815), + UINT32_C(0x000B0415), UINT32_C(0x06D82BE5), UINT32_C(0x000E24A9), + UINT32_C(0x042F7FD4), UINT32_C(0x0698791D), UINT32_C(0x05A5F79E), + UINT32_C(0x0334C8D5) }, + { UINT32_C(0x0BB690A0), UINT32_C(0x01835514), UINT32_C(0x031B4F26), + UINT32_C(0x023AC44F), UINT32_C(0x012CDCD1), UINT32_C(0x059AE369), + UINT32_C(0x0123A551), UINT32_C(0x0AEBA693), UINT32_C(0x07D984CD), + UINT32_C(0x0DAD9128), UINT32_C(0x0765643E), UINT32_C(0x0910F0F8), + UINT32_C(0x03FB31E2), UINT32_C(0x01BD811A), UINT32_C(0x059F6B39), + UINT32_C(0x049E6619), UINT32_C(0x06B63C96), UINT32_C(0x075166F7), + UINT32_C(0x025CA72B) } }, + { { UINT32_C(0x055F34E4), UINT32_C(0x00BF08BF), UINT32_C(0x03730236), + UINT32_C(0x039543BD), UINT32_C(0x05C17F94), UINT32_C(0x00A5C65D), + UINT32_C(0x06121DA8), UINT32_C(0x099AC777), UINT32_C(0x02DCC3D6), + UINT32_C(0x09002059), UINT32_C(0x0460BBB3), UINT32_C(0x07A202D8), + UINT32_C(0x04C44EB5), UINT32_C(0x049D001E), UINT32_C(0x0E783DED), + UINT32_C(0x0120D789), UINT32_C(0x086FA177), UINT32_C(0x065D19BF), + UINT32_C(0x042CA8B7) }, + { UINT32_C(0x02860379), UINT32_C(0x06375711), UINT32_C(0x078E9829), + UINT32_C(0x04F20A43), UINT32_C(0x0ADA67C4), UINT32_C(0x054101F4), + UINT32_C(0x0602943F), UINT32_C(0x03FD9150), UINT32_C(0x06B8D61B), + UINT32_C(0x06F5ADD6), UINT32_C(0x06EB2BAC), UINT32_C(0x0A07906A), + UINT32_C(0x0147EDC1), UINT32_C(0x0477D372), UINT32_C(0x0025B1CE), + UINT32_C(0x071B32CF), UINT32_C(0x0F40C9C6), UINT32_C(0x02483D0B), + UINT32_C(0x07A56FCD) } }, + { { UINT32_C(0x0B1B724E), UINT32_C(0x0100B5C8), UINT32_C(0x081380B3), + UINT32_C(0x048D8711), UINT32_C(0x0E363740), UINT32_C(0x029ED59F), + UINT32_C(0x05E7819F), UINT32_C(0x02898DC3), UINT32_C(0x03621527), + UINT32_C(0x0F99DD5D), UINT32_C(0x01DF449E), UINT32_C(0x022C0763), + UINT32_C(0x04490568), UINT32_C(0x051A6A61), 
UINT32_C(0x0EE682C8), + UINT32_C(0x0315AB2B), UINT32_C(0x08BF8EC0), UINT32_C(0x0221F0BD), + UINT32_C(0x0034A2F5) }, + { UINT32_C(0x0505A0E7), UINT32_C(0x031C759D), UINT32_C(0x006AE380), + UINT32_C(0x04AD9B4F), UINT32_C(0x0F850346), UINT32_C(0x0053B140), + UINT32_C(0x060AB23A), UINT32_C(0x021E3C52), UINT32_C(0x002B9A66), + UINT32_C(0x01646B7A), UINT32_C(0x03977D69), UINT32_C(0x02418634), + UINT32_C(0x05E2030C), UINT32_C(0x06F8DED9), UINT32_C(0x064302A0), + UINT32_C(0x0553D4B6), UINT32_C(0x0956D92B), UINT32_C(0x0537BD35), + UINT32_C(0x07AFABE7) } }, + { { UINT32_C(0x04CB8040), UINT32_C(0x016D2E6C), UINT32_C(0x0DDE4688), + UINT32_C(0x00DF2559), UINT32_C(0x0A980125), UINT32_C(0x066A1AC7), + UINT32_C(0x07DF5C4B), UINT32_C(0x0FD3C659), UINT32_C(0x00481C65), + UINT32_C(0x0AE5A70F), UINT32_C(0x029F751C), UINT32_C(0x00B4A3D4), + UINT32_C(0x075575BC), UINT32_C(0x045CF25E), UINT32_C(0x06867A07), + UINT32_C(0x076D7354), UINT32_C(0x0861487C), UINT32_C(0x017CEA2E), + UINT32_C(0x03228414) }, + { UINT32_C(0x026AE111), UINT32_C(0x038FA015), UINT32_C(0x060716CA), + UINT32_C(0x04976285), UINT32_C(0x059BC9DE), UINT32_C(0x043BF937), + UINT32_C(0x035F13A1), UINT32_C(0x0F8D8888), UINT32_C(0x06D5E9F8), + UINT32_C(0x08616DB1), UINT32_C(0x032C0CBB), UINT32_C(0x0AA3299C), + UINT32_C(0x03F194B4), UINT32_C(0x00D0F72D), UINT32_C(0x0B3FCCBD), + UINT32_C(0x02803044), UINT32_C(0x0A08E3C3), UINT32_C(0x037A0997), + UINT32_C(0x05DC3B19) } }, + { { UINT32_C(0x085193F0), UINT32_C(0x019978F4), UINT32_C(0x0BF0C234), + UINT32_C(0x04F7BBC1), UINT32_C(0x0722B6D6), UINT32_C(0x013DCEE7), + UINT32_C(0x05D575CD), UINT32_C(0x0779F809), UINT32_C(0x06335183), + UINT32_C(0x0DCC718C), UINT32_C(0x02D1E7DB), UINT32_C(0x0F6A6D57), + UINT32_C(0x065A96BF), UINT32_C(0x065930E7), UINT32_C(0x039B793F), + UINT32_C(0x06A9BA2E), UINT32_C(0x0C033596), UINT32_C(0x01BE1126), + UINT32_C(0x03EA93B8) }, + { UINT32_C(0x03161177), UINT32_C(0x002665D5), UINT32_C(0x017B69C9), + UINT32_C(0x07892DD4), UINT32_C(0x0F6F8ECB), 
UINT32_C(0x0576AF37), + UINT32_C(0x03C1E515), UINT32_C(0x05A60E50), UINT32_C(0x02549873), + UINT32_C(0x09B3D920), UINT32_C(0x029DA082), UINT32_C(0x009DAE44), + UINT32_C(0x0197C8E7), UINT32_C(0x0154A33B), UINT32_C(0x097B3971), + UINT32_C(0x023C0423), UINT32_C(0x02B8C68C), UINT32_C(0x04DCA653), + UINT32_C(0x00079A0F) } }, + { { UINT32_C(0x063E2975), UINT32_C(0x06BEC9ED), UINT32_C(0x0B38790C), + UINT32_C(0x022D87D1), UINT32_C(0x0EA228A4), UINT32_C(0x010DBA9F), + UINT32_C(0x015868D8), UINT32_C(0x080C5E0D), UINT32_C(0x075196CF), + UINT32_C(0x0A3AFD7E), UINT32_C(0x031A6E14), UINT32_C(0x0E7A5374), + UINT32_C(0x067A8FE5), UINT32_C(0x06ECEB0D), UINT32_C(0x0B84F9C7), + UINT32_C(0x0680604D), UINT32_C(0x072314F9), UINT32_C(0x03A2F4B2), + UINT32_C(0x06C5081F) }, + { UINT32_C(0x0B981980), UINT32_C(0x0349CBF0), UINT32_C(0x072972B5), + UINT32_C(0x02885527), UINT32_C(0x0150CDBD), UINT32_C(0x07F178E3), + UINT32_C(0x032B4111), UINT32_C(0x0B2B4EF6), UINT32_C(0x000F21B3), + UINT32_C(0x039D39FF), UINT32_C(0x07E2383D), UINT32_C(0x0F91A9DF), + UINT32_C(0x000BF2A4), UINT32_C(0x003EA686), UINT32_C(0x06E3C109), + UINT32_C(0x05D771D7), UINT32_C(0x03336F2A), UINT32_C(0x00A9A15C), + UINT32_C(0x0310BC8B) } }, + { { UINT32_C(0x082B5AA4), UINT32_C(0x04A7240C), UINT32_C(0x00ABF375), + UINT32_C(0x07E33DEB), UINT32_C(0x01BD8789), UINT32_C(0x06BA83A6), + UINT32_C(0x05A6491B), UINT32_C(0x04DB69BD), UINT32_C(0x010D6A55), + UINT32_C(0x0D5DAFA1), UINT32_C(0x06C7F999), UINT32_C(0x0185AD3E), + UINT32_C(0x027EAEB5), UINT32_C(0x006644C8), UINT32_C(0x0B9709E1), + UINT32_C(0x07676CF0), UINT32_C(0x0508273E), UINT32_C(0x054D3FBB), + UINT32_C(0x063EFA4A) }, + { UINT32_C(0x010AA767), UINT32_C(0x01CC5A04), UINT32_C(0x0BE5B1B3), + UINT32_C(0x06950FCE), UINT32_C(0x0E94E6DB), UINT32_C(0x0497BB17), + UINT32_C(0x00CC06B4), UINT32_C(0x08846F32), UINT32_C(0x0314DC3B), + UINT32_C(0x0BA27736), UINT32_C(0x0432450D), UINT32_C(0x04925C53), + UINT32_C(0x03119EE1), UINT32_C(0x04A66669), UINT32_C(0x05FBA305), + 
UINT32_C(0x033D4900), UINT32_C(0x0FE789AF), UINT32_C(0x0671EF4B), + UINT32_C(0x0259D6DF) } }, + { { UINT32_C(0x05C529C4), UINT32_C(0x04097FDD), UINT32_C(0x0296486E), + UINT32_C(0x05D5E29C), UINT32_C(0x0B3FABA2), UINT32_C(0x0695126C), + UINT32_C(0x0312362F), UINT32_C(0x08DC4B4B), UINT32_C(0x0413884F), + UINT32_C(0x067DDD33), UINT32_C(0x055DBD8F), UINT32_C(0x07D0B9CB), + UINT32_C(0x01BE7C35), UINT32_C(0x043BC43D), UINT32_C(0x00E5A19E), + UINT32_C(0x017725FC), UINT32_C(0x006A669F), UINT32_C(0x063FD379), + UINT32_C(0x0682F5E5) }, + { UINT32_C(0x0035FA1B), UINT32_C(0x0302079C), UINT32_C(0x0A397CF2), + UINT32_C(0x02A9E0EB), UINT32_C(0x0183E8FA), UINT32_C(0x00950C41), + UINT32_C(0x05ACFED2), UINT32_C(0x0B8DC827), UINT32_C(0x0004B05C), + UINT32_C(0x0ECD486A), UINT32_C(0x04FBAB30), UINT32_C(0x0A2FE908), + UINT32_C(0x05C95F6D), UINT32_C(0x06B30876), UINT32_C(0x0F3D7A8A), + UINT32_C(0x0734E57D), UINT32_C(0x0410C523), UINT32_C(0x057AD388), + UINT32_C(0x073AF161) } }, + { { UINT32_C(0x033E8718), UINT32_C(0x05E156C6), UINT32_C(0x0188F2D0), + UINT32_C(0x07B490F4), UINT32_C(0x0D1D9936), UINT32_C(0x045ACF91), + UINT32_C(0x05EADE92), UINT32_C(0x09204996), UINT32_C(0x03FB05AD), + UINT32_C(0x0952B30E), UINT32_C(0x066E8B73), UINT32_C(0x02E38706), + UINT32_C(0x06AD215A), UINT32_C(0x05770FF2), UINT32_C(0x0CCC64AA), + UINT32_C(0x00A77560), UINT32_C(0x084A4A57), UINT32_C(0x07428950), + UINT32_C(0x007783FF) }, + { UINT32_C(0x07864A53), UINT32_C(0x02B0B04D), UINT32_C(0x0CE9B903), + UINT32_C(0x032C4DB9), UINT32_C(0x0ED34B7B), UINT32_C(0x02B9BB80), + UINT32_C(0x0107A7A1), UINT32_C(0x0133502C), UINT32_C(0x06939D9B), + UINT32_C(0x07AE6A42), UINT32_C(0x01C55CB0), UINT32_C(0x0A087059), + UINT32_C(0x011E8069), UINT32_C(0x02AC5D81), UINT32_C(0x0FF470E4), + UINT32_C(0x068D4B88), UINT32_C(0x03B934D1), UINT32_C(0x01E86F4D), + UINT32_C(0x00286D40) } }, + { { UINT32_C(0x0A097CC4), UINT32_C(0x07C93D92), UINT32_C(0x03638A82), + UINT32_C(0x05D44662), UINT32_C(0x034F8801), UINT32_C(0x01E1B0E9), + 
UINT32_C(0x03132ED7), UINT32_C(0x0D61A771), UINT32_C(0x0777FA2F), + UINT32_C(0x0E4D4244), UINT32_C(0x02CDDCA4), UINT32_C(0x01988721), + UINT32_C(0x0694972F), UINT32_C(0x02AA2585), UINT32_C(0x06A552DD), + UINT32_C(0x02719251), UINT32_C(0x0C4FD604), UINT32_C(0x033FC4DD), + UINT32_C(0x02A49BC5) }, + { UINT32_C(0x0ECC32F4), UINT32_C(0x03998CBA), UINT32_C(0x0E555140), + UINT32_C(0x06BE70C6), UINT32_C(0x02ECE0DB), UINT32_C(0x07D7EE62), + UINT32_C(0x006B8450), UINT32_C(0x0C677BF6), UINT32_C(0x0065EEBA), + UINT32_C(0x0C8F791B), UINT32_C(0x05880489), UINT32_C(0x07724E1B), + UINT32_C(0x00C43815), UINT32_C(0x079C7129), UINT32_C(0x0AC7BD8B), + UINT32_C(0x00B35A76), UINT32_C(0x0E62F127), UINT32_C(0x06892912), + UINT32_C(0x069DE730) } }, + { { UINT32_C(0x0D176E2E), UINT32_C(0x04BD43B7), UINT32_C(0x0843A348), + UINT32_C(0x0749D5C1), UINT32_C(0x0ED9CC05), UINT32_C(0x00305C32), + UINT32_C(0x037CC7F4), UINT32_C(0x03DF22FB), UINT32_C(0x05799B29), + UINT32_C(0x0BAA8556), UINT32_C(0x01B9550B), UINT32_C(0x0B71D97D), + UINT32_C(0x071866D2), UINT32_C(0x042A76ED), UINT32_C(0x0CF558E6), + UINT32_C(0x05C52446), UINT32_C(0x0E80A5C3), UINT32_C(0x0732DC8B), + UINT32_C(0x05430293) }, + { UINT32_C(0x08A05AA1), UINT32_C(0x060E94EA), UINT32_C(0x0495DB83), + UINT32_C(0x07F23E7E), UINT32_C(0x09BABC6A), UINT32_C(0x07B134F3), + UINT32_C(0x02C60301), UINT32_C(0x0C76C75A), UINT32_C(0x0496E91D), + UINT32_C(0x0354A538), UINT32_C(0x03F832DB), UINT32_C(0x03139812), + UINT32_C(0x028BB56E), UINT32_C(0x06BC315A), UINT32_C(0x08F87E08), + UINT32_C(0x04EB9933), UINT32_C(0x0D94A083), UINT32_C(0x00F1E782), + UINT32_C(0x00039DA7) } }, + { { UINT32_C(0x0F46E9D5), UINT32_C(0x04AFDE7F), UINT32_C(0x02DD9156), + UINT32_C(0x03A43A4A), UINT32_C(0x0334CF91), UINT32_C(0x06B820D5), + UINT32_C(0x02AB098A), UINT32_C(0x010407F3), UINT32_C(0x06E15825), + UINT32_C(0x0DE19BBC), UINT32_C(0x05C155A7), UINT32_C(0x098AB480), + UINT32_C(0x027F0A26), UINT32_C(0x001E493A), UINT32_C(0x0D3BF154), + UINT32_C(0x0022BB7B), UINT32_C(0x092F7F8A), 
UINT32_C(0x025E06B0), + UINT32_C(0x0214EC84) }, + { UINT32_C(0x0E367447), UINT32_C(0x07A76C60), UINT32_C(0x0E7F25B2), + UINT32_C(0x061DC274), UINT32_C(0x08037471), UINT32_C(0x0601CC83), + UINT32_C(0x077C01C1), UINT32_C(0x0BD797B8), UINT32_C(0x07A2D854), + UINT32_C(0x0F539925), UINT32_C(0x00056A50), UINT32_C(0x0F52ABBB), + UINT32_C(0x01C407C4), UINT32_C(0x046E3EC8), UINT32_C(0x08C6B255), + UINT32_C(0x06BB4D5F), UINT32_C(0x09336DFF), UINT32_C(0x00D914F1), + UINT32_C(0x01F9DBAA) } }, + { { UINT32_C(0x0D831A04), UINT32_C(0x05A97D33), UINT32_C(0x0906D401), + UINT32_C(0x01E543D5), UINT32_C(0x063B64A7), UINT32_C(0x01DF1F04), + UINT32_C(0x07BEAE26), UINT32_C(0x0C4C51CE), UINT32_C(0x071253E1), + UINT32_C(0x07C5C1BC), UINT32_C(0x0686EDD8), UINT32_C(0x0EADB491), + UINT32_C(0x06FCC7E8), UINT32_C(0x04DC895B), UINT32_C(0x0DA99CB1), + UINT32_C(0x07538043), UINT32_C(0x0DCCD221), UINT32_C(0x05338542), + UINT32_C(0x0263F3E2) }, + { UINT32_C(0x049B2FC3), UINT32_C(0x00D9571D), UINT32_C(0x09A6B74E), + UINT32_C(0x013E9069), UINT32_C(0x0C142061), UINT32_C(0x0661D5AE), + UINT32_C(0x078F1467), UINT32_C(0x0568D3A9), UINT32_C(0x02729AA5), + UINT32_C(0x0749905F), UINT32_C(0x02491337), UINT32_C(0x0A8EED74), + UINT32_C(0x070FB80C), UINT32_C(0x066BA15B), UINT32_C(0x087A7668), + UINT32_C(0x03342CBD), UINT32_C(0x0FCD50D2), UINT32_C(0x017CF7F9), + UINT32_C(0x05DA6EDD) } }, + }, + { + { { UINT32_C(0x08ECE594), UINT32_C(0x02E6D7AF), UINT32_C(0x0160833B), + UINT32_C(0x05E9199C), UINT32_C(0x05C1EB44), UINT32_C(0x01F9CDD2), + UINT32_C(0x04ECBF7E), UINT32_C(0x011F5E2E), UINT32_C(0x00B16683), + UINT32_C(0x082C80F7), UINT32_C(0x04F6D76E), UINT32_C(0x0A9035A2), + UINT32_C(0x02A6F996), UINT32_C(0x07CF51EF), UINT32_C(0x011C78A5), + UINT32_C(0x03E6811A), UINT32_C(0x0DCCBD54), UINT32_C(0x029CA158), + UINT32_C(0x0188556B) }, + { UINT32_C(0x0EBBFAD7), UINT32_C(0x036D4FEF), UINT32_C(0x0DAD8CB2), + UINT32_C(0x024C5461), UINT32_C(0x09F7253C), UINT32_C(0x052C8206), + UINT32_C(0x03009FD7), UINT32_C(0x05A4E883), 
UINT32_C(0x04FFDBF8), + UINT32_C(0x07B5A2D0), UINT32_C(0x0487033B), UINT32_C(0x003EABFC), + UINT32_C(0x0107E479), UINT32_C(0x0479A422), UINT32_C(0x0ECEA707), + UINT32_C(0x05D06F61), UINT32_C(0x05BD0428), UINT32_C(0x01301D97), + UINT32_C(0x0137ADE9) } }, + { { UINT32_C(0x008164D4), UINT32_C(0x02998A00), UINT32_C(0x0E9FE1D5), + UINT32_C(0x05B9A827), UINT32_C(0x0AA45754), UINT32_C(0x06793FDD), + UINT32_C(0x01D8C060), UINT32_C(0x030ECBF4), UINT32_C(0x01FDC34C), + UINT32_C(0x0FA8650F), UINT32_C(0x0739AA31), UINT32_C(0x0905FB0D), + UINT32_C(0x04B98585), UINT32_C(0x04528DD9), UINT32_C(0x0582E0E8), + UINT32_C(0x0685885D), UINT32_C(0x008F4125), UINT32_C(0x02A15C01), + UINT32_C(0x023D540D) }, + { UINT32_C(0x039B003C), UINT32_C(0x074C5CC0), UINT32_C(0x029B2FBB), + UINT32_C(0x07F27890), UINT32_C(0x0C083234), UINT32_C(0x054081D7), + UINT32_C(0x0109E54D), UINT32_C(0x08920F8E), UINT32_C(0x07D87B98), + UINT32_C(0x07E36E68), UINT32_C(0x023912DB), UINT32_C(0x071A5BBC), + UINT32_C(0x0733E49F), UINT32_C(0x058495D0), UINT32_C(0x0068F694), + UINT32_C(0x012DCC7D), UINT32_C(0x0DC88ED4), UINT32_C(0x06D1A2D4), + UINT32_C(0x02BBA636) } }, + { { UINT32_C(0x0B78796F), UINT32_C(0x0335FA8E), UINT32_C(0x0243FD16), + UINT32_C(0x03C6B319), UINT32_C(0x01CD8CA8), UINT32_C(0x0704FAEE), + UINT32_C(0x04540F1E), UINT32_C(0x092AC9A2), UINT32_C(0x020A1CA3), + UINT32_C(0x023FC6DD), UINT32_C(0x01EFAF42), UINT32_C(0x00BC4AB2), + UINT32_C(0x0206DD26), UINT32_C(0x07400CF2), UINT32_C(0x072BD012), + UINT32_C(0x00840AB3), UINT32_C(0x016D752E), UINT32_C(0x00CEF006), + UINT32_C(0x0647C23D) }, + { UINT32_C(0x0F6CA70B), UINT32_C(0x05AFF85F), UINT32_C(0x031691E3), + UINT32_C(0x01063899), UINT32_C(0x02420E8C), UINT32_C(0x03D2D13C), + UINT32_C(0x059E8A01), UINT32_C(0x0FC5FC43), UINT32_C(0x042A852F), + UINT32_C(0x06446FD4), UINT32_C(0x0341CB5B), UINT32_C(0x044193ED), + UINT32_C(0x073BE475), UINT32_C(0x051FCBEA), UINT32_C(0x00D6D405), + UINT32_C(0x00A0026F), UINT32_C(0x09A09555), UINT32_C(0x0037DFDB), + 
UINT32_C(0x0186A76D) } }, + { { UINT32_C(0x06762E69), UINT32_C(0x05E586F2), UINT32_C(0x08A5D295), + UINT32_C(0x021AEB8A), UINT32_C(0x0D8E9356), UINT32_C(0x05E8F45E), + UINT32_C(0x04336CB6), UINT32_C(0x04373909), UINT32_C(0x020299B5), + UINT32_C(0x013EB290), UINT32_C(0x061E0E31), UINT32_C(0x07167125), + UINT32_C(0x01291CE5), UINT32_C(0x05F204F5), UINT32_C(0x060A0EA2), + UINT32_C(0x0414B179), UINT32_C(0x064F6F43), UINT32_C(0x0114060E), + UINT32_C(0x040928CF) }, + { UINT32_C(0x0B54A6C6), UINT32_C(0x010FE7C2), UINT32_C(0x0FDA19CB), + UINT32_C(0x056B791E), UINT32_C(0x049ED286), UINT32_C(0x02401472), + UINT32_C(0x048F8CD1), UINT32_C(0x0EAC2400), UINT32_C(0x075D6078), + UINT32_C(0x0EAAD7B3), UINT32_C(0x051EDE19), UINT32_C(0x0D7E6F09), + UINT32_C(0x001044A9), UINT32_C(0x0411E3BA), UINT32_C(0x0D3647C4), + UINT32_C(0x00168497), UINT32_C(0x08BA1235), UINT32_C(0x01C93676), + UINT32_C(0x01411BDC) } }, + { { UINT32_C(0x07F5FEA0), UINT32_C(0x068F1494), UINT32_C(0x0CF3659A), + UINT32_C(0x034F4CD5), UINT32_C(0x08840E07), UINT32_C(0x01463227), + UINT32_C(0x02CE4099), UINT32_C(0x00306A1A), UINT32_C(0x043276DA), + UINT32_C(0x0C0A79A8), UINT32_C(0x045485DA), UINT32_C(0x0D43B7E5), + UINT32_C(0x0245D30D), UINT32_C(0x07040ECA), UINT32_C(0x0F0944E2), + UINT32_C(0x02FAB448), UINT32_C(0x0A3418D6), UINT32_C(0x00AEEE32), + UINT32_C(0x054B0477) }, + { UINT32_C(0x002E1A49), UINT32_C(0x02417738), UINT32_C(0x003FC230), + UINT32_C(0x057B81BC), UINT32_C(0x09252F9B), UINT32_C(0x071E923E), + UINT32_C(0x07556FE9), UINT32_C(0x0405C043), UINT32_C(0x05F4A479), + UINT32_C(0x00AE6EBC), UINT32_C(0x0470CEA9), UINT32_C(0x043EFE7F), + UINT32_C(0x032F779B), UINT32_C(0x05D5E4C1), UINT32_C(0x0F412FF3), + UINT32_C(0x029E0A95), UINT32_C(0x027FF900), UINT32_C(0x0639C4FE), + UINT32_C(0x05496FF2) } }, + { { UINT32_C(0x093A81E5), UINT32_C(0x06552EA0), UINT32_C(0x076C940F), + UINT32_C(0x04D9EBF4), UINT32_C(0x07435E68), UINT32_C(0x00026B20), + UINT32_C(0x022F07A1), UINT32_C(0x0D1152A6), UINT32_C(0x01605EB4), + 
UINT32_C(0x021ED2B3), UINT32_C(0x0416BC52), UINT32_C(0x0F03BB25), + UINT32_C(0x032FD879), UINT32_C(0x0224E24D), UINT32_C(0x0227BC06), + UINT32_C(0x07E18BB7), UINT32_C(0x0846E10C), UINT32_C(0x025383D2), + UINT32_C(0x0716FE98) }, + { UINT32_C(0x048353E7), UINT32_C(0x06A51D17), UINT32_C(0x0602B7B4), + UINT32_C(0x00A3A912), UINT32_C(0x00D41798), UINT32_C(0x009BAAA2), + UINT32_C(0x014F6863), UINT32_C(0x0B8C9E0C), UINT32_C(0x004E89E7), + UINT32_C(0x01EA2B4D), UINT32_C(0x069FE41B), UINT32_C(0x0E23CD44), + UINT32_C(0x0284C3F8), UINT32_C(0x0709633E), UINT32_C(0x00EC122E), + UINT32_C(0x054C3546), UINT32_C(0x0274CE48), UINT32_C(0x0562858C), + UINT32_C(0x00845131) } }, + { { UINT32_C(0x093C77DA), UINT32_C(0x01D351AD), UINT32_C(0x023A3C02), + UINT32_C(0x050A84F5), UINT32_C(0x0D2278BA), UINT32_C(0x0166F47B), + UINT32_C(0x010E24C3), UINT32_C(0x0171F355), UINT32_C(0x070D70CC), + UINT32_C(0x0F04C14A), UINT32_C(0x0675CE80), UINT32_C(0x03C92277), + UINT32_C(0x027C5314), UINT32_C(0x0475432E), UINT32_C(0x0A42C984), + UINT32_C(0x021A86BA), UINT32_C(0x09667047), UINT32_C(0x0162D620), + UINT32_C(0x05CE1F5E) }, + { UINT32_C(0x0541016D), UINT32_C(0x04AA27AD), UINT32_C(0x024272A0), + UINT32_C(0x0124A937), UINT32_C(0x04022798), UINT32_C(0x04C4908F), + UINT32_C(0x078D2755), UINT32_C(0x05FC4690), UINT32_C(0x03D49867), + UINT32_C(0x0D0542ED), UINT32_C(0x014AC0C6), UINT32_C(0x0444F4AA), + UINT32_C(0x0527B53A), UINT32_C(0x04E463E4), UINT32_C(0x084795B9), + UINT32_C(0x06190D53), UINT32_C(0x01F0982A), UINT32_C(0x06C19AFA), + UINT32_C(0x02B40A43) } }, + { { UINT32_C(0x0D526DD9), UINT32_C(0x02D2A436), UINT32_C(0x06CBC632), + UINT32_C(0x06A016EB), UINT32_C(0x0229215C), UINT32_C(0x063A186E), + UINT32_C(0x056A2652), UINT32_C(0x0982D8F0), UINT32_C(0x04950B55), + UINT32_C(0x0C34A068), UINT32_C(0x036F958C), UINT32_C(0x0EC7C304), + UINT32_C(0x00685912), UINT32_C(0x00521605), UINT32_C(0x074386C5), + UINT32_C(0x06C5C880), UINT32_C(0x01D5C0E0), UINT32_C(0x0321B5FC), + UINT32_C(0x031F89D8) }, + { 
UINT32_C(0x0E4F4EFB), UINT32_C(0x042EF02C), UINT32_C(0x0747294D), + UINT32_C(0x06315147), UINT32_C(0x09826B36), UINT32_C(0x044F7A99), + UINT32_C(0x00DA6A3B), UINT32_C(0x0B192C6C), UINT32_C(0x017D9CD6), + UINT32_C(0x07D0FC8D), UINT32_C(0x00306186), UINT32_C(0x0DA5FD2C), + UINT32_C(0x048EA8B6), UINT32_C(0x041BED38), UINT32_C(0x028A7681), + UINT32_C(0x0444E09E), UINT32_C(0x07A1C182), UINT32_C(0x06CEB6B8), + UINT32_C(0x0402E972) } }, + { { UINT32_C(0x0A37CD61), UINT32_C(0x07A90498), UINT32_C(0x03236B70), + UINT32_C(0x010D1CA8), UINT32_C(0x0C8EE94C), UINT32_C(0x01332402), + UINT32_C(0x00D01671), UINT32_C(0x0D20BD0A), UINT32_C(0x04F8905D), + UINT32_C(0x0CB75503), UINT32_C(0x07C71184), UINT32_C(0x04D224FF), + UINT32_C(0x05EF5D3B), UINT32_C(0x02D2D84B), UINT32_C(0x0776D6B8), + UINT32_C(0x01B04C47), UINT32_C(0x0C6883AD), UINT32_C(0x041BC984), + UINT32_C(0x0738830F) }, + { UINT32_C(0x008A7408), UINT32_C(0x01833053), UINT32_C(0x0DCDED77), + UINT32_C(0x0660E3CD), UINT32_C(0x003541F4), UINT32_C(0x06650324), + UINT32_C(0x056D1103), UINT32_C(0x012DDC16), UINT32_C(0x04858446), + UINT32_C(0x031BD98F), UINT32_C(0x07EA97C0), UINT32_C(0x033EA10E), + UINT32_C(0x07E40598), UINT32_C(0x03935067), UINT32_C(0x06BD3C58), + UINT32_C(0x0709A382), UINT32_C(0x0FFD62B5), UINT32_C(0x03ACA64E), + UINT32_C(0x02BDB05C) } }, + { { UINT32_C(0x019DDB66), UINT32_C(0x0151276D), UINT32_C(0x0D169D42), + UINT32_C(0x07424F74), UINT32_C(0x0073574B), UINT32_C(0x029D6033), + UINT32_C(0x04805B63), UINT32_C(0x0FF3CCB8), UINT32_C(0x0657BEB9), + UINT32_C(0x06710C8D), UINT32_C(0x076A0EFE), UINT32_C(0x05FFC38A), + UINT32_C(0x039B2127), UINT32_C(0x04A7D60B), UINT32_C(0x0D352201), + UINT32_C(0x0459932F), UINT32_C(0x0A56306E), UINT32_C(0x05D63C8E), + UINT32_C(0x01727D3E) }, + { UINT32_C(0x0A228C02), UINT32_C(0x0454E2FD), UINT32_C(0x0C5CF406), + UINT32_C(0x072A6748), UINT32_C(0x09478B3C), UINT32_C(0x01C032C4), + UINT32_C(0x024B1CF3), UINT32_C(0x07BCB89A), UINT32_C(0x017F8136), + UINT32_C(0x03BFA207), UINT32_C(0x0032CE35), 
UINT32_C(0x01301C08), + UINT32_C(0x01F1D68E), UINT32_C(0x024447E0), UINT32_C(0x00655D3F), + UINT32_C(0x04B5B6DB), UINT32_C(0x08F50A61), UINT32_C(0x07FE19DA), + UINT32_C(0x01906979) } }, + { { UINT32_C(0x04E80EB1), UINT32_C(0x052DB749), UINT32_C(0x0FA876FF), + UINT32_C(0x014D563E), UINT32_C(0x0DD8DCB4), UINT32_C(0x06D08CF5), + UINT32_C(0x0088B6C9), UINT32_C(0x099DAF2C), UINT32_C(0x06ADE3E9), + UINT32_C(0x05F27F40), UINT32_C(0x076292C5), UINT32_C(0x02149C44), + UINT32_C(0x04ECED26), UINT32_C(0x04016166), UINT32_C(0x0E8DD0F0), + UINT32_C(0x02703366), UINT32_C(0x09A4D3F8), UINT32_C(0x000C4924), + UINT32_C(0x066F3B89) }, + { UINT32_C(0x00F92986), UINT32_C(0x001B8CB3), UINT32_C(0x0C27E556), + UINT32_C(0x05EAB0C7), UINT32_C(0x0A95BBEF), UINT32_C(0x011331B7), + UINT32_C(0x03245504), UINT32_C(0x0B108EBA), UINT32_C(0x0704FE66), + UINT32_C(0x0AEECF39), UINT32_C(0x0485E096), UINT32_C(0x0D5B3E1E), + UINT32_C(0x02DB3A00), UINT32_C(0x06FBA80E), UINT32_C(0x0AEE0EA5), + UINT32_C(0x064273CE), UINT32_C(0x0CD775D3), UINT32_C(0x00232462), + UINT32_C(0x0347DCE7) } }, + { { UINT32_C(0x029AE558), UINT32_C(0x07BED198), UINT32_C(0x073802BF), + UINT32_C(0x0528429C), UINT32_C(0x02A79F18), UINT32_C(0x045BFA11), + UINT32_C(0x07B77865), UINT32_C(0x065D4D35), UINT32_C(0x03701A97), + UINT32_C(0x03C87FB5), UINT32_C(0x07338AED), UINT32_C(0x0260F0C6), + UINT32_C(0x032E371B), UINT32_C(0x048EAB15), UINT32_C(0x06488CED), + UINT32_C(0x04349BDC), UINT32_C(0x09FF872F), UINT32_C(0x01EBC954), + UINT32_C(0x02644425) }, + { UINT32_C(0x0AAD22D1), UINT32_C(0x04DA634D), UINT32_C(0x0931B0A2), + UINT32_C(0x0366BA6D), UINT32_C(0x0A03F852), UINT32_C(0x003C4DA2), + UINT32_C(0x07BDDE59), UINT32_C(0x00543C06), UINT32_C(0x05EA4710), + UINT32_C(0x0622BACC), UINT32_C(0x03C86D6F), UINT32_C(0x0810EAB1), + UINT32_C(0x0128E64D), UINT32_C(0x02C5B6EF), UINT32_C(0x0F37432C), + UINT32_C(0x0391A4CD), UINT32_C(0x09344B8B), UINT32_C(0x007DDA34), + UINT32_C(0x02408EDC) } }, + { { UINT32_C(0x0EB8B398), UINT32_C(0x068DF986), 
UINT32_C(0x0BCADF8A), + UINT32_C(0x01829A9B), UINT32_C(0x017C9B77), UINT32_C(0x0446621A), + UINT32_C(0x026EE0C4), UINT32_C(0x0E0FE9B2), UINT32_C(0x0528FE1C), + UINT32_C(0x08E6DD5A), UINT32_C(0x018FB2E0), UINT32_C(0x0FD2A7AB), + UINT32_C(0x002E71A2), UINT32_C(0x069C2EFB), UINT32_C(0x0156F759), + UINT32_C(0x04F3A78E), UINT32_C(0x022C4533), UINT32_C(0x069A2816), + UINT32_C(0x03C034B1) }, + { UINT32_C(0x0D05FF6A), UINT32_C(0x07761186), UINT32_C(0x0D73ABC6), + UINT32_C(0x06AC086B), UINT32_C(0x0BF965A1), UINT32_C(0x05F6546D), + UINT32_C(0x07767397), UINT32_C(0x005C4608), UINT32_C(0x005803C4), + UINT32_C(0x024EE133), UINT32_C(0x05FC51BD), UINT32_C(0x099F0D97), + UINT32_C(0x00437C0C), UINT32_C(0x0553A827), UINT32_C(0x0FB0EB60), + UINT32_C(0x06A7AEC5), UINT32_C(0x07C31264), UINT32_C(0x020D4B32), + UINT32_C(0x045F6381) } }, + { { UINT32_C(0x04D9F1F8), UINT32_C(0x05315A15), UINT32_C(0x01990B25), + UINT32_C(0x01A6DE98), UINT32_C(0x036D854A), UINT32_C(0x03D25F0D), + UINT32_C(0x06673F83), UINT32_C(0x04C56936), UINT32_C(0x019ACD66), + UINT32_C(0x0C1F1C47), UINT32_C(0x04AD0FD3), UINT32_C(0x0148F4FA), + UINT32_C(0x07BC3A93), UINT32_C(0x02F86E22), UINT32_C(0x0291F62B), + UINT32_C(0x01F87233), UINT32_C(0x0F616501), UINT32_C(0x06C1B9E5), + UINT32_C(0x05FB6CAA) }, + { UINT32_C(0x0DAF0C41), UINT32_C(0x050BE47B), UINT32_C(0x0DD799BF), + UINT32_C(0x00BB8754), UINT32_C(0x07221726), UINT32_C(0x00F26A35), + UINT32_C(0x0474A809), UINT32_C(0x0250B288), UINT32_C(0x0680A8C1), + UINT32_C(0x09FDC598), UINT32_C(0x00424EA2), UINT32_C(0x09CADE7E), + UINT32_C(0x0092845D), UINT32_C(0x0301B24F), UINT32_C(0x0CF7BF3E), + UINT32_C(0x0747B26E), UINT32_C(0x04110EBF), UINT32_C(0x002FC650), + UINT32_C(0x066AF8B8) } }, + { { UINT32_C(0x06DBC74A), UINT32_C(0x02C31098), UINT32_C(0x069497D4), + UINT32_C(0x048864EC), UINT32_C(0x01E12C96), UINT32_C(0x03EE9F03), + UINT32_C(0x05400CB4), UINT32_C(0x00B9E174), UINT32_C(0x04923BC3), + UINT32_C(0x0B5B54EA), UINT32_C(0x04A635C8), UINT32_C(0x0039A770), + 
UINT32_C(0x079340D3), UINT32_C(0x02B053A6), UINT32_C(0x0AA8C800), + UINT32_C(0x073E66A4), UINT32_C(0x0304ED5B), UINT32_C(0x007ACB50), + UINT32_C(0x069EBA57) }, + { UINT32_C(0x04FA3D53), UINT32_C(0x050EF28C), UINT32_C(0x09A3C2CF), + UINT32_C(0x03DE9C58), UINT32_C(0x085E0F9C), UINT32_C(0x069D187C), + UINT32_C(0x04624402), UINT32_C(0x0C81F8BF), UINT32_C(0x02E444D9), + UINT32_C(0x0D776F3C), UINT32_C(0x02B966E8), UINT32_C(0x017A5803), + UINT32_C(0x005E79FE), UINT32_C(0x017FF63B), UINT32_C(0x05B01559), + UINT32_C(0x03097D34), UINT32_C(0x0F3A10BA), UINT32_C(0x0712D05A), + UINT32_C(0x03904282) } }, + { { UINT32_C(0x0727DDB2), UINT32_C(0x0322FBEE), UINT32_C(0x006E2FCD), + UINT32_C(0x07EA06FF), UINT32_C(0x0BA09E24), UINT32_C(0x00F733F8), + UINT32_C(0x03D6DCAE), UINT32_C(0x049125D5), UINT32_C(0x077E1A66), + UINT32_C(0x0D68AE84), UINT32_C(0x04F77FA6), UINT32_C(0x0964F229), + UINT32_C(0x011AD49C), UINT32_C(0x05CC02E9), UINT32_C(0x03E1CD67), + UINT32_C(0x06E9B6EE), UINT32_C(0x02ABE8BE), UINT32_C(0x056C7601), + UINT32_C(0x050C554C) }, + { UINT32_C(0x01B068CF), UINT32_C(0x012F41C1), UINT32_C(0x0CD31293), + UINT32_C(0x056F1C35), UINT32_C(0x0716CA13), UINT32_C(0x0544293E), + UINT32_C(0x06007211), UINT32_C(0x04F726E6), UINT32_C(0x007D49EF), + UINT32_C(0x0E336972), UINT32_C(0x031C46EF), UINT32_C(0x025A6106), + UINT32_C(0x05AA92B9), UINT32_C(0x011700B0), UINT32_C(0x011058CF), + UINT32_C(0x00395DAC), UINT32_C(0x02BBCCE0), UINT32_C(0x029EAC52), + UINT32_C(0x028A26A5) } }, + }, + { + { { UINT32_C(0x0FFE4858), UINT32_C(0x044AC143), UINT32_C(0x06252D69), + UINT32_C(0x03691755), UINT32_C(0x0DE0F670), UINT32_C(0x0295E478), + UINT32_C(0x05945AF8), UINT32_C(0x0A5D32CA), UINT32_C(0x0234DE82), + UINT32_C(0x0F67E075), UINT32_C(0x06115CED), UINT32_C(0x00AE3A40), + UINT32_C(0x04F21740), UINT32_C(0x05BA53F6), UINT32_C(0x05840CD3), + UINT32_C(0x02246AB6), UINT32_C(0x0A7E5891), UINT32_C(0x00E30EE3), + UINT32_C(0x06E32125) }, + { UINT32_C(0x028DA023), UINT32_C(0x0757D14A), UINT32_C(0x0F1F2367), + 
UINT32_C(0x071B23A0), UINT32_C(0x09FF6F22), UINT32_C(0x06AE99FC), + UINT32_C(0x07D2FAD3), UINT32_C(0x0C60DF70), UINT32_C(0x008ADC3F), + UINT32_C(0x090D9E92), UINT32_C(0x027C0C30), UINT32_C(0x01553F37), + UINT32_C(0x047ACF16), UINT32_C(0x017392AB), UINT32_C(0x05D9DD01), + UINT32_C(0x07D1EF5C), UINT32_C(0x039F6FB5), UINT32_C(0x029DC337), + UINT32_C(0x04960195) } }, + { { UINT32_C(0x0994A7B1), UINT32_C(0x00E9A7BA), UINT32_C(0x03544C1B), + UINT32_C(0x0606BDF6), UINT32_C(0x01F3406A), UINT32_C(0x0635C178), + UINT32_C(0x04CA0BE9), UINT32_C(0x09B74F10), UINT32_C(0x046E4155), + UINT32_C(0x0655718B), UINT32_C(0x06B58CFD), UINT32_C(0x00E2656C), + UINT32_C(0x0426833D), UINT32_C(0x063C550C), UINT32_C(0x049DDCA9), + UINT32_C(0x04F6A9FC), UINT32_C(0x0676F8FD), UINT32_C(0x07BCA38C), + UINT32_C(0x059BDCBC) }, + { UINT32_C(0x096F6D73), UINT32_C(0x0378FAEB), UINT32_C(0x0AA2949D), + UINT32_C(0x02979AD2), UINT32_C(0x0FD54FA0), UINT32_C(0x0358AB66), + UINT32_C(0x012D1C2E), UINT32_C(0x0A3E9433), UINT32_C(0x012502DC), + UINT32_C(0x0BF42C60), UINT32_C(0x02403252), UINT32_C(0x0B59A13D), + UINT32_C(0x07CE87D8), UINT32_C(0x06EFA510), UINT32_C(0x0F316813), + UINT32_C(0x048C6131), UINT32_C(0x0ABB4F2B), UINT32_C(0x00135CF6), + UINT32_C(0x019B839C) } }, + { { UINT32_C(0x0CDE12CD), UINT32_C(0x01F2EE46), UINT32_C(0x096668FC), + UINT32_C(0x06800020), UINT32_C(0x0D8D4DC3), UINT32_C(0x01F9D872), + UINT32_C(0x0074B363), UINT32_C(0x08E353D0), UINT32_C(0x06B87B06), + UINT32_C(0x05F1A3E4), UINT32_C(0x03D67702), UINT32_C(0x0AD5ACE9), + UINT32_C(0x024E9994), UINT32_C(0x03C2A440), UINT32_C(0x05A6C55C), + UINT32_C(0x045CAA47), UINT32_C(0x0AC34E77), UINT32_C(0x068E05E3), + UINT32_C(0x0598564E) }, + { UINT32_C(0x0366B021), UINT32_C(0x017935A2), UINT32_C(0x04F773DB), + UINT32_C(0x04629F66), UINT32_C(0x096AE2DC), UINT32_C(0x00DB3EE0), + UINT32_C(0x05684F63), UINT32_C(0x00391BA5), UINT32_C(0x07270BBB), + UINT32_C(0x0E28A705), UINT32_C(0x02BB0A4B), UINT32_C(0x097DCA61), + UINT32_C(0x04E133F5), UINT32_C(0x04899B3E), 
UINT32_C(0x00637ACF), + UINT32_C(0x02D4E63D), UINT32_C(0x09635CB7), UINT32_C(0x02DEDDE2), + UINT32_C(0x02229A95) } }, + { { UINT32_C(0x0CD34315), UINT32_C(0x02E1C8DC), UINT32_C(0x067A6FB7), + UINT32_C(0x03DB6FAE), UINT32_C(0x07281C55), UINT32_C(0x046AC647), + UINT32_C(0x002E790C), UINT32_C(0x0F3D1BC4), UINT32_C(0x0533A625), + UINT32_C(0x06417AC2), UINT32_C(0x018ACECE), UINT32_C(0x0B7019D6), + UINT32_C(0x06EDA9DA), UINT32_C(0x01938AF8), UINT32_C(0x029911BB), + UINT32_C(0x03E2995B), UINT32_C(0x0C0E3FBA), UINT32_C(0x011596D1), + UINT32_C(0x00271C3C) }, + { UINT32_C(0x0356A25A), UINT32_C(0x072A1ED9), UINT32_C(0x0EAF77B0), + UINT32_C(0x02B4B853), UINT32_C(0x0C759255), UINT32_C(0x02FB6C3D), + UINT32_C(0x0704DFA8), UINT32_C(0x0D59777F), UINT32_C(0x078F4FA8), + UINT32_C(0x03C11635), UINT32_C(0x02E52765), UINT32_C(0x02ACB74C), + UINT32_C(0x007731B9), UINT32_C(0x0137AD56), UINT32_C(0x063A4E6E), + UINT32_C(0x06744404), UINT32_C(0x09B78353), UINT32_C(0x04631A57), + UINT32_C(0x018C7F7E) } }, + { { UINT32_C(0x0EAD4FF9), UINT32_C(0x05871450), UINT32_C(0x07F9BF26), + UINT32_C(0x02BC1D4E), UINT32_C(0x00CD4484), UINT32_C(0x04EBA4AB), + UINT32_C(0x01DEDBB8), UINT32_C(0x0E25B38D), UINT32_C(0x049D1268), + UINT32_C(0x0D04AABB), UINT32_C(0x01AEF51D), UINT32_C(0x00829E43), + UINT32_C(0x05402C62), UINT32_C(0x0368D70D), UINT32_C(0x03775E01), + UINT32_C(0x04503803), UINT32_C(0x02B6C48D), UINT32_C(0x01FD101D), + UINT32_C(0x0025FF9E) }, + { UINT32_C(0x0B8B195A), UINT32_C(0x02323FFC), UINT32_C(0x00557FA3), + UINT32_C(0x073ED365), UINT32_C(0x0A376D54), UINT32_C(0x023A3994), + UINT32_C(0x00F1CC64), UINT32_C(0x080DCBBA), UINT32_C(0x01BB869C), + UINT32_C(0x084DE7DF), UINT32_C(0x03102B44), UINT32_C(0x0559CF4A), + UINT32_C(0x0385604A), UINT32_C(0x05CB3A44), UINT32_C(0x022C8F10), + UINT32_C(0x00AC8251), UINT32_C(0x0D40C893), UINT32_C(0x00107891), + UINT32_C(0x06795987) } }, + { { UINT32_C(0x06920A2A), UINT32_C(0x051ED07D), UINT32_C(0x0D40A6DB), + UINT32_C(0x004D5082), UINT32_C(0x0BB2B0B9), 
UINT32_C(0x046EEDFC), + UINT32_C(0x077C4F4D), UINT32_C(0x0025B307), UINT32_C(0x00CCCEED), + UINT32_C(0x05AD182A), UINT32_C(0x0734F059), UINT32_C(0x0B480EE5), + UINT32_C(0x0170F1CB), UINT32_C(0x0417A672), UINT32_C(0x05B933B3), + UINT32_C(0x0279BB07), UINT32_C(0x0341E8CB), UINT32_C(0x071F7EBF), + UINT32_C(0x0231AF93) }, + { UINT32_C(0x01CA3CCC), UINT32_C(0x042A30AF), UINT32_C(0x0E1E55F1), + UINT32_C(0x07A6A1AC), UINT32_C(0x0D95EC2F), UINT32_C(0x029E2CCD), + UINT32_C(0x00847505), UINT32_C(0x0184F443), UINT32_C(0x04B6D717), + UINT32_C(0x03764831), UINT32_C(0x043E0649), UINT32_C(0x0378A536), + UINT32_C(0x0430CAB4), UINT32_C(0x05B08C42), UINT32_C(0x0B147E31), + UINT32_C(0x0270B565), UINT32_C(0x056846E1), UINT32_C(0x0393806E), + UINT32_C(0x0102687E) } }, + { { UINT32_C(0x0EB5DCD3), UINT32_C(0x0185FC5D), UINT32_C(0x03181617), + UINT32_C(0x01479862), UINT32_C(0x0D1E00A3), UINT32_C(0x000E2351), + UINT32_C(0x041EA413), UINT32_C(0x0EC09039), UINT32_C(0x00213EFE), + UINT32_C(0x02085A51), UINT32_C(0x027B7641), UINT32_C(0x0EE239C0), + UINT32_C(0x06D0F7BB), UINT32_C(0x0267C803), UINT32_C(0x0B79A7EE), + UINT32_C(0x0681FFDF), UINT32_C(0x08DFF64B), UINT32_C(0x0688C37C), + UINT32_C(0x03D1AE9F) }, + { UINT32_C(0x03B68E6C), UINT32_C(0x07F04BE5), UINT32_C(0x060E4D0D), + UINT32_C(0x0534899D), UINT32_C(0x0FA52B9C), UINT32_C(0x001C4752), + UINT32_C(0x00BCA60E), UINT32_C(0x041ED165), UINT32_C(0x01DBEB9D), + UINT32_C(0x04BEFD90), UINT32_C(0x05B1A36F), UINT32_C(0x0C6DA7CD), + UINT32_C(0x025F29BF), UINT32_C(0x0143D052), UINT32_C(0x099FCD3B), + UINT32_C(0x04934EE0), UINT32_C(0x00F9287C), UINT32_C(0x06BF2174), + UINT32_C(0x05D3AAEB) } }, + { { UINT32_C(0x0B07B1BF), UINT32_C(0x008B8614), UINT32_C(0x00E21485), + UINT32_C(0x07064A8F), UINT32_C(0x04328BCA), UINT32_C(0x0126ADF3), + UINT32_C(0x07D9CEFE), UINT32_C(0x0B5FE8D9), UINT32_C(0x03B144E7), + UINT32_C(0x0FF1E126), UINT32_C(0x06AF8F59), UINT32_C(0x07A6CE02), + UINT32_C(0x07F9BE52), UINT32_C(0x003588EF), UINT32_C(0x0EFF3D3A), + 
UINT32_C(0x052C77D2), UINT32_C(0x010CACE8), UINT32_C(0x05B1B51F), + UINT32_C(0x06F19D06) }, + { UINT32_C(0x042166D8), UINT32_C(0x04CD028C), UINT32_C(0x039C24AE), + UINT32_C(0x02C03F19), UINT32_C(0x067F4B98), UINT32_C(0x020FC733), + UINT32_C(0x01DAB42C), UINT32_C(0x02FF3B82), UINT32_C(0x048BCF28), + UINT32_C(0x019BFE25), UINT32_C(0x05777D5F), UINT32_C(0x06871AF8), + UINT32_C(0x04139F9E), UINT32_C(0x07211D99), UINT32_C(0x0AD09893), + UINT32_C(0x01E0FD46), UINT32_C(0x02906E37), UINT32_C(0x028275DB), + UINT32_C(0x046A1575) } }, + { { UINT32_C(0x08AA3834), UINT32_C(0x06C07864), UINT32_C(0x0E044947), + UINT32_C(0x03335EFD), UINT32_C(0x067B5E62), UINT32_C(0x034C6315), + UINT32_C(0x07572306), UINT32_C(0x07CFC444), UINT32_C(0x01B85C68), + UINT32_C(0x04AE9317), UINT32_C(0x004244BB), UINT32_C(0x02B9387A), + UINT32_C(0x07EC501D), UINT32_C(0x030A85A4), UINT32_C(0x035462ED), + UINT32_C(0x0713AD0C), UINT32_C(0x053851AC), UINT32_C(0x02FE3E5B), + UINT32_C(0x06B40EB3) }, + { UINT32_C(0x053E08C6), UINT32_C(0x05772205), UINT32_C(0x030BB610), + UINT32_C(0x008EE615), UINT32_C(0x0B7E6CE7), UINT32_C(0x00783E50), + UINT32_C(0x0096806A), UINT32_C(0x066126FD), UINT32_C(0x051C1C80), + UINT32_C(0x0ECBCD5E), UINT32_C(0x03A28DED), UINT32_C(0x08FD6395), + UINT32_C(0x022A192F), UINT32_C(0x0736A4A0), UINT32_C(0x01369C64), + UINT32_C(0x02AB6ECE), UINT32_C(0x06E0E541), UINT32_C(0x03248146), + UINT32_C(0x00948603) } }, + { { UINT32_C(0x069B34EA), UINT32_C(0x0336603F), UINT32_C(0x06DBFFB7), + UINT32_C(0x0300F54C), UINT32_C(0x03402123), UINT32_C(0x04E1356D), + UINT32_C(0x04422E8C), UINT32_C(0x0C555F86), UINT32_C(0x065AB272), + UINT32_C(0x053F830F), UINT32_C(0x0579A41E), UINT32_C(0x0FEFEF91), + UINT32_C(0x004E0795), UINT32_C(0x016107F9), UINT32_C(0x08D654BD), + UINT32_C(0x04ABFECE), UINT32_C(0x06C9D84D), UINT32_C(0x03813525), + UINT32_C(0x07CB6F50) }, + { UINT32_C(0x09047156), UINT32_C(0x010B8EB7), UINT32_C(0x0CC6FC83), + UINT32_C(0x0431B14F), UINT32_C(0x03572502), UINT32_C(0x076096FF), + 
UINT32_C(0x0028C298), UINT32_C(0x066F3BBA), UINT32_C(0x00B06491), + UINT32_C(0x0665164A), UINT32_C(0x04A5A55D), UINT32_C(0x02DAC096), + UINT32_C(0x03E71E1C), UINT32_C(0x0256A93B), UINT32_C(0x04C0530A), + UINT32_C(0x062EDF21), UINT32_C(0x0F59E8F8), UINT32_C(0x019409ED), + UINT32_C(0x07A2F4BF) } }, + { { UINT32_C(0x0665B1CF), UINT32_C(0x0034F110), UINT32_C(0x0E6E0C55), + UINT32_C(0x05548084), UINT32_C(0x0CB9C817), UINT32_C(0x010A8F87), + UINT32_C(0x012A9C49), UINT32_C(0x0982F57E), UINT32_C(0x00D5BB56), + UINT32_C(0x0649D707), UINT32_C(0x00C86A10), UINT32_C(0x0C3ED33B), + UINT32_C(0x065AEDD0), UINT32_C(0x061D08CC), UINT32_C(0x010AAD5D), + UINT32_C(0x015E11C5), UINT32_C(0x0CE68252), UINT32_C(0x03DCA282), + UINT32_C(0x023E7D61) }, + { UINT32_C(0x094CC511), UINT32_C(0x053544CA), UINT32_C(0x067DDC2E), + UINT32_C(0x022C5BA7), UINT32_C(0x0E503DBC), UINT32_C(0x06CD2E73), + UINT32_C(0x058CE06F), UINT32_C(0x072AA3E8), UINT32_C(0x06DB1977), + UINT32_C(0x04494EBF), UINT32_C(0x00968BBC), UINT32_C(0x02E8F607), + UINT32_C(0x06F93369), UINT32_C(0x00836553), UINT32_C(0x05A73753), + UINT32_C(0x03A8B586), UINT32_C(0x00A046AC), UINT32_C(0x0211F089), + UINT32_C(0x0389954D) } }, + { { UINT32_C(0x0BB13D25), UINT32_C(0x023A4F60), UINT32_C(0x05B894C3), + UINT32_C(0x01F6CF6C), UINT32_C(0x0F316A82), UINT32_C(0x07269483), + UINT32_C(0x0724D1FF), UINT32_C(0x081060C2), UINT32_C(0x07213116), + UINT32_C(0x0B65307F), UINT32_C(0x06CB9993), UINT32_C(0x04580D3B), + UINT32_C(0x064521E7), UINT32_C(0x07FA9810), UINT32_C(0x00B180DF), + UINT32_C(0x058701A7), UINT32_C(0x08BFB845), UINT32_C(0x0175BF68), + UINT32_C(0x02BF1464) }, + { UINT32_C(0x04B66F01), UINT32_C(0x059EAFDA), UINT32_C(0x02EB7B38), + UINT32_C(0x0382ED4B), UINT32_C(0x0D3E8A47), UINT32_C(0x061E1C44), + UINT32_C(0x06369F05), UINT32_C(0x0221CD6C), UINT32_C(0x033836B4), + UINT32_C(0x0580C2E2), UINT32_C(0x071C3002), UINT32_C(0x0C51E97D), + UINT32_C(0x06D684C3), UINT32_C(0x074D62F1), UINT32_C(0x0851439A), + UINT32_C(0x038AB710), UINT32_C(0x0300D39E), 
UINT32_C(0x0390C464), + UINT32_C(0x04D98E09) } }, + { { UINT32_C(0x0140A004), UINT32_C(0x00D68C0B), UINT32_C(0x080890B3), + UINT32_C(0x07D532CC), UINT32_C(0x05EC2C5B), UINT32_C(0x065415DB), + UINT32_C(0x021CBEF3), UINT32_C(0x0C92C4C7), UINT32_C(0x002C11E2), + UINT32_C(0x087FFDBE), UINT32_C(0x00BBD5AB), UINT32_C(0x0D3147C6), + UINT32_C(0x027322CF), UINT32_C(0x048AE30E), UINT32_C(0x0A78BD27), + UINT32_C(0x06E52637), UINT32_C(0x0F79BB43), UINT32_C(0x05C2CDD9), + UINT32_C(0x03AEDAB1) }, + { UINT32_C(0x01F8F797), UINT32_C(0x05E078E8), UINT32_C(0x0A430953), + UINT32_C(0x079FE860), UINT32_C(0x098B3236), UINT32_C(0x00A0033B), + UINT32_C(0x0311C26A), UINT32_C(0x02325326), UINT32_C(0x021CEBBC), + UINT32_C(0x01C498E4), UINT32_C(0x02365440), UINT32_C(0x091FBA94), + UINT32_C(0x017487BB), UINT32_C(0x0321A8D5), UINT32_C(0x071AEF9F), + UINT32_C(0x047D457D), UINT32_C(0x01BCFB0E), UINT32_C(0x0071F7BC), + UINT32_C(0x075AEFAA) } }, + { { UINT32_C(0x0C98DFAE), UINT32_C(0x01C5257A), UINT32_C(0x06506435), + UINT32_C(0x00916D1A), UINT32_C(0x0D65B633), UINT32_C(0x06BAC13A), + UINT32_C(0x013D2F72), UINT32_C(0x0B8C7FD1), UINT32_C(0x0068E619), + UINT32_C(0x0C30A25B), UINT32_C(0x016EBDF8), UINT32_C(0x0D8A2E42), + UINT32_C(0x01E2AB8D), UINT32_C(0x07855AFB), UINT32_C(0x01F15FBB), + UINT32_C(0x01DA4917), UINT32_C(0x074DB277), UINT32_C(0x030BAC3C), + UINT32_C(0x01B1B048) }, + { UINT32_C(0x00C92FB5), UINT32_C(0x00781A5F), UINT32_C(0x0B53EE11), + UINT32_C(0x04366DE3), UINT32_C(0x0D7AFCA1), UINT32_C(0x04C3CAB8), + UINT32_C(0x031EB35F), UINT32_C(0x00CDDA16), UINT32_C(0x05DB2AA4), + UINT32_C(0x0EEC79C5), UINT32_C(0x0123CDB1), UINT32_C(0x0A41DC06), + UINT32_C(0x06880096), UINT32_C(0x069843C8), UINT32_C(0x0CF78DBD), + UINT32_C(0x0751C797), UINT32_C(0x0381D873), UINT32_C(0x055DD420), + UINT32_C(0x011ED33F) } }, + { { UINT32_C(0x0629DD22), UINT32_C(0x0329136A), UINT32_C(0x0F4C3A86), + UINT32_C(0x02DF1D68), UINT32_C(0x0629460E), UINT32_C(0x04615D04), + UINT32_C(0x06370A73), UINT32_C(0x0FF4CD28), 
UINT32_C(0x031AD006), + UINT32_C(0x08F7AAC2), UINT32_C(0x05792159), UINT32_C(0x0680FF31), + UINT32_C(0x04E1BAE8), UINT32_C(0x02E9B2B2), UINT32_C(0x0033BF36), + UINT32_C(0x07DA8F9E), UINT32_C(0x0C93AB40), UINT32_C(0x01D743F3), + UINT32_C(0x07644D30) }, + { UINT32_C(0x075200EB), UINT32_C(0x07C0784F), UINT32_C(0x0BE5A2EF), + UINT32_C(0x002C4071), UINT32_C(0x0BB7DD65), UINT32_C(0x004ADBD2), + UINT32_C(0x040D6568), UINT32_C(0x0F9A3BB6), UINT32_C(0x003E18E7), + UINT32_C(0x0B2FA6B5), UINT32_C(0x04ED429F), UINT32_C(0x06091338), + UINT32_C(0x01D161FD), UINT32_C(0x00454AAD), UINT32_C(0x0CAE06AA), + UINT32_C(0x04E95021), UINT32_C(0x04523C5D), UINT32_C(0x041594F0), + UINT32_C(0x065084CD) } }, + { { UINT32_C(0x002145D7), UINT32_C(0x047D8374), UINT32_C(0x0467ABA3), + UINT32_C(0x051CC3F5), UINT32_C(0x0483BB69), UINT32_C(0x05CC8B8E), + UINT32_C(0x00E452BD), UINT32_C(0x04333A28), UINT32_C(0x04F1A76A), + UINT32_C(0x0CC64EC5), UINT32_C(0x05D9332C), UINT32_C(0x0E975BFD), + UINT32_C(0x036AEA82), UINT32_C(0x03B66BE1), UINT32_C(0x0C8D0897), + UINT32_C(0x00F4E2EA), UINT32_C(0x0E84A7FD), UINT32_C(0x04F8C351), + UINT32_C(0x03B65097) }, + { UINT32_C(0x0DDB406F), UINT32_C(0x00890ADF), UINT32_C(0x03BBC60E), + UINT32_C(0x01C0CA21), UINT32_C(0x0A76C2EF), UINT32_C(0x01695DF8), + UINT32_C(0x07073F32), UINT32_C(0x0EED6813), UINT32_C(0x014D6ADC), + UINT32_C(0x0AD30E57), UINT32_C(0x0080597C), UINT32_C(0x051E8314), + UINT32_C(0x02334D30), UINT32_C(0x01C9AC19), UINT32_C(0x0D628FAA), + UINT32_C(0x03467107), UINT32_C(0x027B5A2C), UINT32_C(0x07FE2414), + UINT32_C(0x06D835AF) } }, + }, + { + { { UINT32_C(0x0EF34144), UINT32_C(0x030D91DC), UINT32_C(0x05517757), + UINT32_C(0x007F4856), UINT32_C(0x07EAF164), UINT32_C(0x058E3931), + UINT32_C(0x0713CF7A), UINT32_C(0x0D5B04EB), UINT32_C(0x0416E9E6), + UINT32_C(0x02479D66), UINT32_C(0x03230F77), UINT32_C(0x0E9111E0), + UINT32_C(0x004A4528), UINT32_C(0x02C7F7D1), UINT32_C(0x02C19F36), + UINT32_C(0x0456B2EE), UINT32_C(0x083CA160), UINT32_C(0x04377D25), + 
UINT32_C(0x02CC5D8D) }, + { UINT32_C(0x024FDE34), UINT32_C(0x056A1AF8), UINT32_C(0x04A1F978), + UINT32_C(0x07F66131), UINT32_C(0x09CCCEFE), UINT32_C(0x056AE73E), + UINT32_C(0x0373907A), UINT32_C(0x08E4DFA2), UINT32_C(0x06104B90), + UINT32_C(0x0CB65FE3), UINT32_C(0x0157AEF0), UINT32_C(0x0346E5AE), + UINT32_C(0x06A8D9D0), UINT32_C(0x034F592B), UINT32_C(0x06A50F43), + UINT32_C(0x03B946D2), UINT32_C(0x0B23CFAE), UINT32_C(0x01428E19), + UINT32_C(0x01E96239) } }, + { { UINT32_C(0x0FF5FDD9), UINT32_C(0x06FD0B27), UINT32_C(0x0E5375B8), + UINT32_C(0x02903F56), UINT32_C(0x0A0998F1), UINT32_C(0x04C7F7A7), + UINT32_C(0x07B849C2), UINT32_C(0x01F684C1), UINT32_C(0x03D27FA7), + UINT32_C(0x0ECDF852), UINT32_C(0x067A0FF9), UINT32_C(0x01170172), + UINT32_C(0x06847341), UINT32_C(0x0384EC35), UINT32_C(0x097FA0B1), + UINT32_C(0x056D5954), UINT32_C(0x0811FE39), UINT32_C(0x03141A8E), + UINT32_C(0x03197AAF) }, + { UINT32_C(0x06B64713), UINT32_C(0x01EA477B), UINT32_C(0x0401B800), + UINT32_C(0x056A093F), UINT32_C(0x0B18523C), UINT32_C(0x05FBF38B), + UINT32_C(0x0000837C), UINT32_C(0x0205CC9C), UINT32_C(0x0211586E), + UINT32_C(0x00E95959), UINT32_C(0x011034DB), UINT32_C(0x0705835C), + UINT32_C(0x0534A7CA), UINT32_C(0x01BEEAE0), UINT32_C(0x011191B1), + UINT32_C(0x06AC6C8E), UINT32_C(0x0F65A0B0), UINT32_C(0x01E452CE), + UINT32_C(0x07AA591C) } }, + { { UINT32_C(0x04BE78BD), UINT32_C(0x06F41AA4), UINT32_C(0x09895DC2), + UINT32_C(0x05E43C02), UINT32_C(0x0F5ED50D), UINT32_C(0x0055BA85), + UINT32_C(0x04B88B8C), UINT32_C(0x07C05237), UINT32_C(0x06B089B3), + UINT32_C(0x09D41AEF), UINT32_C(0x07A77F2E), UINT32_C(0x0B03794F), + UINT32_C(0x0272136B), UINT32_C(0x013E2617), UINT32_C(0x039B53A2), + UINT32_C(0x04704526), UINT32_C(0x0958114F), UINT32_C(0x01DF2245), + UINT32_C(0x0736ACD3) }, + { UINT32_C(0x020FED74), UINT32_C(0x0142B2B5), UINT32_C(0x00BC648B), + UINT32_C(0x045D8303), UINT32_C(0x01238CE7), UINT32_C(0x041E6696), + UINT32_C(0x07794FE3), UINT32_C(0x02BC0623), UINT32_C(0x04D21409), + 
UINT32_C(0x05FABD03), UINT32_C(0x074FAEA0), UINT32_C(0x08FD5BE6), + UINT32_C(0x041F41AC), UINT32_C(0x046062AA), UINT32_C(0x06780730), + UINT32_C(0x035F4E6F), UINT32_C(0x016D4890), UINT32_C(0x05B93E77), + UINT32_C(0x01E38302) } }, + { { UINT32_C(0x0736B7A8), UINT32_C(0x049E4056), UINT32_C(0x01935194), + UINT32_C(0x056AFE87), UINT32_C(0x0526EB80), UINT32_C(0x0763756F), + UINT32_C(0x0438F678), UINT32_C(0x074903F5), UINT32_C(0x0305EF19), + UINT32_C(0x0434448D), UINT32_C(0x05186915), UINT32_C(0x00E55244), + UINT32_C(0x017BD6D1), UINT32_C(0x0747C684), UINT32_C(0x0FEE9906), + UINT32_C(0x07BEA2FE), UINT32_C(0x04C3FEC5), UINT32_C(0x05EAB892), + UINT32_C(0x03E3B341) }, + { UINT32_C(0x0DEF19D6), UINT32_C(0x03A56FE1), UINT32_C(0x09F33CC0), + UINT32_C(0x03E3A7C9), UINT32_C(0x04712359), UINT32_C(0x02515669), + UINT32_C(0x035C962B), UINT32_C(0x08C45240), UINT32_C(0x033CCA10), + UINT32_C(0x06965FA2), UINT32_C(0x04F88D82), UINT32_C(0x0FDE595A), + UINT32_C(0x0241F5B1), UINT32_C(0x03F203E1), UINT32_C(0x0BB7CDF8), + UINT32_C(0x046409AD), UINT32_C(0x08E4A186), UINT32_C(0x01723DD8), + UINT32_C(0x02B93AF0) } }, + { { UINT32_C(0x0FACC519), UINT32_C(0x027F5A2C), UINT32_C(0x0CA8C450), + UINT32_C(0x03EC651F), UINT32_C(0x0B47E880), UINT32_C(0x01B9DB47), + UINT32_C(0x06895D1C), UINT32_C(0x0F1857B2), UINT32_C(0x06CC04B3), + UINT32_C(0x01C2D89D), UINT32_C(0x04525759), UINT32_C(0x0B6EACB4), + UINT32_C(0x07770FC8), UINT32_C(0x04A7FC79), UINT32_C(0x03B56F1C), + UINT32_C(0x0248A360), UINT32_C(0x0A73C4C6), UINT32_C(0x04BA5188), + UINT32_C(0x0400E477) }, + { UINT32_C(0x0AEA3E6E), UINT32_C(0x05DA167B), UINT32_C(0x02C8D4B1), + UINT32_C(0x074DB11C), UINT32_C(0x05DB2724), UINT32_C(0x04492C83), + UINT32_C(0x00B62A05), UINT32_C(0x03A036B6), UINT32_C(0x07BC9211), + UINT32_C(0x05739939), UINT32_C(0x00FD8C64), UINT32_C(0x0E68B0EC), + UINT32_C(0x050FC3F3), UINT32_C(0x0446466F), UINT32_C(0x0A598C89), + UINT32_C(0x062CB99D), UINT32_C(0x0C97B1FA), UINT32_C(0x077F1F42), + UINT32_C(0x051B5A92) } }, + { { 
UINT32_C(0x09C36058), UINT32_C(0x05929A37), UINT32_C(0x079147E4), + UINT32_C(0x0546B4E8), UINT32_C(0x0C41B43A), UINT32_C(0x05F16140), + UINT32_C(0x0124A189), UINT32_C(0x0D01EFB0), UINT32_C(0x00FCDC74), + UINT32_C(0x0D3E796F), UINT32_C(0x0597A54B), UINT32_C(0x097F7DE8), + UINT32_C(0x0677C89A), UINT32_C(0x036C6165), UINT32_C(0x0DFFFA33), + UINT32_C(0x0782CAAE), UINT32_C(0x07E6FE65), UINT32_C(0x04887038), + UINT32_C(0x0636D482) }, + { UINT32_C(0x071EFA02), UINT32_C(0x07F91B7E), UINT32_C(0x0950028E), + UINT32_C(0x069527C7), UINT32_C(0x09CE6F6C), UINT32_C(0x01FEEAA0), + UINT32_C(0x014DED92), UINT32_C(0x0D94B717), UINT32_C(0x014B513D), + UINT32_C(0x0A97F421), UINT32_C(0x075448FA), UINT32_C(0x041A5F24), + UINT32_C(0x0721201F), UINT32_C(0x0444C83A), UINT32_C(0x07F6AE04), + UINT32_C(0x030824B5), UINT32_C(0x0246F2D9), UINT32_C(0x05F21CD9), + UINT32_C(0x06817477) } }, + { { UINT32_C(0x0DDEF055), UINT32_C(0x01C63F00), UINT32_C(0x0570BDE9), + UINT32_C(0x07433A8A), UINT32_C(0x099522A9), UINT32_C(0x051DEDFE), + UINT32_C(0x01712838), UINT32_C(0x0C8ECC33), UINT32_C(0x04846773), + UINT32_C(0x0D5E2042), UINT32_C(0x017373E7), UINT32_C(0x04742EE4), + UINT32_C(0x01053131), UINT32_C(0x01BD8B10), UINT32_C(0x01A5A425), + UINT32_C(0x072BB78A), UINT32_C(0x01A26990), UINT32_C(0x02CD45F0), + UINT32_C(0x03124D19) }, + { UINT32_C(0x01A2F1BD), UINT32_C(0x02C1057A), UINT32_C(0x07B6C2D1), + UINT32_C(0x00B79FA6), UINT32_C(0x09B44B1B), UINT32_C(0x0428D7E8), + UINT32_C(0x04C94C23), UINT32_C(0x0DFB15C5), UINT32_C(0x02F5DBF7), + UINT32_C(0x0BC452A9), UINT32_C(0x044F06AF), UINT32_C(0x06C3295D), + UINT32_C(0x0661CB9B), UINT32_C(0x0001E990), UINT32_C(0x022A6D5E), + UINT32_C(0x03420E57), UINT32_C(0x0D5E7F7E), UINT32_C(0x0593D853), + UINT32_C(0x00938C95) } }, + { { UINT32_C(0x0899A80A), UINT32_C(0x063E3726), UINT32_C(0x08972EC5), + UINT32_C(0x037C93BE), UINT32_C(0x031E1342), UINT32_C(0x07C51EDF), + UINT32_C(0x03702DD4), UINT32_C(0x086F89E1), UINT32_C(0x047EBB47), + UINT32_C(0x06A291B7), UINT32_C(0x0685EBFA), 
UINT32_C(0x0EF566F4), + UINT32_C(0x02FC8735), UINT32_C(0x03A7F885), UINT32_C(0x0963A567), + UINT32_C(0x02DEC9A4), UINT32_C(0x033285D3), UINT32_C(0x0049779E), + UINT32_C(0x05AB7D24) }, + { UINT32_C(0x04E67976), UINT32_C(0x03AD342E), UINT32_C(0x006D58B0), + UINT32_C(0x0490C968), UINT32_C(0x0428E13C), UINT32_C(0x0183F7B5), + UINT32_C(0x0168EF02), UINT32_C(0x031E9F33), UINT32_C(0x079C2D32), + UINT32_C(0x0EC6C4B2), UINT32_C(0x06334DE3), UINT32_C(0x04E10D5F), + UINT32_C(0x0431C81B), UINT32_C(0x001EE024), UINT32_C(0x01F6A3D0), + UINT32_C(0x0009B04D), UINT32_C(0x0A95C815), UINT32_C(0x06C721B5), + UINT32_C(0x07DEE1A8) } }, + { { UINT32_C(0x0C112CB8), UINT32_C(0x00691E2E), UINT32_C(0x01DBEB00), + UINT32_C(0x077CCE8A), UINT32_C(0x03E91FE4), UINT32_C(0x0690BBBF), + UINT32_C(0x0577CA8A), UINT32_C(0x00B5C974), UINT32_C(0x029377A0), + UINT32_C(0x06FDF488), UINT32_C(0x00872436), UINT32_C(0x0506D32E), + UINT32_C(0x055C17BB), UINT32_C(0x03B00666), UINT32_C(0x0D26AAA8), + UINT32_C(0x03829C3F), UINT32_C(0x08B67A64), UINT32_C(0x0475D296), + UINT32_C(0x027FEFC5) }, + { UINT32_C(0x06814D18), UINT32_C(0x01588692), UINT32_C(0x0D4F0EDD), + UINT32_C(0x007DFA60), UINT32_C(0x042E603A), UINT32_C(0x00885394), + UINT32_C(0x05F797E2), UINT32_C(0x041238B4), UINT32_C(0x052305E5), + UINT32_C(0x0D9515E8), UINT32_C(0x05B10FCD), UINT32_C(0x08F6C6F8), + UINT32_C(0x043FB734), UINT32_C(0x014BE940), UINT32_C(0x0E882EEE), + UINT32_C(0x0077B050), UINT32_C(0x02093150), UINT32_C(0x05A0B712), + UINT32_C(0x06E640E8) } }, + { { UINT32_C(0x0BE77EA4), UINT32_C(0x03634A86), UINT32_C(0x01F8DFF4), + UINT32_C(0x005A0F6B), UINT32_C(0x0D30990A), UINT32_C(0x0712090D), + UINT32_C(0x048C153A), UINT32_C(0x029E8CA3), UINT32_C(0x052B7982), + UINT32_C(0x01355D1B), UINT32_C(0x00109FDB), UINT32_C(0x029EF3CE), + UINT32_C(0x02FA1090), UINT32_C(0x033F025F), UINT32_C(0x03D1969F), + UINT32_C(0x052EDB5F), UINT32_C(0x04D2BEF3), UINT32_C(0x06BF5DE5), + UINT32_C(0x00C8983F) }, + { UINT32_C(0x04B8EB93), UINT32_C(0x0058C176), 
UINT32_C(0x00A13CB4), + UINT32_C(0x053DF577), UINT32_C(0x0156AEB4), UINT32_C(0x005E3851), + UINT32_C(0x069CEAE2), UINT32_C(0x0030FF4F), UINT32_C(0x001DA227), + UINT32_C(0x05AF81D3), UINT32_C(0x03D80D8D), UINT32_C(0x0A3E8600), + UINT32_C(0x03D228FC), UINT32_C(0x0665245C), UINT32_C(0x09E5CE2E), + UINT32_C(0x03843A9B), UINT32_C(0x02F2D31B), UINT32_C(0x041832DC), + UINT32_C(0x02E66351) } }, + { { UINT32_C(0x05730C8D), UINT32_C(0x06092618), UINT32_C(0x079F5AFA), + UINT32_C(0x06F3E0CF), UINT32_C(0x092BC672), UINT32_C(0x0276DE36), + UINT32_C(0x02D07EDC), UINT32_C(0x0FC6A29F), UINT32_C(0x0486EFA2), + UINT32_C(0x0909E264), UINT32_C(0x056F98E8), UINT32_C(0x08A33777), + UINT32_C(0x007820C7), UINT32_C(0x07E651CF), UINT32_C(0x0928B418), + UINT32_C(0x05EF7EA1), UINT32_C(0x0BE35987), UINT32_C(0x023FE702), + UINT32_C(0x04B874D9) }, + { UINT32_C(0x001A8D36), UINT32_C(0x03FC40DA), UINT32_C(0x00561AB4), + UINT32_C(0x036E4547), UINT32_C(0x0D462FB9), UINT32_C(0x07B2E89D), + UINT32_C(0x0616BF2B), UINT32_C(0x02FA3373), UINT32_C(0x067EE578), + UINT32_C(0x02B81792), UINT32_C(0x03A32F95), UINT32_C(0x019591EC), + UINT32_C(0x047F05AA), UINT32_C(0x058E2F29), UINT32_C(0x04CECEE9), + UINT32_C(0x07DF3632), UINT32_C(0x02BFB16E), UINT32_C(0x03AB1AD0), + UINT32_C(0x0610FCE9) } }, + { { UINT32_C(0x0CE87EAC), UINT32_C(0x00235BF1), UINT32_C(0x0EAE0AF1), + UINT32_C(0x03D89DD3), UINT32_C(0x0B789073), UINT32_C(0x01AC0815), + UINT32_C(0x055721C2), UINT32_C(0x0B2BAD77), UINT32_C(0x05787CF1), + UINT32_C(0x00C70041), UINT32_C(0x00EEE049), UINT32_C(0x0D01B922), + UINT32_C(0x022A24F8), UINT32_C(0x0317FAC7), UINT32_C(0x0D5F402C), + UINT32_C(0x0439541B), UINT32_C(0x07D56CC2), UINT32_C(0x00EB80BF), + UINT32_C(0x00E40AA6) }, + { UINT32_C(0x0A01F6F0), UINT32_C(0x020DA18A), UINT32_C(0x073C68C0), + UINT32_C(0x05338AFA), UINT32_C(0x0DDC8CB0), UINT32_C(0x001C0CED), + UINT32_C(0x07A82BBC), UINT32_C(0x081BF5E1), UINT32_C(0x00B876DD), + UINT32_C(0x09864ED3), UINT32_C(0x07F89153), UINT32_C(0x0A066C82), + 
UINT32_C(0x042461BC), UINT32_C(0x07592D13), UINT32_C(0x02DBFA28), + UINT32_C(0x0371D64F), UINT32_C(0x0326B139), UINT32_C(0x0545030E), + UINT32_C(0x03B02EDD) } }, + { { UINT32_C(0x0C8AA41D), UINT32_C(0x02999435), UINT32_C(0x011470BE), + UINT32_C(0x02448ABD), UINT32_C(0x0C3A559A), UINT32_C(0x03DE4EDA), + UINT32_C(0x0267ACAB), UINT32_C(0x05B64BAF), UINT32_C(0x06167A36), + UINT32_C(0x080925DF), UINT32_C(0x0748EB2E), UINT32_C(0x0262E572), + UINT32_C(0x06655A71), UINT32_C(0x02DC7E31), UINT32_C(0x009FA448), + UINT32_C(0x05991E95), UINT32_C(0x0FA3D04A), UINT32_C(0x0484BE25), + UINT32_C(0x0438E396) }, + { UINT32_C(0x044C41BB), UINT32_C(0x02EFDFC2), UINT32_C(0x0F459DA9), + UINT32_C(0x04A94A2D), UINT32_C(0x03F47C03), UINT32_C(0x07FA71AF), + UINT32_C(0x03DC178C), UINT32_C(0x0129963B), UINT32_C(0x021E1FD4), + UINT32_C(0x0E7487EB), UINT32_C(0x00C3DDB0), UINT32_C(0x06EE0434), + UINT32_C(0x06D2712F), UINT32_C(0x07842656), UINT32_C(0x013F8F26), + UINT32_C(0x01F9766F), UINT32_C(0x061BD12C), UINT32_C(0x02B96EB7), + UINT32_C(0x01F8FA20) } }, + { { UINT32_C(0x0FB80E07), UINT32_C(0x050B08F2), UINT32_C(0x064554C9), + UINT32_C(0x078E1F81), UINT32_C(0x09ED8841), UINT32_C(0x0596ADC2), + UINT32_C(0x034DF164), UINT32_C(0x020E6E12), UINT32_C(0x018EDA4D), + UINT32_C(0x0174E31B), UINT32_C(0x03B107F1), UINT32_C(0x010EC155), + UINT32_C(0x07FA899A), UINT32_C(0x0717505D), UINT32_C(0x05819825), + UINT32_C(0x0542EC55), UINT32_C(0x038DD6D7), UINT32_C(0x0497E5A0), + UINT32_C(0x03081495) }, + { UINT32_C(0x064986F4), UINT32_C(0x03BD600B), UINT32_C(0x04B78E0D), + UINT32_C(0x0098465F), UINT32_C(0x0E7E78C0), UINT32_C(0x0127CC0E), + UINT32_C(0x07A3BC64), UINT32_C(0x001DBF18), UINT32_C(0x06A78B45), + UINT32_C(0x0D3A5A6B), UINT32_C(0x0682C6C2), UINT32_C(0x0B8EE95B), + UINT32_C(0x066E64B3), UINT32_C(0x04178CB0), UINT32_C(0x0FC2F66E), + UINT32_C(0x04EABB3C), UINT32_C(0x084AF2DE), UINT32_C(0x04C297C1), + UINT32_C(0x0136B06E) } }, + { { UINT32_C(0x07DF6D6E), UINT32_C(0x01F00ED6), UINT32_C(0x02705D3E), + 
UINT32_C(0x038023D6), UINT32_C(0x0A85D53D), UINT32_C(0x01C4664A), + UINT32_C(0x0610B36C), UINT32_C(0x02BAE274), UINT32_C(0x03566DBB), + UINT32_C(0x0854659C), UINT32_C(0x00F106D4), UINT32_C(0x09D0A630), + UINT32_C(0x01B5D98A), UINT32_C(0x01B27CA8), UINT32_C(0x0F254343), + UINT32_C(0x075491B9), UINT32_C(0x025D2274), UINT32_C(0x04F17B63), + UINT32_C(0x06865DA3) }, + { UINT32_C(0x0D4C1CFE), UINT32_C(0x0612B559), UINT32_C(0x0D29CCC2), + UINT32_C(0x06835607), UINT32_C(0x0E442A4F), UINT32_C(0x003F2EA3), + UINT32_C(0x04DA7E80), UINT32_C(0x079ABF17), UINT32_C(0x062A7A50), + UINT32_C(0x0FE31E03), UINT32_C(0x044D195D), UINT32_C(0x01A9DC51), + UINT32_C(0x05B8C361), UINT32_C(0x06390D3D), UINT32_C(0x0544BD42), + UINT32_C(0x02DB7A09), UINT32_C(0x0367E705), UINT32_C(0x01B34C53), + UINT32_C(0x055F8181) } }, + { { UINT32_C(0x0F3F00C1), UINT32_C(0x04C36A17), UINT32_C(0x0CB05A60), + UINT32_C(0x05742C4B), UINT32_C(0x029DC7BA), UINT32_C(0x00946765), + UINT32_C(0x01F6280B), UINT32_C(0x0A250657), UINT32_C(0x057853BE), + UINT32_C(0x027C17D4), UINT32_C(0x061E6EE7), UINT32_C(0x068934C0), + UINT32_C(0x0225275D), UINT32_C(0x004E706A), UINT32_C(0x08A0E33D), + UINT32_C(0x02EFB382), UINT32_C(0x0231B332), UINT32_C(0x045E20A6), + UINT32_C(0x076538EE) }, + { UINT32_C(0x072461C9), UINT32_C(0x071D932B), UINT32_C(0x099D4C01), + UINT32_C(0x0401E666), UINT32_C(0x07DB6FB0), UINT32_C(0x049F43E4), + UINT32_C(0x056167EA), UINT32_C(0x0D49C41D), UINT32_C(0x05F10CA9), + UINT32_C(0x080EC5BB), UINT32_C(0x05C98C31), UINT32_C(0x01E1F452), + UINT32_C(0x07E42338), UINT32_C(0x04049AA9), UINT32_C(0x032E5588), + UINT32_C(0x01E28C9C), UINT32_C(0x04BCDC8D), UINT32_C(0x04309C54), + UINT32_C(0x02042514) } }, + }, + { + { { UINT32_C(0x02648196), UINT32_C(0x01BF352B), UINT32_C(0x0FCEC15F), + UINT32_C(0x02D3A085), UINT32_C(0x011002A5), UINT32_C(0x026E7651), + UINT32_C(0x021C2A73), UINT32_C(0x0E3392B7), UINT32_C(0x01A26456), + UINT32_C(0x00E05940), UINT32_C(0x05C6D0D8), UINT32_C(0x085D0F62), + UINT32_C(0x03B743E5), 
UINT32_C(0x05B2C76F), UINT32_C(0x0B270AB3), + UINT32_C(0x076B0EF8), UINT32_C(0x0E5EF80C), UINT32_C(0x0751E040), + UINT32_C(0x0769C73A) }, + { UINT32_C(0x0D9BC7BB), UINT32_C(0x01B398D4), UINT32_C(0x094E3D5E), + UINT32_C(0x0679261C), UINT32_C(0x0F579BC0), UINT32_C(0x0087234F), + UINT32_C(0x01C48CDA), UINT32_C(0x01065BB9), UINT32_C(0x04A8A1F3), + UINT32_C(0x097D469B), UINT32_C(0x046FC17A), UINT32_C(0x00CAE969), + UINT32_C(0x02E690B5), UINT32_C(0x0187C437), UINT32_C(0x000FCD13), + UINT32_C(0x07C0FA30), UINT32_C(0x02F0D63C), UINT32_C(0x0583AE53), + UINT32_C(0x036A77FE) } }, + { { UINT32_C(0x01DE62A2), UINT32_C(0x03B6F417), UINT32_C(0x08D8470C), + UINT32_C(0x041AB290), UINT32_C(0x0D3155E4), UINT32_C(0x043123A7), + UINT32_C(0x06EC3DAC), UINT32_C(0x09575F29), UINT32_C(0x05CC8C01), + UINT32_C(0x028CF2E0), UINT32_C(0x00BB01F9), UINT32_C(0x01E4C554), + UINT32_C(0x07B3F1F5), UINT32_C(0x00E4DC2E), UINT32_C(0x0F6F4AA9), + UINT32_C(0x03F7C702), UINT32_C(0x0EC18583), UINT32_C(0x02949031), + UINT32_C(0x05C16F04) }, + { UINT32_C(0x03BFC242), UINT32_C(0x06AF3468), UINT32_C(0x0509C734), + UINT32_C(0x002581C3), UINT32_C(0x0CD6F167), UINT32_C(0x068B6408), + UINT32_C(0x07D05F00), UINT32_C(0x0D520CDF), UINT32_C(0x02C463E5), + UINT32_C(0x003D2B75), UINT32_C(0x02640D09), UINT32_C(0x0C38D324), + UINT32_C(0x016E198B), UINT32_C(0x01BF3B79), UINT32_C(0x08EFB3AE), + UINT32_C(0x01B11ADD), UINT32_C(0x0428FEBD), UINT32_C(0x0288A4BC), + UINT32_C(0x02ED3D8D) } }, + { { UINT32_C(0x0FE3927A), UINT32_C(0x004463DC), UINT32_C(0x0A23634B), + UINT32_C(0x02C96252), UINT32_C(0x088ACC38), UINT32_C(0x003687F2), + UINT32_C(0x07070A41), UINT32_C(0x0A3D6F58), UINT32_C(0x02ACC6F9), + UINT32_C(0x07A117B7), UINT32_C(0x04BF3041), UINT32_C(0x006C3D57), + UINT32_C(0x05E2A443), UINT32_C(0x00D534BB), UINT32_C(0x01838CCA), + UINT32_C(0x07E9698D), UINT32_C(0x0463E2DC), UINT32_C(0x05A8243F), + UINT32_C(0x02BC2618) }, + { UINT32_C(0x0EBC6638), UINT32_C(0x04B3F3FB), UINT32_C(0x0A7F699B), + UINT32_C(0x070541A8), 
UINT32_C(0x00275BF7), UINT32_C(0x0335548D), + UINT32_C(0x00C681F5), UINT32_C(0x0AE9575E), UINT32_C(0x02032835), + UINT32_C(0x027F35BF), UINT32_C(0x00A83998), UINT32_C(0x04869978), + UINT32_C(0x04F819CA), UINT32_C(0x075D1DAF), UINT32_C(0x0B79E387), + UINT32_C(0x033A57AB), UINT32_C(0x057298F2), UINT32_C(0x0583C4E3), + UINT32_C(0x067E752D) } }, + { { UINT32_C(0x06B4D0F2), UINT32_C(0x059C637E), UINT32_C(0x0515A54F), + UINT32_C(0x01CB93DA), UINT32_C(0x0AF87FEF), UINT32_C(0x07247119), + UINT32_C(0x0368E1D8), UINT32_C(0x0287508B), UINT32_C(0x04E3B00B), + UINT32_C(0x03EDF00C), UINT32_C(0x0060EB2B), UINT32_C(0x009B64B7), + UINT32_C(0x0059A064), UINT32_C(0x02C48CC2), UINT32_C(0x0D938166), + UINT32_C(0x039A77EF), UINT32_C(0x04F26973), UINT32_C(0x015B1DA7), + UINT32_C(0x048D6DB3) }, + { UINT32_C(0x011EBBDB), UINT32_C(0x06BC0045), UINT32_C(0x0275B56E), + UINT32_C(0x03B89420), UINT32_C(0x013420FC), UINT32_C(0x076F18E5), + UINT32_C(0x00A74F63), UINT32_C(0x0E0F64B7), UINT32_C(0x00503282), + UINT32_C(0x094735D1), UINT32_C(0x013CC6D6), UINT32_C(0x0E5C0E1C), + UINT32_C(0x015BA8D6), UINT32_C(0x07D45F0A), UINT32_C(0x0A29FE38), + UINT32_C(0x0029F319), UINT32_C(0x03AC2D85), UINT32_C(0x027ECAF3), + UINT32_C(0x029D9051) } }, + { { UINT32_C(0x0EA400A9), UINT32_C(0x0158306B), UINT32_C(0x015222F8), + UINT32_C(0x07A029A5), UINT32_C(0x01BD2907), UINT32_C(0x0570C0F6), + UINT32_C(0x0751FAE1), UINT32_C(0x07964BF7), UINT32_C(0x009AA3B7), + UINT32_C(0x03DF8285), UINT32_C(0x005D2075), UINT32_C(0x0DDBE6E5), + UINT32_C(0x04FB407B), UINT32_C(0x05ABE7D8), UINT32_C(0x0C49401A), + UINT32_C(0x04BA9696), UINT32_C(0x03CCE450), UINT32_C(0x04636480), + UINT32_C(0x03F1ABE9) }, + { UINT32_C(0x03EA1F68), UINT32_C(0x0676F7FA), UINT32_C(0x078995D6), + UINT32_C(0x01690C80), UINT32_C(0x0DDD1529), UINT32_C(0x007F78C9), + UINT32_C(0x0408771E), UINT32_C(0x0513A792), UINT32_C(0x003B85AB), + UINT32_C(0x016D7EB5), UINT32_C(0x05E5699C), UINT32_C(0x0BECEE12), + UINT32_C(0x00107C5D), UINT32_C(0x00E4EB89), UINT32_C(0x02F4C652), 
+ UINT32_C(0x04E39F7A), UINT32_C(0x034AED07), UINT32_C(0x0212550E), + UINT32_C(0x0188E07E) } }, + { { UINT32_C(0x0FBBA24C), UINT32_C(0x01E20A63), UINT32_C(0x0FA95AAC), + UINT32_C(0x01C44416), UINT32_C(0x0F08DC76), UINT32_C(0x043CBDF1), + UINT32_C(0x012ABC29), UINT32_C(0x0F6C4233), UINT32_C(0x06107D90), + UINT32_C(0x002CBE36), UINT32_C(0x05234963), UINT32_C(0x059E8B8F), + UINT32_C(0x06167695), UINT32_C(0x04B21ABA), UINT32_C(0x094ABDA3), + UINT32_C(0x01B5AF79), UINT32_C(0x00351EF1), UINT32_C(0x03FE1EFE), + UINT32_C(0x03E83BD1) }, + { UINT32_C(0x04ADEFE3), UINT32_C(0x028AF72F), UINT32_C(0x09E0C0D6), + UINT32_C(0x0104ED8F), UINT32_C(0x0AE0148F), UINT32_C(0x02B05ACD), + UINT32_C(0x066B1ED0), UINT32_C(0x0A3C6BFA), UINT32_C(0x032BBFF9), + UINT32_C(0x0F66AD88), UINT32_C(0x04A9A376), UINT32_C(0x0AF0D447), + UINT32_C(0x047BD087), UINT32_C(0x005F677C), UINT32_C(0x014088B0), + UINT32_C(0x00EDD8EE), UINT32_C(0x0598516D), UINT32_C(0x03FE1205), + UINT32_C(0x073098DE) } }, + { { UINT32_C(0x02841A85), UINT32_C(0x0451A0F7), UINT32_C(0x076BCBFC), + UINT32_C(0x027E002B), UINT32_C(0x04ACD1B5), UINT32_C(0x03AADBAC), + UINT32_C(0x011F71FA), UINT32_C(0x0E1089CF), UINT32_C(0x058740CA), + UINT32_C(0x06DB26BB), UINT32_C(0x02494970), UINT32_C(0x07CCD9E0), + UINT32_C(0x05749062), UINT32_C(0x061E24EF), UINT32_C(0x0BA44927), + UINT32_C(0x01396A99), UINT32_C(0x0C2129A5), UINT32_C(0x06C4E538), + UINT32_C(0x02D308F2) }, + { UINT32_C(0x0E7B0D82), UINT32_C(0x0295DE15), UINT32_C(0x059C10B0), + UINT32_C(0x0240D76A), UINT32_C(0x0AA33AC3), UINT32_C(0x02D5D368), + UINT32_C(0x05DF8706), UINT32_C(0x0A4B7001), UINT32_C(0x031DBF6C), + UINT32_C(0x0BC72CD8), UINT32_C(0x046962A7), UINT32_C(0x0D13BB53), + UINT32_C(0x039B98C0), UINT32_C(0x05AA84ED), UINT32_C(0x058D2735), + UINT32_C(0x0508AB59), UINT32_C(0x085DF0E3), UINT32_C(0x06AA60D9), + UINT32_C(0x0192578B) } }, + { { UINT32_C(0x052517BF), UINT32_C(0x07C0E587), UINT32_C(0x038A5531), + UINT32_C(0x03EE1FF1), UINT32_C(0x062AB6E8), UINT32_C(0x06EF4CCB), + 
UINT32_C(0x00A09F25), UINT32_C(0x0DBE8342), UINT32_C(0x01D7E02F), + UINT32_C(0x094C49AE), UINT32_C(0x01445CE4), UINT32_C(0x0F435B7F), + UINT32_C(0x07CDF16E), UINT32_C(0x009B8491), UINT32_C(0x0B24E6F7), + UINT32_C(0x01648959), UINT32_C(0x00615CA9), UINT32_C(0x014879FC), + UINT32_C(0x015CCCCE) }, + { UINT32_C(0x0BB6E5C0), UINT32_C(0x072270A8), UINT32_C(0x02BC713E), + UINT32_C(0x0194AF0E), UINT32_C(0x0745C682), UINT32_C(0x00066C6F), + UINT32_C(0x03D36CF5), UINT32_C(0x0593CBB1), UINT32_C(0x05AE790D), + UINT32_C(0x06B1FF53), UINT32_C(0x0620A507), UINT32_C(0x0CB462BF), + UINT32_C(0x068C215C), UINT32_C(0x06AB108C), UINT32_C(0x0B7E3900), + UINT32_C(0x03D88910), UINT32_C(0x0539E087), UINT32_C(0x04AE3141), + UINT32_C(0x035ED7D6) } }, + { { UINT32_C(0x0254F3D7), UINT32_C(0x06792204), UINT32_C(0x0230569F), + UINT32_C(0x03D3FDA9), UINT32_C(0x0B84DD99), UINT32_C(0x07725C4C), + UINT32_C(0x06B0E7C3), UINT32_C(0x0B78D3DF), UINT32_C(0x078AC360), + UINT32_C(0x06CAB919), UINT32_C(0x02F4F70A), UINT32_C(0x013A8BD5), + UINT32_C(0x021D73E0), UINT32_C(0x044B1B4D), UINT32_C(0x0E88A7D4), + UINT32_C(0x05BAA6EC), UINT32_C(0x0526DE60), UINT32_C(0x01D8806A), + UINT32_C(0x04244303) }, + { UINT32_C(0x0108C612), UINT32_C(0x0395A34F), UINT32_C(0x0339198F), + UINT32_C(0x01F179EC), UINT32_C(0x0708D6F3), UINT32_C(0x01DF5235), + UINT32_C(0x0232C546), UINT32_C(0x030C41B0), UINT32_C(0x015FE8CF), + UINT32_C(0x0F21BBB4), UINT32_C(0x0323FD77), UINT32_C(0x06DD81ED), + UINT32_C(0x04136906), UINT32_C(0x054B66A1), UINT32_C(0x0CBBD05A), + UINT32_C(0x0336CEE8), UINT32_C(0x0FCF1FFD), UINT32_C(0x041BBD8F), + UINT32_C(0x07AB12C9) } }, + { { UINT32_C(0x0BBE227D), UINT32_C(0x05858F23), UINT32_C(0x04BF491E), + UINT32_C(0x05728183), UINT32_C(0x079C714E), UINT32_C(0x022A1FCF), + UINT32_C(0x01EF871B), UINT32_C(0x09EDB7B8), UINT32_C(0x01D525A3), + UINT32_C(0x0A87DA27), UINT32_C(0x043F0A4E), UINT32_C(0x09B1CDD1), + UINT32_C(0x00B92721), UINT32_C(0x00B6CCD6), UINT32_C(0x0D63DB15), + UINT32_C(0x023CE576), UINT32_C(0x0C4080E4), 
UINT32_C(0x033F2061), + UINT32_C(0x031AA1D9) }, + { UINT32_C(0x07EC3A20), UINT32_C(0x01C69A3A), UINT32_C(0x001C25C7), + UINT32_C(0x0210B9C8), UINT32_C(0x08BDFFA8), UINT32_C(0x02E8214B), + UINT32_C(0x017C3E9B), UINT32_C(0x084D91D9), UINT32_C(0x038B3D24), + UINT32_C(0x0EC9081E), UINT32_C(0x026E58E8), UINT32_C(0x032908AE), + UINT32_C(0x02B2F37D), UINT32_C(0x058B11CB), UINT32_C(0x07538C24), + UINT32_C(0x06945091), UINT32_C(0x0F538568), UINT32_C(0x064897F5), + UINT32_C(0x03110AAF) } }, + { { UINT32_C(0x093E7BB1), UINT32_C(0x026B09F0), UINT32_C(0x0763D63D), + UINT32_C(0x01CAD134), UINT32_C(0x053290E7), UINT32_C(0x03190F55), + UINT32_C(0x05929346), UINT32_C(0x090E1278), UINT32_C(0x01D360D4), + UINT32_C(0x0AE8B6AE), UINT32_C(0x036A79E4), UINT32_C(0x08B891A0), + UINT32_C(0x0448F896), UINT32_C(0x02316FA4), UINT32_C(0x0B3F9158), + UINT32_C(0x045DAD8C), UINT32_C(0x073BD91F), UINT32_C(0x0407FC71), + UINT32_C(0x0403F724) }, + { UINT32_C(0x0C0213B3), UINT32_C(0x04667E35), UINT32_C(0x0E2CEB9C), + UINT32_C(0x064EC72A), UINT32_C(0x0A339F01), UINT32_C(0x01E44700), + UINT32_C(0x029951E3), UINT32_C(0x0F9E1903), UINT32_C(0x0760075A), + UINT32_C(0x0B3FB167), UINT32_C(0x015349C6), UINT32_C(0x04915326), + UINT32_C(0x06972404), UINT32_C(0x03D0B541), UINT32_C(0x0FFB253E), + UINT32_C(0x0670C067), UINT32_C(0x017EDCC3), UINT32_C(0x06348A30), + UINT32_C(0x0755DC54) } }, + { { UINT32_C(0x0D72BA02), UINT32_C(0x07FF1EEA), UINT32_C(0x0066BDAD), + UINT32_C(0x039D956A), UINT32_C(0x04E892D7), UINT32_C(0x052419F2), + UINT32_C(0x034B725A), UINT32_C(0x095A35DA), UINT32_C(0x05559103), + UINT32_C(0x018A8F9F), UINT32_C(0x04FC3975), UINT32_C(0x0D1740D2), + UINT32_C(0x0375B900), UINT32_C(0x0761403F), UINT32_C(0x0B953A5F), + UINT32_C(0x04F2FF71), UINT32_C(0x0E1B0B58), UINT32_C(0x07D8573F), + UINT32_C(0x053E8C3E) }, + { UINT32_C(0x055A3B73), UINT32_C(0x04EBD845), UINT32_C(0x0D3A5D27), + UINT32_C(0x03216043), UINT32_C(0x0A2D5A11), UINT32_C(0x03D32430), + UINT32_C(0x063F87FD), UINT32_C(0x0DBF84E0), 
UINT32_C(0x04C9934A), + UINT32_C(0x08BE9480), UINT32_C(0x02F6DE30), UINT32_C(0x052DB294), + UINT32_C(0x03230313), UINT32_C(0x04592516), UINT32_C(0x0B992B10), + UINT32_C(0x03125EE2), UINT32_C(0x0445BCF9), UINT32_C(0x07349143), + UINT32_C(0x05A112C7) } }, + { { UINT32_C(0x0EA0B318), UINT32_C(0x03F1B159), UINT32_C(0x0487E52E), + UINT32_C(0x05D27B9C), UINT32_C(0x0EBAD615), UINT32_C(0x0459C5D9), + UINT32_C(0x073079D5), UINT32_C(0x078FD2D4), UINT32_C(0x006B7643), + UINT32_C(0x0A73DC2C), UINT32_C(0x041938CF), UINT32_C(0x098897E0), + UINT32_C(0x07660928), UINT32_C(0x058BF110), UINT32_C(0x0696BC61), + UINT32_C(0x07DE18FC), UINT32_C(0x0B815951), UINT32_C(0x04662BC8), + UINT32_C(0x054FF046) }, + { UINT32_C(0x052466CC), UINT32_C(0x02C9E253), UINT32_C(0x07D1C495), + UINT32_C(0x024A0473), UINT32_C(0x0E5AEABA), UINT32_C(0x06DFF20F), + UINT32_C(0x03CCEFD9), UINT32_C(0x0F806D4B), UINT32_C(0x0192D911), + UINT32_C(0x06A7E064), UINT32_C(0x0136BD6C), UINT32_C(0x03CF3E59), + UINT32_C(0x036C910C), UINT32_C(0x02852F51), UINT32_C(0x0D2261F6), + UINT32_C(0x07B11789), UINT32_C(0x05D5440C), UINT32_C(0x068EB2BF), + UINT32_C(0x07C9D3D2) } }, + { { UINT32_C(0x03F78C83), UINT32_C(0x026282EB), UINT32_C(0x0E7E58C8), + UINT32_C(0x01460384), UINT32_C(0x07F8288C), UINT32_C(0x004DDB38), + UINT32_C(0x068A22C1), UINT32_C(0x03B4E4B7), UINT32_C(0x046EC7F7), + UINT32_C(0x0F499BF8), UINT32_C(0x00E98F9D), UINT32_C(0x0201835A), + UINT32_C(0x06CDC18D), UINT32_C(0x054E87E0), UINT32_C(0x09E1190B), + UINT32_C(0x07C8570C), UINT32_C(0x0EE788C0), UINT32_C(0x003B8466), + UINT32_C(0x0513D8F7) }, + { UINT32_C(0x082AE76F), UINT32_C(0x0467154F), UINT32_C(0x090D360C), + UINT32_C(0x04725E35), UINT32_C(0x077F0A4A), UINT32_C(0x01658344), + UINT32_C(0x07BFD41E), UINT32_C(0x0816DFE5), UINT32_C(0x01A64B33), + UINT32_C(0x07DEC344), UINT32_C(0x0404AABD), UINT32_C(0x0DD22DB3), + UINT32_C(0x0372E5A1), UINT32_C(0x01DD7525), UINT32_C(0x01C8CACD), + UINT32_C(0x06A4B923), UINT32_C(0x0CD78815), UINT32_C(0x03B62E43), + 
UINT32_C(0x0182DCE0) } }, + { { UINT32_C(0x04B1FB35), UINT32_C(0x0061A026), UINT32_C(0x099D37D7), + UINT32_C(0x046459E6), UINT32_C(0x0E8A57EF), UINT32_C(0x001BD06E), + UINT32_C(0x04A92B84), UINT32_C(0x06098C4C), UINT32_C(0x0358B593), + UINT32_C(0x0D4DFE1C), UINT32_C(0x063599D3), UINT32_C(0x02DD18DC), + UINT32_C(0x03007901), UINT32_C(0x01E9DD8D), UINT32_C(0x0400CC35), + UINT32_C(0x0778E5F5), UINT32_C(0x05D5B6A3), UINT32_C(0x02FD411C), + UINT32_C(0x02B425A2) }, + { UINT32_C(0x03812C10), UINT32_C(0x03B78EFC), UINT32_C(0x09532CE4), + UINT32_C(0x04F7D4A9), UINT32_C(0x0F7C04C8), UINT32_C(0x0683AE68), + UINT32_C(0x011B6140), UINT32_C(0x0156737D), UINT32_C(0x035A4EB9), + UINT32_C(0x0A0B7443), UINT32_C(0x064319EB), UINT32_C(0x0B315217), + UINT32_C(0x049C0FB2), UINT32_C(0x004E46BC), UINT32_C(0x0318D072), + UINT32_C(0x052D3EA9), UINT32_C(0x06A15FA8), UINT32_C(0x02E0D5AB), + UINT32_C(0x008DD356) } }, + { { UINT32_C(0x0D00894F), UINT32_C(0x0415F67D), UINT32_C(0x0C243D11), + UINT32_C(0x02B8C573), UINT32_C(0x05C886B6), UINT32_C(0x073E2A37), + UINT32_C(0x01B4E4FA), UINT32_C(0x09A09251), UINT32_C(0x020282E5), + UINT32_C(0x0BCA7D2D), UINT32_C(0x066FF292), UINT32_C(0x09926C99), + UINT32_C(0x03617A48), UINT32_C(0x01530215), UINT32_C(0x063E7DBA), + UINT32_C(0x078B1DFB), UINT32_C(0x0C3844B7), UINT32_C(0x03201272), + UINT32_C(0x0778B4FA) }, + { UINT32_C(0x09305F18), UINT32_C(0x04DACE51), UINT32_C(0x0D07FE4D), + UINT32_C(0x04990FE7), UINT32_C(0x07120719), UINT32_C(0x07AE031B), + UINT32_C(0x003430FE), UINT32_C(0x00C1FBD4), UINT32_C(0x036A0A51), + UINT32_C(0x0A6A12BB), UINT32_C(0x072B00FE), UINT32_C(0x0F112F16), + UINT32_C(0x002D898C), UINT32_C(0x00D7F3F0), UINT32_C(0x02CCB574), + UINT32_C(0x076345FF), UINT32_C(0x02C9358F), UINT32_C(0x017BCB4B), + UINT32_C(0x0579734A) } }, + }, + { + { { UINT32_C(0x0F0DB502), UINT32_C(0x007283D0), UINT32_C(0x08EF623D), + UINT32_C(0x03EA8C5E), UINT32_C(0x0A209E1F), UINT32_C(0x03A40740), + UINT32_C(0x02F81888), UINT32_C(0x0722A969), UINT32_C(0x03DCF02A), + 
UINT32_C(0x0B8BF42D), UINT32_C(0x046BF6EC), UINT32_C(0x04E7DE79), + UINT32_C(0x032FE5DF), UINT32_C(0x01C17AC3), UINT32_C(0x088F43CD), + UINT32_C(0x06D316FF), UINT32_C(0x00B6FB94), UINT32_C(0x03A7A692), + UINT32_C(0x03E132AC) }, + { UINT32_C(0x045CE248), UINT32_C(0x0462F43F), UINT32_C(0x09F103B7), + UINT32_C(0x03CE6503), UINT32_C(0x02C55CD7), UINT32_C(0x01FAC8B9), + UINT32_C(0x07F7D41F), UINT32_C(0x049B3922), UINT32_C(0x0538164A), + UINT32_C(0x0C32168B), UINT32_C(0x021D15D5), UINT32_C(0x0FBE7AB4), + UINT32_C(0x049ABD36), UINT32_C(0x06689278), UINT32_C(0x090906E0), + UINT32_C(0x02853127), UINT32_C(0x032C40D9), UINT32_C(0x0284E722), + UINT32_C(0x05B9DA3D) } }, + { { UINT32_C(0x08B06389), UINT32_C(0x039D7B29), UINT32_C(0x026E0D8E), + UINT32_C(0x038E31F2), UINT32_C(0x0F482001), UINT32_C(0x046C5627), + UINT32_C(0x0153F461), UINT32_C(0x0FC4C626), UINT32_C(0x035A22C9), + UINT32_C(0x0CB5BCED), UINT32_C(0x032AE85F), UINT32_C(0x097105A2), + UINT32_C(0x0661090D), UINT32_C(0x02190C38), UINT32_C(0x05F88BB1), + UINT32_C(0x020AFD4B), UINT32_C(0x07693E86), UINT32_C(0x036234B0), + UINT32_C(0x0201EE7C) }, + { UINT32_C(0x05177EBC), UINT32_C(0x07334497), UINT32_C(0x021FB6DB), + UINT32_C(0x00E242A1), UINT32_C(0x06ACC48D), UINT32_C(0x0617860E), + UINT32_C(0x04002467), UINT32_C(0x006684B4), UINT32_C(0x005E7367), + UINT32_C(0x02210321), UINT32_C(0x06AE2E12), UINT32_C(0x0A170483), + UINT32_C(0x06811FED), UINT32_C(0x02AF7598), UINT32_C(0x099B28F0), + UINT32_C(0x04B2EAC3), UINT32_C(0x03144E87), UINT32_C(0x052C741C), + UINT32_C(0x00219EE8) } }, + { { UINT32_C(0x00581DC0), UINT32_C(0x076911B9), UINT32_C(0x03F907DF), + UINT32_C(0x00FD8CCC), UINT32_C(0x0BD0DFDF), UINT32_C(0x0388BBE8), + UINT32_C(0x0549C09A), UINT32_C(0x0387AC55), UINT32_C(0x07AF40E6), + UINT32_C(0x0981B7A5), UINT32_C(0x05ADE4BE), UINT32_C(0x052D5C55), + UINT32_C(0x076A04D2), UINT32_C(0x032751B9), UINT32_C(0x0BCE279F), + UINT32_C(0x034D2A39), UINT32_C(0x0AEDCDAE), UINT32_C(0x00365DC7), + UINT32_C(0x03453CBF) }, + { 
UINT32_C(0x0FAB453E), UINT32_C(0x011CF084), UINT32_C(0x09E21C47), + UINT32_C(0x06CF3197), UINT32_C(0x00831296), UINT32_C(0x057F4CE5), + UINT32_C(0x020F8EE8), UINT32_C(0x05B31872), UINT32_C(0x0779598D), + UINT32_C(0x07C7AC32), UINT32_C(0x05B64DC4), UINT32_C(0x0E058DB2), + UINT32_C(0x060142F5), UINT32_C(0x0757FAC8), UINT32_C(0x0320EFE8), + UINT32_C(0x03D158EA), UINT32_C(0x025240D2), UINT32_C(0x0116989D), + UINT32_C(0x04BFB887) } }, + { { UINT32_C(0x0DB8A57B), UINT32_C(0x0056DCD3), UINT32_C(0x0355B904), + UINT32_C(0x03D5725A), UINT32_C(0x007C7371), UINT32_C(0x00CF4193), + UINT32_C(0x020AD78C), UINT32_C(0x0305EFAF), UINT32_C(0x03715E8F), + UINT32_C(0x04E06800), UINT32_C(0x0464FE0B), UINT32_C(0x041671C5), + UINT32_C(0x07289FAC), UINT32_C(0x045EC338), UINT32_C(0x049BEE4D), + UINT32_C(0x06F62A0E), UINT32_C(0x04025E36), UINT32_C(0x05D25CE9), + UINT32_C(0x07C568B5) }, + { UINT32_C(0x0D4BD6B6), UINT32_C(0x00933993), UINT32_C(0x0B7EEBBA), + UINT32_C(0x0281309E), UINT32_C(0x065E8268), UINT32_C(0x035579CF), + UINT32_C(0x05550C9A), UINT32_C(0x0D7980B4), UINT32_C(0x0531F076), + UINT32_C(0x0CD2F37E), UINT32_C(0x03059FC3), UINT32_C(0x00281179), + UINT32_C(0x019AAC99), UINT32_C(0x017555A7), UINT32_C(0x0FF849A4), + UINT32_C(0x04EE5361), UINT32_C(0x08C87DDE), UINT32_C(0x004920CB), + UINT32_C(0x0472AE6B) } }, + { { UINT32_C(0x05AD0B4E), UINT32_C(0x0000D01D), UINT32_C(0x0A1C822E), + UINT32_C(0x004A7A0A), UINT32_C(0x0AA08F1E), UINT32_C(0x05917BCC), + UINT32_C(0x073D4A38), UINT32_C(0x06389FF3), UINT32_C(0x047A94F0), + UINT32_C(0x06710D9B), UINT32_C(0x0752964E), UINT32_C(0x030EF732), + UINT32_C(0x01AE9023), UINT32_C(0x0752E2B4), UINT32_C(0x0343C25C), + UINT32_C(0x04C0A3C3), UINT32_C(0x0B4EFABB), UINT32_C(0x079ACB07), + UINT32_C(0x05BEE507) }, + { UINT32_C(0x03494AD9), UINT32_C(0x05EA99AF), UINT32_C(0x0389480B), + UINT32_C(0x05160DCE), UINT32_C(0x010C3CBB), UINT32_C(0x04B92C2A), + UINT32_C(0x05F2D771), UINT32_C(0x0A57A2FD), UINT32_C(0x007C232D), + UINT32_C(0x0ECF6652), UINT32_C(0x06762C3E), 
UINT32_C(0x0531B5E7), + UINT32_C(0x03E82FC8), UINT32_C(0x01820A9D), UINT32_C(0x010298C1), + UINT32_C(0x040BB915), UINT32_C(0x06C4DE5F), UINT32_C(0x00F95873), + UINT32_C(0x00D564BB) } }, + { { UINT32_C(0x06647B76), UINT32_C(0x05951386), UINT32_C(0x01C3CEEE), + UINT32_C(0x05B4A2A9), UINT32_C(0x00C0D10D), UINT32_C(0x07198ABC), + UINT32_C(0x0344EBA4), UINT32_C(0x01102AAD), UINT32_C(0x00A6BD8E), + UINT32_C(0x041FD3B9), UINT32_C(0x072FD40E), UINT32_C(0x04DF271A), + UINT32_C(0x07951CEE), UINT32_C(0x0434A805), UINT32_C(0x03CBC676), + UINT32_C(0x07E6DD9D), UINT32_C(0x037A89AF), UINT32_C(0x01076ABD), + UINT32_C(0x00509445) }, + { UINT32_C(0x0D8A2C33), UINT32_C(0x05E083E6), UINT32_C(0x05C0317D), + UINT32_C(0x0602A2EA), UINT32_C(0x00A16254), UINT32_C(0x065050EB), + UINT32_C(0x014C68D6), UINT32_C(0x0EA8DF00), UINT32_C(0x002096BA), + UINT32_C(0x00D2E7B4), UINT32_C(0x03580F1C), UINT32_C(0x0237FA0E), + UINT32_C(0x01C7F56A), UINT32_C(0x054A6A4F), UINT32_C(0x03E879F4), + UINT32_C(0x008B47F5), UINT32_C(0x0EDF35FC), UINT32_C(0x01F3F7F0), + UINT32_C(0x03E78806) } }, + { { UINT32_C(0x038F6A40), UINT32_C(0x05B8DCB9), UINT32_C(0x07D27CDC), + UINT32_C(0x03392DA1), UINT32_C(0x066611C2), UINT32_C(0x066344AA), + UINT32_C(0x05F431C8), UINT32_C(0x07255E87), UINT32_C(0x0135642A), + UINT32_C(0x051CFCBA), UINT32_C(0x045D25F5), UINT32_C(0x08BB7E3A), + UINT32_C(0x022605AB), UINT32_C(0x00C874AA), UINT32_C(0x0195652F), + UINT32_C(0x00E16A23), UINT32_C(0x0D18A297), UINT32_C(0x024B6188), + UINT32_C(0x025A9403) }, + { UINT32_C(0x04F1EAD3), UINT32_C(0x03669651), UINT32_C(0x0E87093B), + UINT32_C(0x05F1CF35), UINT32_C(0x019B74E6), UINT32_C(0x0177BF8B), + UINT32_C(0x036B76B9), UINT32_C(0x0B817B29), UINT32_C(0x009C77FA), + UINT32_C(0x0202860C), UINT32_C(0x01D1AB54), UINT32_C(0x0B180712), + UINT32_C(0x06B274AA), UINT32_C(0x0121DBED), UINT32_C(0x0AEA446B), + UINT32_C(0x044661E9), UINT32_C(0x0C3EE1D4), UINT32_C(0x045027EE), + UINT32_C(0x014C275F) } }, + { { UINT32_C(0x004023FD), UINT32_C(0x01669241), 
UINT32_C(0x0693C19B), + UINT32_C(0x0058FB3D), UINT32_C(0x0756B182), UINT32_C(0x075D0BEC), + UINT32_C(0x07A393EF), UINT32_C(0x0B75B610), UINT32_C(0x07D0B5FD), + UINT32_C(0x060DEE19), UINT32_C(0x02373BD5), UINT32_C(0x0A1D84BA), + UINT32_C(0x07E8F3AA), UINT32_C(0x01D80791), UINT32_C(0x09D535D0), + UINT32_C(0x01AB79C2), UINT32_C(0x0D7911BC), UINT32_C(0x03496555), + UINT32_C(0x0370FC52) }, + { UINT32_C(0x0CA626DD), UINT32_C(0x018A8079), UINT32_C(0x02E35F36), + UINT32_C(0x00EF1C67), UINT32_C(0x0942648A), UINT32_C(0x05578B93), + UINT32_C(0x07DDB397), UINT32_C(0x095E9BED), UINT32_C(0x07DEB648), + UINT32_C(0x020D82EB), UINT32_C(0x02384172), UINT32_C(0x0988C739), + UINT32_C(0x035C1ACA), UINT32_C(0x053C61ED), UINT32_C(0x036A12D0), + UINT32_C(0x070600B9), UINT32_C(0x05505FED), UINT32_C(0x04D77717), + UINT32_C(0x04E32DD7) } }, + { { UINT32_C(0x0F32AB3F), UINT32_C(0x03271637), UINT32_C(0x01E6E3C1), + UINT32_C(0x04B433DF), UINT32_C(0x0313D761), UINT32_C(0x01F05C43), + UINT32_C(0x01B6E232), UINT32_C(0x0B782E36), UINT32_C(0x0142A283), + UINT32_C(0x06A37377), UINT32_C(0x063B9255), UINT32_C(0x05FF47C8), + UINT32_C(0x02270CEE), UINT32_C(0x04B3AC67), UINT32_C(0x07D72B62), + UINT32_C(0x006133F9), UINT32_C(0x0BFDFB85), UINT32_C(0x04FE3C0B), + UINT32_C(0x0406E239) }, + { UINT32_C(0x0737D38E), UINT32_C(0x07FBCD12), UINT32_C(0x00F51FBD), + UINT32_C(0x02A182A2), UINT32_C(0x062DA827), UINT32_C(0x01D9AB6A), + UINT32_C(0x0539AEBA), UINT32_C(0x0AB608B0), UINT32_C(0x0226B3BB), + UINT32_C(0x0ED7323F), UINT32_C(0x04ADDB11), UINT32_C(0x05B1E5DF), + UINT32_C(0x013ECB65), UINT32_C(0x0282983F), UINT32_C(0x02BDD0BD), + UINT32_C(0x07F0D675), UINT32_C(0x0C80C17E), UINT32_C(0x06B40353), + UINT32_C(0x01D570D9) } }, + { { UINT32_C(0x0D4D4113), UINT32_C(0x0371ACBF), UINT32_C(0x076D0600), + UINT32_C(0x06867748), UINT32_C(0x0267DC5C), UINT32_C(0x04199EE8), + UINT32_C(0x015FF11F), UINT32_C(0x01DBB00A), UINT32_C(0x03C8E489), + UINT32_C(0x0218373A), UINT32_C(0x00180AE9), UINT32_C(0x0A2CAFBC), + 
UINT32_C(0x016437D1), UINT32_C(0x058A25D0), UINT32_C(0x0AB57613), + UINT32_C(0x07DF8B7E), UINT32_C(0x0985AF6A), UINT32_C(0x04CCAE37), + UINT32_C(0x0300D01F) }, + { UINT32_C(0x092A3113), UINT32_C(0x05B20515), UINT32_C(0x0F0E530A), + UINT32_C(0x0605CBBF), UINT32_C(0x05FD19B3), UINT32_C(0x01593B38), + UINT32_C(0x003D988A), UINT32_C(0x03D76657), UINT32_C(0x017E79DC), + UINT32_C(0x02EC918C), UINT32_C(0x069A3B0F), UINT32_C(0x06FB78CA), + UINT32_C(0x07B0B30F), UINT32_C(0x0224A884), UINT32_C(0x0FF6CD50), + UINT32_C(0x07D9D639), UINT32_C(0x0D753C54), UINT32_C(0x04ED3D38), + UINT32_C(0x01E9C727) } }, + { { UINT32_C(0x0201CD59), UINT32_C(0x01D5BE35), UINT32_C(0x0B2E0772), + UINT32_C(0x04E8E2C3), UINT32_C(0x06C76E20), UINT32_C(0x01464A0E), + UINT32_C(0x056C1CE9), UINT32_C(0x04E3B528), UINT32_C(0x037AAFAB), + UINT32_C(0x06CE134F), UINT32_C(0x06158AF6), UINT32_C(0x02AF338B), + UINT32_C(0x025085B6), UINT32_C(0x07AABBFC), UINT32_C(0x0670F3BE), + UINT32_C(0x0108503F), UINT32_C(0x0DC85D51), UINT32_C(0x07F4439A), + UINT32_C(0x046E6FC9) }, + { UINT32_C(0x08FFB263), UINT32_C(0x01FF6045), UINT32_C(0x0C4E1676), + UINT32_C(0x038E4F62), UINT32_C(0x06DD24CD), UINT32_C(0x0142D912), + UINT32_C(0x015AAC36), UINT32_C(0x0DF58E09), UINT32_C(0x038F3D3B), + UINT32_C(0x014D0412), UINT32_C(0x0123F0AF), UINT32_C(0x0021ED27), + UINT32_C(0x0004843B), UINT32_C(0x05BF4326), UINT32_C(0x05A672B0), + UINT32_C(0x02B6453D), UINT32_C(0x0C7F1450), UINT32_C(0x04A895A4), + UINT32_C(0x061C3DF9) } }, + { { UINT32_C(0x0E593E49), UINT32_C(0x07ABFF21), UINT32_C(0x076E69C7), + UINT32_C(0x05C81656), UINT32_C(0x0858D39E), UINT32_C(0x041FC1FA), + UINT32_C(0x03599A84), UINT32_C(0x0ECF483C), UINT32_C(0x0190C4E8), + UINT32_C(0x08EA24D2), UINT32_C(0x03536BE7), UINT32_C(0x0E3746C4), + UINT32_C(0x0632F6BA), UINT32_C(0x05CFBDCC), UINT32_C(0x060097CB), + UINT32_C(0x04B0546F), UINT32_C(0x0AB5C45F), UINT32_C(0x04F8975E), + UINT32_C(0x04C5D61F) }, + { UINT32_C(0x062B46F6), UINT32_C(0x07516E20), UINT32_C(0x0C1F955C), + 
UINT32_C(0x001F66A2), UINT32_C(0x0ED0D917), UINT32_C(0x0406AF99), + UINT32_C(0x069CF83E), UINT32_C(0x0D4D8A00), UINT32_C(0x03D763C5), + UINT32_C(0x0E1FD9A7), UINT32_C(0x0056211F), UINT32_C(0x07531A2F), + UINT32_C(0x00973B69), UINT32_C(0x021DCD32), UINT32_C(0x09D0AC99), + UINT32_C(0x0549BFEA), UINT32_C(0x0305E319), UINT32_C(0x01342656), + UINT32_C(0x001B80FB) } }, + { { UINT32_C(0x031FFCBB), UINT32_C(0x06BC2475), UINT32_C(0x090EA8B2), + UINT32_C(0x0716EDFB), UINT32_C(0x0418E2AE), UINT32_C(0x0381C978), + UINT32_C(0x05591029), UINT32_C(0x09BD26C6), UINT32_C(0x0460D4D5), + UINT32_C(0x07DAA20D), UINT32_C(0x01560E68), UINT32_C(0x04AAAB23), + UINT32_C(0x01EA985C), UINT32_C(0x0631896F), UINT32_C(0x0FD13830), + UINT32_C(0x0416257F), UINT32_C(0x069B78E7), UINT32_C(0x0016004F), + UINT32_C(0x07B5E05F) }, + { UINT32_C(0x0749B010), UINT32_C(0x0716A42F), UINT32_C(0x0DEDE224), + UINT32_C(0x06E403DB), UINT32_C(0x01FC6739), UINT32_C(0x07F5928B), + UINT32_C(0x04FF09AE), UINT32_C(0x096D2235), UINT32_C(0x032412BF), + UINT32_C(0x0635ABB1), UINT32_C(0x0480F063), UINT32_C(0x0BA557CC), + UINT32_C(0x05C0FEF3), UINT32_C(0x01C7CB5C), UINT32_C(0x09482C2A), + UINT32_C(0x003CF65B), UINT32_C(0x0F39C07C), UINT32_C(0x00902580), + UINT32_C(0x053F7D95) } }, + { { UINT32_C(0x00C6A752), UINT32_C(0x0600187B), UINT32_C(0x031FD29E), + UINT32_C(0x07202D01), UINT32_C(0x08706FD9), UINT32_C(0x003A8DA7), + UINT32_C(0x02BC4807), UINT32_C(0x0108B8E2), UINT32_C(0x03DCB4C3), + UINT32_C(0x00E5D109), UINT32_C(0x0133EBE8), UINT32_C(0x0DBC9FDB), + UINT32_C(0x037A84B4), UINT32_C(0x000D902A), UINT32_C(0x0B159D44), + UINT32_C(0x0385B949), UINT32_C(0x0BB24FD6), UINT32_C(0x05FFC44B), + UINT32_C(0x0402B0EA) }, + { UINT32_C(0x0AFA8C2B), UINT32_C(0x03A224AC), UINT32_C(0x08FD7C67), + UINT32_C(0x072E1371), UINT32_C(0x01FA5FB1), UINT32_C(0x060D59B5), + UINT32_C(0x004D1058), UINT32_C(0x0193E727), UINT32_C(0x0093B083), + UINT32_C(0x0ABA0999), UINT32_C(0x07F25ECC), UINT32_C(0x0E8D4648), + UINT32_C(0x045B908B), UINT32_C(0x02C916E0), 
UINT32_C(0x052F14F8), + UINT32_C(0x00430404), UINT32_C(0x0B8E9A2B), UINT32_C(0x00F4BF45), + UINT32_C(0x03F0A1D1) } }, + { { UINT32_C(0x0CEE5802), UINT32_C(0x00880798), UINT32_C(0x01C63FFC), + UINT32_C(0x071B8526), UINT32_C(0x0C1068FB), UINT32_C(0x052F9DB3), + UINT32_C(0x01DDC849), UINT32_C(0x0E84AF14), UINT32_C(0x06CD446D), + UINT32_C(0x0A9F92C6), UINT32_C(0x01676037), UINT32_C(0x02A0264C), + UINT32_C(0x0467C53C), UINT32_C(0x051C4EE1), UINT32_C(0x01F47FF0), + UINT32_C(0x022246B4), UINT32_C(0x07D42402), UINT32_C(0x0287119F), + UINT32_C(0x04434D4E) }, + { UINT32_C(0x018DA0C0), UINT32_C(0x042E86EE), UINT32_C(0x08509770), + UINT32_C(0x04EDAEB9), UINT32_C(0x0A4009B5), UINT32_C(0x0335CB55), + UINT32_C(0x064D21EC), UINT32_C(0x0647F463), UINT32_C(0x07A167F4), + UINT32_C(0x023FB0E4), UINT32_C(0x062A970D), UINT32_C(0x00205267), + UINT32_C(0x036D3513), UINT32_C(0x07ABD182), UINT32_C(0x0B51FDBA), + UINT32_C(0x077B5CD0), UINT32_C(0x0896BFE4), UINT32_C(0x0300338E), + UINT32_C(0x06FF9581) } }, + { { UINT32_C(0x054184BF), UINT32_C(0x02DCF217), UINT32_C(0x0880D0D9), + UINT32_C(0x019760C7), UINT32_C(0x0662BD25), UINT32_C(0x06A962DD), + UINT32_C(0x04C69173), UINT32_C(0x019D4A19), UINT32_C(0x05AD5A5F), + UINT32_C(0x0E23BF0B), UINT32_C(0x07D3C575), UINT32_C(0x0BCDA9CF), + UINT32_C(0x019497F7), UINT32_C(0x01914517), UINT32_C(0x027F0C56), + UINT32_C(0x048ED5F5), UINT32_C(0x078B0933), UINT32_C(0x01A7EB30), + UINT32_C(0x066D17B3) }, + { UINT32_C(0x00A95EDC), UINT32_C(0x0386D25E), UINT32_C(0x039DE915), + UINT32_C(0x076A16CE), UINT32_C(0x05DCE4A7), UINT32_C(0x07C40607), + UINT32_C(0x06F1B7C2), UINT32_C(0x0A817858), UINT32_C(0x0147CB22), + UINT32_C(0x0D109609), UINT32_C(0x0454D2C5), UINT32_C(0x0D788CF4), + UINT32_C(0x03DCA054), UINT32_C(0x02A7B716), UINT32_C(0x05C66166), + UINT32_C(0x01AC2B32), UINT32_C(0x0D0C246B), UINT32_C(0x02E38AD2), + UINT32_C(0x039CDC10) } }, + } +}; + +/*- + * Finite field inversion. + * Computed with exponentiation via FLT. 
+ * Autogenerated: ecp/secp521r1/fe_inv.op3 + * custom repunit addition chain + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp521r1_inv(fe_t output, const fe_t t1) +{ + int i; + /* temporary variables */ + fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8; + + fiat_secp521r1_carry_square(acc, t1); + fiat_secp521r1_carry_mul(t2, acc, t1); + fiat_secp521r1_carry_square(acc, t2); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t4, acc, t2); + fiat_secp521r1_carry_square(acc, t4); + for (i = 0; i < 3; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t8, acc, t4); + fiat_secp521r1_carry_square(acc, t8); + for (i = 0; i < 7; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t16, acc, t8); + fiat_secp521r1_carry_square(acc, t16); + for (i = 0; i < 15; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t32, acc, t16); + fiat_secp521r1_carry_square(acc, t32); + for (i = 0; i < 31; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t64, acc, t32); + fiat_secp521r1_carry_square(acc, t64); + for (i = 0; i < 63; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t128, acc, t64); + fiat_secp521r1_carry_square(acc, t128); + for (i = 0; i < 127; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t256, acc, t128); + fiat_secp521r1_carry_square(acc, t256); + for (i = 0; i < 255; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t512, acc, t256); + fiat_secp521r1_carry_square(acc, t512); + for (i = 0; i < 3; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t516, acc, t4); + fiat_secp521r1_carry_square(acc, t516); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t518, acc, t2); + fiat_secp521r1_carry_square(acc, t518); + fiat_secp521r1_carry_mul(t519, acc, t1); + 
fiat_secp521r1_carry_square(acc, t519); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(output, acc, t1); +} + +/*- + * Q := 2P, both projective, Q and P same pointers OK + * Autogenerated: op3/dbl_proj.op3 + * https://eprint.iacr.org/2015/1060 Alg 6 + * ASSERT: a = -3 + */ +static void +point_double(pt_prj_t *Q, const pt_prj_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith */ + const limb_t *X = P->X; + const limb_t *Y = P->Y; + const limb_t *Z = P->Z; + limb_t *X3 = Q->X; + limb_t *Y3 = Q->Y; + limb_t *Z3 = Q->Z; + + /* the curve arith formula */ + fiat_secp521r1_carry_square(t0, X); + fiat_secp521r1_carry_square(t1, Y); + fiat_secp521r1_carry_square(t2, Z); + fiat_secp521r1_carry_mul(t3, X, Y); + fiat_secp521r1_carry_add(t3, t3, t3); + fiat_secp521r1_carry_mul(t4, Y, Z); + fiat_secp521r1_carry_mul(Z3, X, Z); + fiat_secp521r1_carry_add(Z3, Z3, Z3); + fiat_secp521r1_carry_mul(Y3, b, t2); + fiat_secp521r1_carry_sub(Y3, Y3, Z3); + fiat_secp521r1_carry_add(X3, Y3, Y3); + fiat_secp521r1_carry_add(Y3, X3, Y3); + fiat_secp521r1_carry_sub(X3, t1, Y3); + fiat_secp521r1_carry_add(Y3, t1, Y3); + fiat_secp521r1_carry_mul(Y3, X3, Y3); + fiat_secp521r1_carry_mul(X3, X3, t3); + fiat_secp521r1_carry_add(t3, t2, t2); + fiat_secp521r1_carry_add(t2, t2, t3); + fiat_secp521r1_carry_mul(Z3, b, Z3); + fiat_secp521r1_carry_sub(Z3, Z3, t2); + fiat_secp521r1_carry_sub(Z3, Z3, t0); + fiat_secp521r1_carry_add(t3, Z3, Z3); + fiat_secp521r1_carry_add(Z3, Z3, t3); + fiat_secp521r1_carry_add(t3, t0, t0); + fiat_secp521r1_carry_add(t0, t3, t0); + fiat_secp521r1_carry_sub(t0, t0, t2); + fiat_secp521r1_carry_mul(t0, t0, Z3); + fiat_secp521r1_carry_add(Y3, Y3, t0); + fiat_secp521r1_carry_add(t0, t4, t4); + fiat_secp521r1_carry_mul(Z3, t0, Z3); + fiat_secp521r1_carry_sub(X3, X3, Z3); + fiat_secp521r1_carry_mul(Z3, t0, t1); + fiat_secp521r1_carry_add(Z3, Z3, Z3); + 
fiat_secp521r1_carry_add(Z3, Z3, Z3); +} + +/*- + * out1 = (arg1 == 0) ? 0 : nz + * NB: this is not a "mod p equiv" 0, but literal 0 + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp521r1_nonzero(limb_t *out1, const fe_t arg1) +{ + limb_t x1 = 0; + int i; + + for (i = 0; i < LIMB_CNT; i++) + x1 |= arg1[i]; + *out1 = x1; +} + +/*- + * R := Q + P where R and Q are projective, P affine. + * R and Q same pointers OK + * R and P same pointers not OK + * Autogenerated: op3/add_mixed.op3 + * https://eprint.iacr.org/2015/1060 Alg 5 + * ASSERT: a = -3 + */ +static void +point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith */ + const limb_t *X1 = Q->X; + const limb_t *Y1 = Q->Y; + const limb_t *Z1 = Q->Z; + const limb_t *X2 = P->X; + const limb_t *Y2 = P->Y; + fe_t X3; + fe_t Y3; + fe_t Z3; + limb_t nz; + + /* check P for affine inf */ + fiat_secp521r1_nonzero(&nz, P->Y); + + /* the curve arith formula */ + fiat_secp521r1_carry_mul(t0, X1, X2); + fiat_secp521r1_carry_mul(t1, Y1, Y2); + fiat_secp521r1_carry_add(t3, X2, Y2); + fiat_secp521r1_carry_add(t4, X1, Y1); + fiat_secp521r1_carry_mul(t3, t3, t4); + fiat_secp521r1_carry_add(t4, t0, t1); + fiat_secp521r1_carry_sub(t3, t3, t4); + fiat_secp521r1_carry_mul(t4, Y2, Z1); + fiat_secp521r1_carry_add(t4, t4, Y1); + fiat_secp521r1_carry_mul(Y3, X2, Z1); + fiat_secp521r1_carry_add(Y3, Y3, X1); + fiat_secp521r1_carry_mul(Z3, b, Z1); + fiat_secp521r1_carry_sub(X3, Y3, Z3); + fiat_secp521r1_carry_add(Z3, X3, X3); + fiat_secp521r1_carry_add(X3, X3, Z3); + fiat_secp521r1_carry_sub(Z3, t1, X3); + fiat_secp521r1_carry_add(X3, t1, X3); + fiat_secp521r1_carry_mul(Y3, b, Y3); + fiat_secp521r1_carry_add(t1, Z1, Z1); + fiat_secp521r1_carry_add(t2, t1, Z1); + fiat_secp521r1_carry_sub(Y3, Y3, t2); + 
fiat_secp521r1_carry_sub(Y3, Y3, t0); + fiat_secp521r1_carry_add(t1, Y3, Y3); + fiat_secp521r1_carry_add(Y3, t1, Y3); + fiat_secp521r1_carry_add(t1, t0, t0); + fiat_secp521r1_carry_add(t0, t1, t0); + fiat_secp521r1_carry_sub(t0, t0, t2); + fiat_secp521r1_carry_mul(t1, t4, Y3); + fiat_secp521r1_carry_mul(t2, t0, Y3); + fiat_secp521r1_carry_mul(Y3, X3, Z3); + fiat_secp521r1_carry_add(Y3, Y3, t2); + fiat_secp521r1_carry_mul(X3, t3, X3); + fiat_secp521r1_carry_sub(X3, X3, t1); + fiat_secp521r1_carry_mul(Z3, t4, Z3); + fiat_secp521r1_carry_mul(t1, t3, t0); + fiat_secp521r1_carry_add(Z3, Z3, t1); + + /* if P is inf, throw all that away and take Q */ + fiat_secp521r1_selectznz(R->X, nz, Q->X, X3); + fiat_secp521r1_selectznz(R->Y, nz, Q->Y, Y3); + fiat_secp521r1_selectznz(R->Z, nz, Q->Z, Z3); +} + +/*- + * R := Q + P all projective. + * R and Q same pointers OK + * R and P same pointers not OK + * Autogenerated: op3/add_proj.op3 + * https://eprint.iacr.org/2015/1060 Alg 4 + * ASSERT: a = -3 + */ +static void +point_add_proj(pt_prj_t *R, const pt_prj_t *Q, const pt_prj_t *P) +{ + /* temporary variables */ + fe_t t0, t1, t2, t3, t4, t5; + /* constants */ + const limb_t *b = const_b; + /* set pointers for legacy curve arith */ + const limb_t *X1 = Q->X; + const limb_t *Y1 = Q->Y; + const limb_t *Z1 = Q->Z; + const limb_t *X2 = P->X; + const limb_t *Y2 = P->Y; + const limb_t *Z2 = P->Z; + limb_t *X3 = R->X; + limb_t *Y3 = R->Y; + limb_t *Z3 = R->Z; + + /* the curve arith formula */ + fiat_secp521r1_carry_mul(t0, X1, X2); + fiat_secp521r1_carry_mul(t1, Y1, Y2); + fiat_secp521r1_carry_mul(t2, Z1, Z2); + fiat_secp521r1_carry_add(t3, X1, Y1); + fiat_secp521r1_carry_add(t4, X2, Y2); + fiat_secp521r1_carry_mul(t3, t3, t4); + fiat_secp521r1_carry_add(t4, t0, t1); + fiat_secp521r1_carry_sub(t3, t3, t4); + fiat_secp521r1_carry_add(t4, Y1, Z1); + fiat_secp521r1_carry_add(t5, Y2, Z2); + fiat_secp521r1_carry_mul(t4, t4, t5); + fiat_secp521r1_carry_add(t5, t1, t2); + 
fiat_secp521r1_carry_sub(t4, t4, t5); + fiat_secp521r1_carry_add(X3, X1, Z1); + fiat_secp521r1_carry_add(Y3, X2, Z2); + fiat_secp521r1_carry_mul(X3, X3, Y3); + fiat_secp521r1_carry_add(Y3, t0, t2); + fiat_secp521r1_carry_sub(Y3, X3, Y3); + fiat_secp521r1_carry_mul(Z3, b, t2); + fiat_secp521r1_carry_sub(X3, Y3, Z3); + fiat_secp521r1_carry_add(Z3, X3, X3); + fiat_secp521r1_carry_add(X3, X3, Z3); + fiat_secp521r1_carry_sub(Z3, t1, X3); + fiat_secp521r1_carry_add(X3, t1, X3); + fiat_secp521r1_carry_mul(Y3, b, Y3); + fiat_secp521r1_carry_add(t1, t2, t2); + fiat_secp521r1_carry_add(t2, t1, t2); + fiat_secp521r1_carry_sub(Y3, Y3, t2); + fiat_secp521r1_carry_sub(Y3, Y3, t0); + fiat_secp521r1_carry_add(t1, Y3, Y3); + fiat_secp521r1_carry_add(Y3, t1, Y3); + fiat_secp521r1_carry_add(t1, t0, t0); + fiat_secp521r1_carry_add(t0, t1, t0); + fiat_secp521r1_carry_sub(t0, t0, t2); + fiat_secp521r1_carry_mul(t1, t4, Y3); + fiat_secp521r1_carry_mul(t2, t0, Y3); + fiat_secp521r1_carry_mul(Y3, X3, Z3); + fiat_secp521r1_carry_add(Y3, Y3, t2); + fiat_secp521r1_carry_mul(X3, t3, X3); + fiat_secp521r1_carry_sub(X3, X3, t1); + fiat_secp521r1_carry_mul(Z3, t4, Z3); + fiat_secp521r1_carry_mul(t1, t3, t0); + fiat_secp521r1_carry_add(Z3, Z3, t1); +} + +/* constants */ +#define RADIX 5 +#define DRADIX (1 << RADIX) +#define DRADIX_WNAF ((DRADIX) << 1) + +/*- + * precomp for wnaf scalar multiplication: + * precomp[0] = 1P + * precomp[1] = 3P + * precomp[2] = 5P + * precomp[3] = 7P + * precomp[4] = 9P + * ... 
+ */ +static void +precomp_wnaf(pt_prj_t precomp[DRADIX / 2], const pt_aff_t *P) +{ + int i; + + fe_copy(precomp[0].X, P->X); + fe_copy(precomp[0].Y, P->Y); + fe_copy(precomp[0].Z, const_one); + point_double(&precomp[DRADIX / 2 - 1], &precomp[0]); /* last slot temporarily holds 2P */ + + for (i = 1; i < DRADIX / 2; i++) /* precomp[i] = 2P + precomp[i - 1]; the final iteration overwrites the 2P scratch slot */ + point_add_proj(&precomp[i], &precomp[DRADIX / 2 - 1], &precomp[i - 1]); +} + +/* fetch bit idx of the 66-byte scalar (bit 0 = least significant bit of in[0]); indices outside the array read as 0 */ +static int +scalar_get_bit(const unsigned char in[66], int idx) +{ + int widx, rshift; + + widx = idx >> 3; + rshift = idx & 0x7; + + if (idx < 0 || widx >= 66) + return 0; + + return (in[widx] >> rshift) & 0x1; +} + +/*- + * Compute "regular" wnaf representation of a scalar. + * See "Exponent Recoding and Regular Exponentiation Algorithms", + * Tunstall et al., AfricaCrypt 2009, Alg 6. + * It forces an odd scalar and outputs digits in + * {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...} + * i.e. signed odd digits with _no zeroes_ -- that makes it "regular". + */ +static void +scalar_rwnaf(int8_t out[106], const unsigned char in[66]) +{ + int i; + int8_t window, d; + + window = (in[0] & (DRADIX_WNAF - 1)) | 1; /* low bit forced to 1: the recoded value is always odd */ + for (i = 0; i < 105; i++) { + d = (window & (DRADIX_WNAF - 1)) - DRADIX; /* recentre the low RADIX + 1 bits around zero */ + out[i] = d; + window = (window - d) >> RADIX; + window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1; + window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2; + window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3; + window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4; + window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5; + } + out[i] = window; /* most significant digit: the leftover window, not recentred */ +} + +/*- + * Compute "textbook" wnaf representation of a scalar.
+ * NB: not constant time + */ +static void +scalar_wnaf(int8_t out[529], const unsigned char in[66]) +{ + int i; + int8_t window, d; + + window = in[0] & (DRADIX_WNAF - 1); + for (i = 0; i < 529; i++) { + d = 0; + if ((window & 1) && ((d = window & (DRADIX_WNAF - 1)) & DRADIX)) + d -= DRADIX_WNAF; + out[i] = d; + window = (window - d) >> 1; + window += scalar_get_bit(in, i + 1 + RADIX) << RADIX; + } +} + +/*- + * Simultaneous scalar multiplication: interleaved "textbook" wnaf. + * NB: not constant time + */ +static void +var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], + const unsigned char b[66], const pt_aff_t *P) +{ + int i, d, is_neg, is_inf = 1, flipped = 0; + int8_t anaf[529] = { 0 }; + int8_t bnaf[529] = { 0 }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }; + pt_prj_t precomp[DRADIX / 2]; + + precomp_wnaf(precomp, P); + scalar_wnaf(anaf, a); + scalar_wnaf(bnaf, b); + + for (i = 528; i >= 0; i--) { + if (!is_inf) + point_double(&Q, &Q); + if ((d = bnaf[i])) { + if ((is_neg = d < 0) != flipped) { + fiat_secp521r1_carry_opp(Q.Y, Q.Y); + flipped ^= 1; + } + d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; + if (is_inf) { + /* initialize accumulator */ + fe_copy(Q.X, &precomp[d].X); + fe_copy(Q.Y, &precomp[d].Y); + fe_copy(Q.Z, &precomp[d].Z); + is_inf = 0; + } else + point_add_proj(&Q, &Q, &precomp[d]); + } + if ((d = anaf[i])) { + if ((is_neg = d < 0) != flipped) { + fiat_secp521r1_carry_opp(Q.Y, Q.Y); + flipped ^= 1; + } + d = (is_neg) ? 
(-d - 1) >> 1 : (d - 1) >> 1; + if (is_inf) { + /* initialize accumulator */ + fe_copy(Q.X, &lut_cmb[0][d].X); + fe_copy(Q.Y, &lut_cmb[0][d].Y); + fe_copy(Q.Z, const_one); + is_inf = 0; + } else + point_add_mixed(&Q, &Q, &lut_cmb[0][d]); + } + } + + if (is_inf) { + /* initialize accumulator to inf: all-zero scalars */ + fe_set_zero(Q.X); + fe_copy(Q.Y, const_one); + fe_set_zero(Q.Z); + } + + if (flipped) { + /* correct sign */ + fiat_secp521r1_carry_opp(Q.Y, Q.Y); + } + + /* convert to affine -- NB depends on coordinate system */ + fiat_secp521r1_inv(Q.Z, Q.Z); + fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z); + fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Variable point scalar multiplication with "regular" wnaf. + * Here "regular" means _no zeroes_, so the sequence of + * EC arithmetic ops is fixed. + */ +static void +var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], + const pt_aff_t *P) +{ + int i, j, d, diff, is_neg; + int8_t rnaf[106] = { 0 }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } }; + pt_prj_t precomp[DRADIX / 2]; + + precomp_wnaf(precomp, P); + scalar_rwnaf(rnaf, scalar); + +#if defined(_MSC_VER) + /* result still unsigned: yes we know */ +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + + /* initialize accumulator to high digit */ + d = (rnaf[105] - 1) >> 1; + for (j = 0; j < DRADIX / 2; j++) { + diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; + fiat_secp521r1_selectznz(Q.X, diff, Q.X, precomp[j].X); + fiat_secp521r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y); + fiat_secp521r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z); + } + + for (i = 104; i >= 0; i--) { + for (j = 0; j < RADIX; j++) + point_double(&Q, &Q); + d = rnaf[i]; + /* is_neg = (d < 0) ? 
1 : 0 */ + is_neg = (d >> (8 * sizeof(int) - 1)) & 1; + /* d = abs(d) */ + d = (d ^ -is_neg) + is_neg; + d = (d - 1) >> 1; + for (j = 0; j < DRADIX / 2; j++) { + diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; + fiat_secp521r1_selectznz(lut.X, diff, lut.X, precomp[j].X); + fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y); + fiat_secp521r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z); + } + /* negate lut point if digit is negative */ + fiat_secp521r1_carry_opp(out->Y, lut.Y); + fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); + point_add_proj(&Q, &Q, &lut); + } + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + /* conditionally subtract P if the scalar was even */ + fe_copy(lut.X, precomp[0].X); + fiat_secp521r1_carry_opp(lut.Y, precomp[0].Y); + fe_copy(lut.Z, precomp[0].Z); + point_add_proj(&lut, &lut, &Q); + fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X); + fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y); + fiat_secp521r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z); + + /* convert to affine -- NB depends on coordinate system */ + fiat_secp521r1_inv(Q.Z, Q.Z); + fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z); + fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Fixed scalar multiplication: comb with interleaving. 
+ */ +static void +fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) +{ + int i, j, k, d, diff, is_neg = 0; + int8_t rnaf[106] = { 0 }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } }; + pt_aff_t lut = { { 0 }, { 0 } }; + + scalar_rwnaf(rnaf, scalar); + + /* initialize accumulator to inf */ + fe_set_zero(Q.X); + fe_copy(Q.Y, const_one); + fe_set_zero(Q.Z); + +#if defined(_MSC_VER) + /* result still unsigned: yes we know */ +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + + for (i = 8; i >= 0; i--) { + for (j = 0; i != 8 && j < RADIX; j++) /* no doublings before the first pass (i == 8) */ + point_double(&Q, &Q); + for (j = 0; j < 13; j++) { /* digit j * 9 + i is looked up in table lut_cmb[j] */ + if (j * 9 + i > 105) + continue; + d = rnaf[j * 9 + i]; + /* is_neg = (d < 0) ? 1 : 0 */ + is_neg = (d >> (8 * sizeof(int) - 1)) & 1; + /* d = abs(d) */ + d = (d ^ -is_neg) + is_neg; + d = (d - 1) >> 1; + for (k = 0; k < DRADIX / 2; k++) { /* every table entry is visited regardless of d */ + diff = (1 - (-(d ^ k) >> (8 * sizeof(int) - 1))) & 1; /* 1 iff d == k, else 0 */ + fiat_secp521r1_selectznz(lut.X, diff, lut.X, lut_cmb[j][k].X); + fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y); + } + /* negate lut point if digit is negative (out->Y is used as scratch space here) */ + fiat_secp521r1_carry_opp(out->Y, lut.Y); + fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); + point_add_mixed(&Q, &Q, &lut); + } + } + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + /* conditionally subtract the point lut_cmb[0][0] if the scalar was even (scalar_rwnaf recoded it with the low bit forced to 1) */ + fe_copy(lut.X, lut_cmb[0][0].X); + fiat_secp521r1_carry_opp(lut.Y, lut_cmb[0][0].Y); + point_add_mixed(&R, &Q, &lut); + fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X); + fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y); + fiat_secp521r1_selectznz(Q.Z, scalar[0] & 1, R.Z, Q.Z); + + /* convert to affine -- NB depends on coordinate system */ + fiat_secp521r1_inv(Q.Z, Q.Z); + fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z); + fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z); +} + +/*- + * Wrapper: simultaneous scalar multiplication. + * outx, outy := a * G + b * P + * where P = (inx, iny).
+ * Everything is LE byte ordering. + */ +static void +point_mul_two_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char a[66], + const unsigned char b[66], + const unsigned char inx[66], + const unsigned char iny[66]) +{ + pt_aff_t P; + + fiat_secp521r1_from_bytes(P.X, inx); + fiat_secp521r1_from_bytes(P.Y, iny); + /* simultaneous scalar multiplication */ + var_smul_wnaf_two(&P, a, b, &P); + + fiat_secp521r1_to_bytes(outx, P.X); + fiat_secp521r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: fixed scalar multiplication. + * outx, outy := scalar * G + * Everything is LE byte ordering. + */ +static void +point_mul_g_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char scalar[66]) +{ + pt_aff_t P; + + /* fixed scmul function */ + fixed_smul_cmb(&P, scalar); + fiat_secp521r1_to_bytes(outx, P.X); + fiat_secp521r1_to_bytes(outy, P.Y); +} + +/*- + * Wrapper: variable point scalar multiplication. + * outx, outy := scalar * P + * where P = (inx, iny). + * Everything is LE byte ordering.
+ */ +static void +point_mul_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char scalar[66], + const unsigned char inx[66], + const unsigned char iny[66]) +{ + pt_aff_t P; + + fiat_secp521r1_from_bytes(P.X, inx); + fiat_secp521r1_from_bytes(P.Y, iny); + /* var scmul function */ + var_smul_rwnaf(&P, scalar, &P); + fiat_secp521r1_to_bytes(outx, P.X); + fiat_secp521r1_to_bytes(outy, P.Y); +} + +#undef RADIX +#include "ecp.h" +#include "mpi-priv.h" +#include "mplogic.h" + +/*- + * reverse bytes -- total hack + */ +#define MP_BE2LE(a) \ + do { \ + unsigned char z_bswap; \ + z_bswap = a[0]; \ + a[0] = a[65]; \ + a[65] = z_bswap; \ + z_bswap = a[1]; \ + a[1] = a[64]; \ + a[64] = z_bswap; \ + z_bswap = a[2]; \ + a[2] = a[63]; \ + a[63] = z_bswap; \ + z_bswap = a[3]; \ + a[3] = a[62]; \ + a[62] = z_bswap; \ + z_bswap = a[4]; \ + a[4] = a[61]; \ + a[61] = z_bswap; \ + z_bswap = a[5]; \ + a[5] = a[60]; \ + a[60] = z_bswap; \ + z_bswap = a[6]; \ + a[6] = a[59]; \ + a[59] = z_bswap; \ + z_bswap = a[7]; \ + a[7] = a[58]; \ + a[58] = z_bswap; \ + z_bswap = a[8]; \ + a[8] = a[57]; \ + a[57] = z_bswap; \ + z_bswap = a[9]; \ + a[9] = a[56]; \ + a[56] = z_bswap; \ + z_bswap = a[10]; \ + a[10] = a[55]; \ + a[55] = z_bswap; \ + z_bswap = a[11]; \ + a[11] = a[54]; \ + a[54] = z_bswap; \ + z_bswap = a[12]; \ + a[12] = a[53]; \ + a[53] = z_bswap; \ + z_bswap = a[13]; \ + a[13] = a[52]; \ + a[52] = z_bswap; \ + z_bswap = a[14]; \ + a[14] = a[51]; \ + a[51] = z_bswap; \ + z_bswap = a[15]; \ + a[15] = a[50]; \ + a[50] = z_bswap; \ + z_bswap = a[16]; \ + a[16] = a[49]; \ + a[49] = z_bswap; \ + z_bswap = a[17]; \ + a[17] = a[48]; \ + a[48] = z_bswap; \ + z_bswap = a[18]; \ + a[18] = a[47]; \ + a[47] = z_bswap; \ + z_bswap = a[19]; \ + a[19] = a[46]; \ + a[46] = z_bswap; \ + z_bswap = a[20]; \ + a[20] = a[45]; \ + a[45] = z_bswap; \ + z_bswap = a[21]; \ + a[21] = a[44]; \ + a[44] = z_bswap; \ + z_bswap = a[22]; \ + a[22] = a[43]; \ + a[43] = z_bswap; \ + z_bswap = 
a[23]; \ + a[23] = a[42]; \ + a[42] = z_bswap; \ + z_bswap = a[24]; \ + a[24] = a[41]; \ + a[41] = z_bswap; \ + z_bswap = a[25]; \ + a[25] = a[40]; \ + a[40] = z_bswap; \ + z_bswap = a[26]; \ + a[26] = a[39]; \ + a[39] = z_bswap; \ + z_bswap = a[27]; \ + a[27] = a[38]; \ + a[38] = z_bswap; \ + z_bswap = a[28]; \ + a[28] = a[37]; \ + a[37] = z_bswap; \ + z_bswap = a[29]; \ + a[29] = a[36]; \ + a[36] = z_bswap; \ + z_bswap = a[30]; \ + a[30] = a[35]; \ + a[35] = z_bswap; \ + z_bswap = a[31]; \ + a[31] = a[34]; \ + a[34] = z_bswap; \ + z_bswap = a[32]; \ + a[32] = a[33]; \ + a[33] = z_bswap; \ + } while (0) + +static mp_err +point_mul_g_secp521r1_wrap(const mp_int *n, mp_int *out_x, + mp_int *out_y, const ECGroup *group) +{ + unsigned char b_x[66]; + unsigned char b_y[66]; + unsigned char b_n[66]; + mp_err res; + + ARGCHK(n != NULL && out_x != NULL && out_y != NULL, MP_BADARG); + + /* fail on out of range scalars */ + if (mpl_significant_bits(n) > 521 || mp_cmp_z(n) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 66)); + MP_BE2LE(b_n); + point_mul_g_secp521r1(b_x, b_y, b_n); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66)); + +CLEANUP: + return res; +} + +static mp_err +point_mul_secp521r1_wrap(const mp_int *n, const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, const ECGroup *group) +{ + unsigned char b_x[66]; + unsigned char b_y[66]; + unsigned char b_n[66]; + mp_err res; + + ARGCHK(n != NULL && in_x != NULL && in_y != NULL && out_x != NULL && + out_y != NULL, + MP_BADARG); + + /* fail on out of range scalars */ + if (mpl_significant_bits(n) > 521 || mp_cmp_z(n) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 66)); + MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 66)); + MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 66)); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_BE2LE(b_n); + point_mul_secp521r1(b_x, b_y, b_n, b_x, 
b_y); + MP_BE2LE(b_x); + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66)); + +CLEANUP: + return res; +} + +static mp_err +point_mul_two_secp521r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) +{ + unsigned char b_x[66]; + unsigned char b_y[66]; + unsigned char b_n1[66]; + unsigned char b_n2[66]; + mp_err res; + + /* If n2 == NULL or 0, this is just a base-point multiplication. */ + if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) + return point_mul_g_secp521r1_wrap(n1, out_x, out_y, group); + + /* If n1 == NULL or 0, this is just an arbitrary-point multiplication. */ + if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) + return point_mul_secp521r1_wrap(n2, in_x, in_y, out_x, out_y, group); + + ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, + MP_BADARG); + + /* fail on out of range scalars */ + if (mpl_significant_bits(n1) > 521 || mp_cmp_z(n1) != MP_GT || + mpl_significant_bits(n2) > 521 || mp_cmp_z(n2) != MP_GT) + return MP_RANGE; + + MP_CHECKOK(mp_to_fixlen_octets(n1, b_n1, 66)); + MP_CHECKOK(mp_to_fixlen_octets(n2, b_n2, 66)); + MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 66)); + MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 66)); + MP_BE2LE(b_x); /* reverse each buffer in place before the byte-level multiply */ + MP_BE2LE(b_y); + MP_BE2LE(b_n1); + MP_BE2LE(b_n2); + point_mul_two_secp521r1(b_x, b_y, b_n1, b_n2, b_x, b_y); /* b_x and b_y are both input and output */ + MP_BE2LE(b_x); /* reverse the results back before reading them into mp_ints */ + MP_BE2LE(b_y); + MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); + MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66)); + +CLEANUP: + return res; +} + +mp_err +ec_group_set_secp521r1(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P521) { /* install the three multiplication wrappers for P-521 only */ + group->base_point_mul = &point_mul_g_secp521r1_wrap; + group->point_mul = &point_mul_secp521r1_wrap; + group->points_mul = &point_mul_two_secp521r1_wrap; + } + return MP_OKAY; /* also returned, with group untouched, for any other curve name */ +} + +#endif /* __SIZEOF_INT128__ */ diff --git a/security/nss/lib/freebl/exports.gyp
b/security/nss/lib/freebl/exports.gyp new file mode 100644 index 0000000000..af5c782a53 --- /dev/null +++ b/security/nss/lib/freebl/exports.gyp @@ -0,0 +1,51 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +{ + 'includes': [ + '../../coreconf/config.gypi' + ], + 'targets': [ + { + 'target_name': 'lib_freebl_exports', + 'type': 'none', + 'copies': [ + { + 'files': [ + 'blapit.h', + 'ecl/ecl-exp.h', + 'shsign.h' + ], + 'conditions': [ + [ 'OS=="linux"', { + 'files': [ + 'nsslowhash.h', + ], + }], + ], + 'destination': '<(nss_public_dist_dir)/<(module)' + }, + { + 'files': [ + 'cmac.h', + 'alghmac.h', + 'blapi.h', + 'blake2b.h', + 'chacha20poly1305.h', + 'ec.h', + 'ecl/ecl-curve.h', + 'ecl/ecl.h', + 'ecl/eclt.h', + 'hmacct.h', + 'secmpi.h', + 'secrng.h' + ], + 'destination': '<(nss_private_dist_dir)/<(module)' + } + ] + } + ], + 'variables': { + 'module': 'nss' + } +} diff --git a/security/nss/lib/freebl/fipsfreebl.c b/security/nss/lib/freebl/fipsfreebl.c new file mode 100644 index 0000000000..6d24372d60 --- /dev/null +++ b/security/nss/lib/freebl/fipsfreebl.c @@ -0,0 +1,2251 @@ +/* + * PKCS #11 FIPS Power-Up Self Test. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* $Id: fipstest.c,v 1.31 2012/06/28 17:55:06 rrelyea%redhat.com Exp $ */ + +#ifndef NSS_FIPS_DISABLED +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "seccomon.h" /* Required for RSA and DSA. 
*/ +#include "secerr.h" +#include "prtypes.h" +#include "secitem.h" +#include "pkcs11t.h" +#include "cmac.h" + +#include "ec.h" /* Required for EC */ + +/* + * different platforms have different ways of calling an initial entry point + * when the dll/.so is loaded. Most platforms support either a posix pragma + * or the GCC attribute. Some platforms support a pre-defined name, and some + * platforms have a link line way of invoking this function. + */ + +/* The pragma */ +#if defined(USE_INIT_PRAGMA) +#pragma init(bl_startup_tests) +#endif + +/* GCC Attribute */ +#if defined(__GNUC__) && !defined(NSS_NO_INIT_SUPPORT) +#define INIT_FUNCTION __attribute__((constructor)) +#else +#define INIT_FUNCTION +#endif + +static void INIT_FUNCTION bl_startup_tests(void); + +/* Windows pre-defined entry */ +#if defined(XP_WIN) && !defined(NSS_NO_INIT_SUPPORT) +#include <windows.h> + +BOOL WINAPI +DllMain( + HINSTANCE hinstDLL, // handle to DLL module + DWORD fdwReason, // reason for calling function + LPVOID lpReserved) // reserved +{ + // Perform actions based on the reason for calling. + switch (fdwReason) { + case DLL_PROCESS_ATTACH: + // Initialize once for each new process. + // Return FALSE to fail DLL load. + bl_startup_tests(); + break; + + case DLL_THREAD_ATTACH: + // Do thread-specific initialization. + break; + + case DLL_THREAD_DETACH: + // Do thread-specific cleanup. + break; + + case DLL_PROCESS_DETACH: + // Perform any necessary cleanup. + break; + } + return TRUE; // Successful DLL_PROCESS_ATTACH. +} +#endif + +/* insert other platform dependent init entry points here, or modify + * the linker line */ + +/* FIPS preprocessor directives for RC2-ECB and RC2-CBC. */ +#define FIPS_RC2_KEY_LENGTH 5 /* 40-bits */ +#define FIPS_RC2_ENCRYPT_LENGTH 8 /* 64-bits */ +#define FIPS_RC2_DECRYPT_LENGTH 8 /* 64-bits */ + +/* FIPS preprocessor directives for RC4.
*/ +#define FIPS_RC4_KEY_LENGTH 5 /* 40-bits */ +#define FIPS_RC4_ENCRYPT_LENGTH 8 /* 64-bits */ +#define FIPS_RC4_DECRYPT_LENGTH 8 /* 64-bits */ + +/* FIPS preprocessor directives for DES-ECB and DES-CBC. */ +#define FIPS_DES_ENCRYPT_LENGTH 8 /* 64-bits */ +#define FIPS_DES_DECRYPT_LENGTH 8 /* 64-bits */ + +/* FIPS preprocessor directives for DES3-CBC and DES3-ECB. */ +#define FIPS_DES3_ENCRYPT_LENGTH 8 /* 64-bits */ +#define FIPS_DES3_DECRYPT_LENGTH 8 /* 64-bits */ + +/* FIPS preprocessor directives for AES-ECB and AES-CBC. */ +#define FIPS_AES_BLOCK_SIZE 16 /* 128-bits */ +#define FIPS_AES_ENCRYPT_LENGTH 16 /* 128-bits */ +#define FIPS_AES_DECRYPT_LENGTH 16 /* 128-bits */ +#define FIPS_AES_CMAC_LENGTH 16 /* 128-bits */ +#define FIPS_AES_128_KEY_SIZE 16 /* 128-bits */ +#define FIPS_AES_192_KEY_SIZE 24 /* 192-bits */ +#define FIPS_AES_256_KEY_SIZE 32 /* 256-bits */ + +/* FIPS preprocessor directives for message digests */ +#define FIPS_KNOWN_HASH_MESSAGE_LENGTH 64 /* 512-bits */ + +/* FIPS preprocessor directives for RSA. */ +#define FIPS_RSA_TYPE siBuffer +#define FIPS_RSA_PUBLIC_EXPONENT_LENGTH 3 /* 24-bits */ +#define FIPS_RSA_PRIVATE_VERSION_LENGTH 1 /* 8-bits */ +#define FIPS_RSA_MESSAGE_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_COEFFICIENT_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_PRIME0_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_PRIME1_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_EXPONENT0_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_EXPONENT1_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_PRIVATE_EXPONENT_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_ENCRYPT_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_DECRYPT_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_SIGNATURE_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_MODULUS_LENGTH 256 /* 2048-bits */ + +/* FIPS preprocessor directives for DSA. 
*/ +#define FIPS_DSA_TYPE siBuffer +#define FIPS_DSA_DIGEST_LENGTH 20 /* 160-bits */ +#define FIPS_DSA_SUBPRIME_LENGTH 20 /* 160-bits */ +#define FIPS_DSA_SIGNATURE_LENGTH 40 /* 320-bits */ +#define FIPS_DSA_PRIME_LENGTH 128 /* 1024-bits */ +#define FIPS_DSA_BASE_LENGTH 128 /* 1024-bits */ + +/* FIPS preprocessor directives for RNG. */ +#define FIPS_RNG_XKEY_LENGTH 32 /* 256-bits */ + +static SECStatus +freebl_fips_DES3_PowerUpSelfTest(void) +{ + /* DES3 Known Key (192-bits). */ + static const PRUint8 des3_known_key[] = { "ANSI Triple-DES Key Data" }; + + /* DES3-CBC Known Initialization Vector (64-bits). */ + static const PRUint8 des3_cbc_known_initialization_vector[] = { "Security" }; + + /* DES3 Known Plaintext (64-bits). */ + static const PRUint8 des3_ecb_known_plaintext[] = { "Netscape" }; + static const PRUint8 des3_cbc_known_plaintext[] = { "Netscape" }; + + /* DES3 Known Ciphertext (64-bits). */ + static const PRUint8 des3_ecb_known_ciphertext[] = { + 0x55, 0x8e, 0xad, 0x3c, 0xee, 0x49, 0x69, 0xbe + }; + static const PRUint8 des3_cbc_known_ciphertext[] = { + 0x43, 0xdc, 0x6a, 0xc1, 0xaf, 0xa6, 0x32, 0xf5 + }; + + /* DES3 variables. */ + PRUint8 des3_computed_ciphertext[FIPS_DES3_ENCRYPT_LENGTH]; + PRUint8 des3_computed_plaintext[FIPS_DES3_DECRYPT_LENGTH]; + DESContext *des3_context; + unsigned int des3_bytes_encrypted; + unsigned int des3_bytes_decrypted; + SECStatus des3_status; + + /*******************************************************/ + /* DES3-ECB Single-Round Known Answer Encryption Test.
*/ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, NULL, + NSS_DES_EDE3, PR_TRUE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext, + &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH, + des3_ecb_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_ciphertext, des3_ecb_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /*******************************************************/ + /* DES3-ECB Single-Round Known Answer Decryption Test. */ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, NULL, + NSS_DES_EDE3, PR_FALSE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Decrypt(des3_context, des3_computed_plaintext, + &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH, + des3_ecb_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_plaintext, des3_ecb_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /*******************************************************/ + /* DES3-CBC Single-Round Known Answer Encryption Test. 
*/ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, + des3_cbc_known_initialization_vector, + NSS_DES_EDE3_CBC, PR_TRUE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext, + &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH, + des3_cbc_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_ciphertext, des3_cbc_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /*******************************************************/ + /* DES3-CBC Single-Round Known Answer Decryption Test. */ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, + des3_cbc_known_initialization_vector, + NSS_DES_EDE3_CBC, PR_FALSE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Decrypt(des3_context, des3_computed_plaintext, + &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH, + des3_cbc_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_plaintext, des3_cbc_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +/* AES self-test for 128-bit, 192-bit, or 256-bit key sizes*/ +static SECStatus +freebl_fips_AES_PowerUpSelfTest(int aes_key_size) +{ + /* AES Known Key (up to 256-bits). 
*/ + static const PRUint8 aes_known_key[] = { "AES-128 RIJNDAELLEADNJIR 821-SEA" }; + + /* AES-CBC Known Initialization Vector (128-bits). */ + static const PRUint8 aes_cbc_known_initialization_vector[] = { "SecurityytiruceS" }; + + /* AES Known Plaintext (128-bits). (blocksize is 128-bits) */ + static const PRUint8 aes_known_plaintext[] = { "NetscapeepacsteN" }; + + static const PRUint8 aes_gcm_known_aad[] = { "MozillaallizoM" }; + + /* AES Known Ciphertext (128-bit key). */ + static const PRUint8 aes_ecb128_known_ciphertext[] = { + 0x3c, 0xa5, 0x96, 0xf3, 0x34, 0x6a, 0x96, 0xc1, + 0x03, 0x88, 0x16, 0x7b, 0x20, 0xbf, 0x35, 0x47 + }; + + static const PRUint8 aes_cbc128_known_ciphertext[] = { + 0xcf, 0x15, 0x1d, 0x4f, 0x96, 0xe4, 0x4f, 0x63, + 0x15, 0x54, 0x14, 0x1d, 0x4e, 0xd8, 0xd5, 0xea + }; + + static const PRUint8 aes_gcm128_known_ciphertext[] = { + 0x63, 0xf4, 0x95, 0x28, 0xe6, 0x78, 0xee, 0x6e, + 0x4f, 0xe0, 0xfc, 0x8d, 0xd7, 0xa2, 0xb1, 0xff, + 0x0c, 0x97, 0x1b, 0x0a, 0xdd, 0x97, 0x75, 0xed, + 0x8b, 0xde, 0xbf, 0x16, 0x5e, 0x57, 0x6b, 0x4f + }; + + static const PRUint8 aes_cmac128_known_ciphertext[] = { + 0x54, 0x11, 0xe2, 0x57, 0xbd, 0x2a, 0xdf, 0x9d, + 0x1a, 0x89, 0x72, 0x80, 0x84, 0x4c, 0x7e, 0x93 + }; + + /* AES Known Ciphertext (192-bit key). 
*/ + static const PRUint8 aes_ecb192_known_ciphertext[] = { + 0xa0, 0x18, 0x62, 0xed, 0x88, 0x19, 0xcb, 0x62, + 0x88, 0x1d, 0x4d, 0xfe, 0x84, 0x02, 0x89, 0x0e + }; + + static const PRUint8 aes_cbc192_known_ciphertext[] = { + 0x83, 0xf7, 0xa4, 0x76, 0xd1, 0x6f, 0x07, 0xbe, + 0x07, 0xbc, 0x43, 0x2f, 0x6d, 0xad, 0x29, 0xe1 + }; + + static const PRUint8 aes_gcm192_known_ciphertext[] = { + 0xc1, 0x0b, 0x92, 0x1d, 0x68, 0x21, 0xf4, 0x25, + 0x41, 0x61, 0x20, 0x2d, 0x59, 0x7f, 0x53, 0xde, + 0x93, 0x39, 0xab, 0x09, 0x76, 0x41, 0x57, 0x2b, + 0x90, 0x2e, 0x44, 0xbb, 0x52, 0x03, 0xe9, 0x07 + }; + + static const PRUint8 aes_cmac192_known_ciphertext[] = { + 0x0e, 0x07, 0x99, 0x1e, 0xf6, 0xee, 0xfa, 0x2c, + 0x1b, 0xfc, 0xce, 0x94, 0x92, 0x2d, 0xf1, 0xab + }; + + /* AES Known Ciphertext (256-bit key). */ + static const PRUint8 aes_ecb256_known_ciphertext[] = { + 0xdb, 0xa6, 0x52, 0x01, 0x8a, 0x70, 0xae, 0x66, + 0x3a, 0x99, 0xd8, 0x95, 0x7f, 0xfb, 0x01, 0x67 + }; + + static const PRUint8 aes_cbc256_known_ciphertext[] = { + 0x37, 0xea, 0x07, 0x06, 0x31, 0x1c, 0x59, 0x27, + 0xc5, 0xc5, 0x68, 0x71, 0x6e, 0x34, 0x40, 0x16 + }; + + static const PRUint8 aes_gcm256_known_ciphertext[] = { + 0x5d, 0x9e, 0xd2, 0xa2, 0x74, 0x9c, 0xd9, 0x1c, + 0xd1, 0xc9, 0xee, 0x5d, 0xb6, 0xf2, 0xc9, 0xb6, + 0x79, 0x27, 0x53, 0x02, 0xa3, 0xdc, 0x22, 0xce, + 0xf4, 0xb0, 0xc1, 0x8c, 0x86, 0x51, 0xf5, 0xa1 + }; + + static const PRUint8 aes_cmac256_known_ciphertext[] = { + 0xc1, 0x26, 0x69, 0x32, 0x51, 0x13, 0x65, 0xac, + 0x71, 0x23, 0xe4, 0xe7, 0xb9, 0x0c, 0x88, 0x9f + + }; + + const PRUint8 *aes_ecb_known_ciphertext = + (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_ecb128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_ecb192_known_ciphertext : aes_ecb256_known_ciphertext; + + const PRUint8 *aes_cbc_known_ciphertext = + (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_cbc128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? 
aes_cbc192_known_ciphertext : aes_cbc256_known_ciphertext; + + const PRUint8 *aes_gcm_known_ciphertext = + (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_gcm128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_gcm192_known_ciphertext : aes_gcm256_known_ciphertext; + + const PRUint8 *aes_cmac_known_ciphertext = + (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_cmac128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_cmac192_known_ciphertext : aes_cmac256_known_ciphertext; + + /* AES variables. */ + PRUint8 aes_computed_ciphertext[FIPS_AES_ENCRYPT_LENGTH * 2]; + PRUint8 aes_computed_plaintext[FIPS_AES_DECRYPT_LENGTH * 2]; + AESContext *aes_context; + CMACContext *cmac_context; + unsigned int aes_bytes_encrypted; + unsigned int aes_bytes_decrypted; + CK_NSS_GCM_PARAMS gcmParams; + SECStatus aes_status; + + /*check if aes_key_size is 128, 192, or 256 bits */ + if ((aes_key_size != FIPS_AES_128_KEY_SIZE) && + (aes_key_size != FIPS_AES_192_KEY_SIZE) && + (aes_key_size != FIPS_AES_256_KEY_SIZE)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-ECB Single-Round Known Answer Encryption Test: */ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_TRUE, + aes_key_size, FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext, + &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH, + aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_ciphertext, aes_ecb_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + 
/******************************************************/ + /* AES-ECB Single-Round Known Answer Decryption Test: */ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_FALSE, + aes_key_size, FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Decrypt(aes_context, aes_computed_plaintext, + &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH, + aes_ecb_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-CBC Single-Round Known Answer Encryption Test. */ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, + aes_cbc_known_initialization_vector, + NSS_AES_CBC, PR_TRUE, aes_key_size, + FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext, + &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH, + aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_ciphertext, aes_cbc_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-CBC Single-Round Known Answer Decryption Test. 
*/ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, + aes_cbc_known_initialization_vector, + NSS_AES_CBC, PR_FALSE, aes_key_size, + FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Decrypt(aes_context, aes_computed_plaintext, + &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH, + aes_cbc_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-GCM Single-Round Known Answer Encryption Test. */ + /******************************************************/ + + gcmParams.pIv = (PRUint8 *)aes_cbc_known_initialization_vector; + gcmParams.ulIvLen = FIPS_AES_BLOCK_SIZE; + gcmParams.pAAD = (PRUint8 *)aes_gcm_known_aad; + gcmParams.ulAADLen = sizeof(aes_gcm_known_aad); + gcmParams.ulTagBits = FIPS_AES_BLOCK_SIZE * 8; + aes_context = AES_CreateContext(aes_known_key, + (PRUint8 *)&gcmParams, + NSS_AES_GCM, PR_TRUE, aes_key_size, + FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext, + &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH * 2, + aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH * 2) || + (PORT_Memcmp(aes_computed_ciphertext, aes_gcm_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH * 2) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + 
/******************************************************/ + /* AES-GCM Single-Round Known Answer Decryption Test. */ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, + (PRUint8 *)&gcmParams, + NSS_AES_GCM, PR_FALSE, aes_key_size, + FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Decrypt(aes_context, aes_computed_plaintext, + &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH * 2, + aes_gcm_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH * 2); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-CMAC Known Answer Encryption Test. */ + /******************************************************/ + cmac_context = CMAC_Create(CMAC_AES, aes_known_key, aes_key_size); + + if (cmac_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = CMAC_Begin(cmac_context); + if (aes_status != SECSuccess) { + CMAC_Destroy(cmac_context, PR_TRUE); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + aes_status = CMAC_Update(cmac_context, aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH); + if (aes_status != SECSuccess) { + CMAC_Destroy(cmac_context, PR_TRUE); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + aes_status = CMAC_Finish(cmac_context, aes_computed_ciphertext, + &aes_bytes_encrypted, FIPS_AES_CMAC_LENGTH); + + CMAC_Destroy(cmac_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_encrypted != FIPS_AES_CMAC_LENGTH) || + (PORT_Memcmp(aes_computed_ciphertext, aes_cmac_known_ciphertext, + FIPS_AES_CMAC_LENGTH) != 0)) { + 
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +/* Known Hash Message (512-bits). Used for all hashes (incl. SHA-N [N>1]). */ +static const PRUint8 known_hash_message[] = { + "The test message for the MD2, MD5, and SHA-1 hashing algorithms." +}; + +/****************************************************/ +/* Single Round HMAC SHA-X test */ +/****************************************************/ +static SECStatus +freebl_fips_HMAC(unsigned char *hmac_computed, + const PRUint8 *secret_key, + unsigned int secret_key_length, + const PRUint8 *message, + unsigned int message_length, + HASH_HashType hashAlg) +{ + SECStatus hmac_status = SECFailure; + HMACContext *cx = NULL; + SECHashObject *hashObj = NULL; + unsigned int bytes_hashed = 0; + + hashObj = (SECHashObject *)HASH_GetRawHashObject(hashAlg); + + if (!hashObj) + return (SECFailure); + + cx = HMAC_Create(hashObj, secret_key, + secret_key_length, + PR_TRUE); /* PR_TRUE for in FIPS mode */ + + if (cx == NULL) + return (SECFailure); + + HMAC_Begin(cx); + HMAC_Update(cx, message, message_length); + hmac_status = HMAC_Finish(cx, hmac_computed, &bytes_hashed, + hashObj->length); + + HMAC_Destroy(cx, PR_TRUE); + + return (hmac_status); +} + +static SECStatus +freebl_fips_HMAC_PowerUpSelfTest(void) +{ + static const PRUint8 HMAC_known_secret_key[] = { + "Firefox and ThunderBird are awesome!" 
+ }; + + static const PRUint8 HMAC_known_secret_key_length = sizeof HMAC_known_secret_key; + + /* known SHA1 hmac (20 bytes) */ + static const PRUint8 known_SHA1_hmac[] = { + 0xd5, 0x85, 0xf6, 0x5b, 0x39, 0xfa, 0xb9, 0x05, + 0x3b, 0x57, 0x1d, 0x61, 0xe7, 0xb8, 0x84, 0x1e, + 0x5d, 0x0e, 0x1e, 0x11 + }; + + /* known SHA224 hmac (28 bytes) */ + static const PRUint8 known_SHA224_hmac[] = { + 0x1c, 0xc3, 0x06, 0x8e, 0xce, 0x37, 0x68, 0xfb, + 0x1a, 0x82, 0x4a, 0xbe, 0x2b, 0x00, 0x51, 0xf8, + 0x9d, 0xb6, 0xe0, 0x90, 0x0d, 0x00, 0xc9, 0x64, + 0x9a, 0xb8, 0x98, 0x4e + }; + + /* known SHA256 hmac (32 bytes) */ + static const PRUint8 known_SHA256_hmac[] = { + 0x05, 0x75, 0x9a, 0x9e, 0x70, 0x5e, 0xe7, 0x44, + 0xe2, 0x46, 0x4b, 0x92, 0x22, 0x14, 0x22, 0xe0, + 0x1b, 0x92, 0x8a, 0x0c, 0xfe, 0xf5, 0x49, 0xe9, + 0xa7, 0x1b, 0x56, 0x7d, 0x1d, 0x29, 0x40, 0x48 + }; + + /* known SHA384 hmac (48 bytes) */ + static const PRUint8 known_SHA384_hmac[] = { + 0xcd, 0x56, 0x14, 0xec, 0x05, 0x53, 0x06, 0x2b, + 0x7e, 0x9c, 0x8a, 0x18, 0x5e, 0xea, 0xf3, 0x91, + 0x33, 0xfb, 0x64, 0xf6, 0xe3, 0x9f, 0x89, 0x0b, + 0xaf, 0xbe, 0x83, 0x4d, 0x3f, 0x3c, 0x43, 0x4d, + 0x4a, 0x0c, 0x56, 0x98, 0xf8, 0xca, 0xb4, 0xaa, + 0x9a, 0xf4, 0x0a, 0xaf, 0x4f, 0x69, 0xca, 0x87 + }; + + /* known SHA512 hmac (64 bytes) */ + static const PRUint8 known_SHA512_hmac[] = { + 0xf6, 0x0e, 0x97, 0x12, 0x00, 0x67, 0x6e, 0xb9, + 0x0c, 0xb2, 0x63, 0xf0, 0x60, 0xac, 0x75, 0x62, + 0x70, 0x95, 0x2a, 0x52, 0x22, 0xee, 0xdd, 0xd2, + 0x71, 0xb1, 0xe8, 0x26, 0x33, 0xd3, 0x13, 0x27, + 0xcb, 0xff, 0x44, 0xef, 0x87, 0x97, 0x16, 0xfb, + 0xd3, 0x0b, 0x48, 0xbe, 0x12, 0x4e, 0xda, 0xb1, + 0x89, 0x90, 0xfb, 0x06, 0x0c, 0xbe, 0xe5, 0xc4, + 0xff, 0x24, 0x37, 0x3d, 0xc7, 0xe4, 0xe4, 0x37 + }; + + SECStatus hmac_status; + PRUint8 hmac_computed[HASH_LENGTH_MAX]; + + /***************************************************/ + /* HMAC SHA-1 Single-Round Known Answer HMAC Test. 
*/ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA1); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA1_hmac, + SHA1_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-224 Single-Round Known Answer Test. */ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA224); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA224_hmac, + SHA224_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-256 Single-Round Known Answer Test. */ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA256); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA256_hmac, + SHA256_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-384 Single-Round Known Answer Test. 
*/ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA384); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA384_hmac, + SHA384_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-512 Single-Round Known Answer Test. */ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA512); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA512_hmac, + SHA512_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +SECStatus +freebl_fips_TLS_PowerUpSelfTest(void) +{ + static const PRUint8 TLS_known_secret_key[] = { + "Firefox and ThunderBird are awesome!" 
+ }; + + static const PRUint8 TLS_known_secret_key_length = sizeof TLS_known_secret_key; + + /* known tls prf with sha1/md5 */ + static const PRUint8 known_TLS_PRF[] = { + 0x87, 0x4c, 0xc0, 0xc5, 0x15, 0x14, 0x2b, 0xdc, + 0x73, 0x48, 0x9e, 0x88, 0x9d, 0xf5, 0x83, 0x2f, + 0x2d, 0x66, 0x1e, 0x78, 0x6c, 0x54, 0x78, 0x29, + 0xb9, 0xa4, 0x4c, 0x90, 0x5e, 0xa2, 0xe6, 0x5c, + 0xf1, 0x4f, 0xb5, 0x95, 0xa5, 0x54, 0xc0, 0x9f, + 0x84, 0x47, 0xb4, 0x4c, 0xda, 0xae, 0x19, 0x29, + 0x2b, 0x91, 0x2a, 0x81, 0x9d, 0x3a, 0x30, 0x40, + 0xc5, 0xdf, 0xbb, 0xfa, 0xd8, 0x4c, 0xbc, 0x18 + }; + + /* known SHA256 tls mac */ + static const PRUint8 known_TLS_SHA256[] = { + 0x66, 0xd6, 0x94, 0xd4, 0x0d, 0x32, 0x61, 0x38, + 0x26, 0xf6, 0x8b, 0xfe, 0x9e, 0xac, 0xa2, 0xf5, + 0x40, 0x52, 0x74, 0x3f, 0xbe, 0xb8, 0xca, 0x94, + 0xc3, 0x64, 0xd6, 0x02, 0xf5, 0x88, 0x98, 0x35, + 0x73, 0x9f, 0xce, 0xaa, 0x68, 0xe3, 0x7c, 0x93, + 0x30, 0x21, 0x45, 0xec, 0xe9, 0x8f, 0x1c, 0x7e, + 0xd1, 0x54, 0xf5, 0xbe, 0xff, 0xc8, 0xd7, 0x72, + 0x7f, 0x9c, 0x0c, 0x7f, 0xa9, 0xd3, 0x4a, 0xd2 + }; + +#ifdef NSS_FULL_POST + /* known SHA224 tls mac */ + static const PRUint8 known_TLS_SHA224[] = { + 0xd8, 0x68, 0x15, 0xff, 0xa1, 0xa2, 0x5e, 0x16, + 0xce, 0xb1, 0xfd, 0xbd, 0xda, 0x39, 0xbc, 0xa7, + 0x27, 0x32, 0x78, 0x94, 0x66, 0xf0, 0x84, 0xcf, + 0x46, 0xc0, 0x22, 0x76, 0xdc, 0x6b, 0x2e, 0xed, + 0x1d, 0x2d, 0xd2, 0x93, 0xfd, 0xae, 0xca, 0xf9, + 0xe0, 0x4c, 0x17, 0x23, 0x22, 0x5a, 0x73, 0x93, + 0x20, 0x0a, 0xbd, 0xa0, 0x72, 0xf8, 0x8b, 0x74, + 0xfb, 0xf1, 0xab, 0xb7, 0xe0, 0xec, 0x34, 0xc9 + }; + + /* known SHA384 tls mac */ + static const PRUint8 known_TLS_SHA384[] = { + 0xb2, 0xac, 0x06, 0x10, 0xad, 0x50, 0xd5, 0xdc, + 0xdb, 0x01, 0xea, 0xa6, 0x2d, 0x8a, 0x34, 0xb6, + 0xeb, 0x84, 0xbc, 0x37, 0xc9, 0x9f, 0xa1, 0x9c, + 0xd5, 0xbd, 0x4e, 0x66, 0x16, 0x24, 0xe5, 0x3d, + 0xce, 0x74, 0xe0, 0x30, 0x41, 0x5c, 0xdb, 0xb7, + 0x52, 0x1d, 0x2d, 0x4d, 0x9b, 0xbe, 0x6b, 0x86, + 0xda, 0x8a, 0xca, 0x73, 0x39, 0xb4, 0xc7, 0x8f, + 0x03, 0xb1, 
0xf9, 0x7e, 0x65, 0xae, 0x17, 0x10 + }; + + /* known SHA512 tls mac */ + static const PRUint8 known_TLS_SHA512[] = { + 0x73, 0x21, 0x4f, 0x40, 0x81, 0x1e, 0x90, 0xa1, + 0x16, 0x40, 0x1e, 0x33, 0x69, 0xc5, 0x00, 0xc7, + 0xc4, 0x81, 0xa3, 0x4f, 0xa7, 0xcc, 0x4a, 0xeb, + 0x1a, 0x66, 0x00, 0x82, 0x52, 0xe2, 0x2f, 0x69, + 0x14, 0x59, 0x05, 0x7c, 0xb0, 0x32, 0xce, 0xcc, + 0xb7, 0xc9, 0xab, 0x0f, 0x73, 0x00, 0xe5, 0x52, + 0x9d, 0x6b, 0x0e, 0x66, 0x4b, 0xb3, 0x0b, 0x0d, + 0x34, 0x53, 0x97, 0x13, 0x84, 0x18, 0x31, 0x7a + }; +#endif + + SECStatus status; + PRUint8 tls_computed[HASH_LENGTH_MAX]; + SECItem secret; + SECItem seed; + SECItem result; + const char *tls_label = "fips test label"; + + secret.data = (unsigned char *)TLS_known_secret_key; + secret.len = TLS_known_secret_key_length; + seed.data = (unsigned char *)known_hash_message; + seed.len = FIPS_KNOWN_HASH_MESSAGE_LENGTH; + result.data = tls_computed; + result.len = sizeof(tls_computed); + + /***************************************************/ + /* TLS 1.0 PRF Known Answer Test */ + /***************************************************/ + + status = TLS_PRF(&secret, tls_label, &seed, &result, PR_TRUE); + + if ((status != SECSuccess) || + (result.len != HASH_LENGTH_MAX) || + (PORT_Memcmp(tls_computed, known_TLS_PRF, + HASH_LENGTH_MAX) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* TLS 1.2 SHA-256 Known Answer Test. */ + /***************************************************/ + + status = TLS_P_hash(HASH_AlgSHA256, &secret, tls_label, + &seed, &result, PR_TRUE); + + if ((status != SECSuccess) || + (result.len != HASH_LENGTH_MAX) || + (PORT_Memcmp(tls_computed, known_TLS_SHA256, + HASH_LENGTH_MAX) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + +#ifdef NSS_FULL_POST + /***************************************************/ + /* TLS 1.2 SHA-224 Known Answer Test. 
*/ + /***************************************************/ + + status = TLS_P_hash(HASH_AlgSHA224, &secret, tls_label, + &seed, &result, PR_TRUE); + + if ((status != SECSuccess) || + (result.len != HASH_LENGTH_MAX) || + (PORT_Memcmp(tls_computed, known_TLS_SHA224, + HASH_LENGTH_MAX) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* TLS 1.2 SHA-384 Known Answer Test. */ + /***************************************************/ + + status = TLS_P_hash(HASH_AlgSHA384, &secret, tls_label, + &seed, &result, PR_TRUE); + + if ((status != SECSuccess) || + (result.len != HASH_LENGTH_MAX) || + (PORT_Memcmp(tls_computed, known_TLS_SHA384, + HASH_LENGTH_MAX) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* TLS 1.2 SHA-512 Known Answer Test. */ + /***************************************************/ + + status = TLS_P_hash(HASH_AlgSHA512, &secret, tls_label, + &seed, &result, PR_TRUE); + + if ((status != SECSuccess) || + (result.len != HASH_LENGTH_MAX) || + (PORT_Memcmp(tls_computed, known_TLS_SHA512, + HASH_LENGTH_MAX) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } +#endif + + return (SECSuccess); +} + +static SECStatus +freebl_fips_SHA_PowerUpSelfTest(void) +{ + /* SHA-1 Known Digest Message (160-bits). */ + static const PRUint8 sha1_known_digest[] = { + 0x0a, 0x6d, 0x07, 0xba, 0x1e, 0xbd, 0x8a, 0x1b, + 0x72, 0xf6, 0xc7, 0x22, 0xf1, 0x27, 0x9f, 0xf0, + 0xe0, 0x68, 0x47, 0x7a + }; + + /* SHA-224 Known Digest Message (224-bits). */ + static const PRUint8 sha224_known_digest[] = { + 0x89, 0x5e, 0x7f, 0xfd, 0x0e, 0xd8, 0x35, 0x6f, + 0x64, 0x6d, 0xf2, 0xde, 0x5e, 0xed, 0xa6, 0x7f, + 0x29, 0xd1, 0x12, 0x73, 0x42, 0x84, 0x95, 0x4f, + 0x8e, 0x08, 0xe5, 0xcb + }; + + /* SHA-256 Known Digest Message (256-bits). 
*/ + static const PRUint8 sha256_known_digest[] = { + 0x38, 0xa9, 0xc1, 0xf0, 0x35, 0xf6, 0x5d, 0x61, + 0x11, 0xd4, 0x0b, 0xdc, 0xce, 0x35, 0x14, 0x8d, + 0xf2, 0xdd, 0xaf, 0xaf, 0xcf, 0xb7, 0x87, 0xe9, + 0x96, 0xa5, 0xd2, 0x83, 0x62, 0x46, 0x56, 0x79 + }; + + /* SHA-384 Known Digest Message (384-bits). */ + static const PRUint8 sha384_known_digest[] = { + 0x11, 0xfe, 0x1c, 0x00, 0x89, 0x48, 0xde, 0xb3, + 0x99, 0xee, 0x1c, 0x18, 0xb4, 0x10, 0xfb, 0xfe, + 0xe3, 0xa8, 0x2c, 0xf3, 0x04, 0xb0, 0x2f, 0xc8, + 0xa3, 0xc4, 0x5e, 0xea, 0x7e, 0x60, 0x48, 0x7b, + 0xce, 0x2c, 0x62, 0xf7, 0xbc, 0xa7, 0xe8, 0xa3, + 0xcf, 0x24, 0xce, 0x9c, 0xe2, 0x8b, 0x09, 0x72 + }; + + /* SHA-512 Known Digest Message (512-bits). */ + static const PRUint8 sha512_known_digest[] = { + 0xc8, 0xb3, 0x27, 0xf9, 0x0b, 0x24, 0xc8, 0xbf, + 0x4c, 0xba, 0x33, 0x54, 0xf2, 0x31, 0xbf, 0xdb, + 0xab, 0xfd, 0xb3, 0x15, 0xd7, 0xfa, 0x48, 0x99, + 0x07, 0x60, 0x0f, 0x57, 0x41, 0x1a, 0xdd, 0x28, + 0x12, 0x55, 0x25, 0xac, 0xba, 0x3a, 0x99, 0x12, + 0x2c, 0x7a, 0x8f, 0x75, 0x3a, 0xe1, 0x06, 0x6f, + 0x30, 0x31, 0xc9, 0x33, 0xc6, 0x1b, 0x90, 0x1a, + 0x6c, 0x98, 0x9a, 0x87, 0xd0, 0xb2, 0xf8, 0x07 + }; + + /* SHA-X variables. */ + PRUint8 sha_computed_digest[HASH_LENGTH_MAX]; + SECStatus sha_status; + + /*************************************************/ + /* SHA-1 Single-Round Known Answer Hashing Test. */ + /*************************************************/ + + sha_status = SHA1_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha1_known_digest, + SHA1_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-224 Single-Round Known Answer Hashing Test. 
*/ + /***************************************************/ + + sha_status = SHA224_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha224_known_digest, + SHA224_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-256 Single-Round Known Answer Hashing Test. */ + /***************************************************/ + + sha_status = SHA256_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha256_known_digest, + SHA256_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-384 Single-Round Known Answer Hashing Test. */ + /***************************************************/ + + sha_status = SHA384_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha384_known_digest, + SHA384_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-512 Single-Round Known Answer Hashing Test. */ + /***************************************************/ + + sha_status = SHA512_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha512_known_digest, + SHA512_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +static SECStatus +freebl_fips_RSA_PowerUpSelfTest(void) +{ + /* RSA Known Modulus used in both Public/Private Key Values (2048-bits). 
*/ + static const PRUint8 rsa_modulus[FIPS_RSA_MODULUS_LENGTH] = { + 0xb8, 0x15, 0x00, 0x33, 0xda, 0x0c, 0x9d, 0xa5, + 0x14, 0x8c, 0xde, 0x1f, 0x23, 0x07, 0x54, 0xe2, + 0xc6, 0xb9, 0x51, 0x04, 0xc9, 0x65, 0x24, 0x6e, + 0x0a, 0x46, 0x34, 0x5c, 0x37, 0x86, 0x6b, 0x88, + 0x24, 0x27, 0xac, 0xa5, 0x02, 0x79, 0xfb, 0xed, + 0x75, 0xc5, 0x3f, 0x6e, 0xdf, 0x05, 0x5f, 0x0f, + 0x20, 0x70, 0xa0, 0x5b, 0x85, 0xdb, 0xac, 0xb9, + 0x5f, 0x02, 0xc2, 0x64, 0x1e, 0x84, 0x5b, 0x3e, + 0xad, 0xbf, 0xf6, 0x2e, 0x51, 0xd6, 0xad, 0xf7, + 0xa7, 0x86, 0x75, 0x86, 0xec, 0xa7, 0xe1, 0xf7, + 0x08, 0xbf, 0xdc, 0x56, 0xb1, 0x3b, 0xca, 0xd8, + 0xfc, 0x51, 0xdf, 0x9a, 0x2a, 0x37, 0x06, 0xf2, + 0xd1, 0x6b, 0x9a, 0x5e, 0x2a, 0xe5, 0x20, 0x57, + 0x35, 0x9f, 0x1f, 0x98, 0xcf, 0x40, 0xc7, 0xd6, + 0x98, 0xdb, 0xde, 0xf5, 0x64, 0x53, 0xf7, 0x9d, + 0x45, 0xf3, 0xd6, 0x78, 0xb9, 0xe3, 0xa3, 0x20, + 0xcd, 0x79, 0x43, 0x35, 0xef, 0xd7, 0xfb, 0xb9, + 0x80, 0x88, 0x27, 0x2f, 0x63, 0xa8, 0x67, 0x3d, + 0x4a, 0xfa, 0x06, 0xc6, 0xd2, 0x86, 0x0b, 0xa7, + 0x28, 0xfd, 0xe0, 0x1e, 0x93, 0x4b, 0x17, 0x2e, + 0xb0, 0x11, 0x6f, 0xc6, 0x2b, 0x98, 0x0f, 0x15, + 0xe3, 0x87, 0x16, 0x7a, 0x7c, 0x67, 0x3e, 0x12, + 0x2b, 0xf8, 0xbe, 0x48, 0xc1, 0x97, 0x47, 0xf4, + 0x1f, 0x81, 0x80, 0x12, 0x28, 0xe4, 0x7b, 0x1e, + 0xb7, 0x00, 0xa4, 0xde, 0xaa, 0xfb, 0x0f, 0x77, + 0x84, 0xa3, 0xd6, 0xb2, 0x03, 0x48, 0xdd, 0x53, + 0x8b, 0x46, 0x41, 0x28, 0x52, 0xc4, 0x53, 0xf0, + 0x1c, 0x95, 0xd9, 0x36, 0xe0, 0x0f, 0x26, 0x46, + 0x9c, 0x61, 0x0e, 0x80, 0xca, 0x86, 0xaf, 0x39, + 0x95, 0xe5, 0x60, 0x43, 0x61, 0x3e, 0x2b, 0xb4, + 0xe8, 0xbd, 0x8d, 0x77, 0x62, 0xf5, 0x32, 0x43, + 0x2f, 0x4b, 0x65, 0x82, 0x14, 0xdd, 0x29, 0x5b + }; + + /* RSA Known Public Key Values (24-bits). 
*/ + static const PRUint8 rsa_public_exponent[FIPS_RSA_PUBLIC_EXPONENT_LENGTH] = { 0x01, 0x00, 0x01 }; + /* RSA Known Private Key Values (version is 8-bits), */ + /* (private exponent is 2048-bits), */ + /* (private prime0 is 1024-bits), */ + /* (private prime1 is 1024-bits), */ + /* (private prime exponent0 is 1024-bits), */ + /* (private prime exponent1 is 1024-bits), */ + /* and (private coefficient is 1024-bits). */ + static const PRUint8 rsa_version[] = { 0x00 }; + + static const PRUint8 rsa_private_exponent[FIPS_RSA_PRIVATE_EXPONENT_LENGTH] = { + 0x29, 0x08, 0x05, 0x53, 0x89, 0x76, 0xe6, 0x6c, + 0xb5, 0x77, 0xf0, 0xca, 0xdf, 0xf3, 0xf2, 0x67, + 0xda, 0x03, 0xd4, 0x9b, 0x4c, 0x88, 0xce, 0xe5, + 0xf8, 0x44, 0x4d, 0xc7, 0x80, 0x58, 0xe5, 0xff, + 0x22, 0x8f, 0xf5, 0x5b, 0x92, 0x81, 0xbe, 0x35, + 0xdf, 0xda, 0x67, 0x99, 0x3e, 0xfc, 0xe3, 0x83, + 0x6b, 0xa7, 0xaf, 0x16, 0xb7, 0x6f, 0x8f, 0xc0, + 0x81, 0xfd, 0x0b, 0x77, 0x65, 0x95, 0xfb, 0x00, + 0xad, 0x99, 0xec, 0x35, 0xc6, 0xe8, 0x23, 0x3e, + 0xe0, 0x88, 0x88, 0x09, 0xdb, 0x16, 0x50, 0xb7, + 0xcf, 0xab, 0x74, 0x61, 0x9e, 0x7f, 0xc5, 0x67, + 0x38, 0x56, 0xc7, 0x90, 0x85, 0x78, 0x5e, 0x84, + 0x21, 0x49, 0xea, 0xce, 0xb2, 0xa0, 0xff, 0xe4, + 0x70, 0x7f, 0x57, 0x7b, 0xa8, 0x36, 0xb8, 0x54, + 0x8d, 0x1d, 0xf5, 0x44, 0x9d, 0x68, 0x59, 0xf9, + 0x24, 0x6e, 0x85, 0x8f, 0xc3, 0x5f, 0x8a, 0x2c, + 0x94, 0xb7, 0xbc, 0x0e, 0xa5, 0xef, 0x93, 0x06, + 0x38, 0xcd, 0x07, 0x0c, 0xae, 0xb8, 0x44, 0x1a, + 0xd8, 0xe7, 0xf5, 0x9a, 0x1e, 0x9c, 0x18, 0xc7, + 0x6a, 0xc2, 0x7f, 0x28, 0x01, 0x4f, 0xb4, 0xb8, + 0x90, 0x97, 0x5a, 0x43, 0x38, 0xad, 0xe8, 0x95, + 0x68, 0x83, 0x1a, 0x1b, 0x10, 0x07, 0xe6, 0x02, + 0x52, 0x1f, 0xbf, 0x76, 0x6b, 0x46, 0xd6, 0xfb, + 0xc3, 0xbe, 0xb5, 0xac, 0x52, 0x53, 0x01, 0x1c, + 0xf3, 0xc5, 0xeb, 0x64, 0xf2, 0x1e, 0xc4, 0x38, + 0xe9, 0xaa, 0xd9, 0xc3, 0x72, 0x51, 0xa5, 0x44, + 0x58, 0x69, 0x0b, 0x1b, 0x98, 0x7f, 0xf2, 0x23, + 0xff, 0xeb, 0xf0, 0x75, 0x24, 0xcf, 0xc5, 0x1e, + 0xb8, 0x6a, 0xc5, 0x2f, 0x4f, 0x23, 
0x50, 0x7d, + 0x15, 0x9d, 0x19, 0x7a, 0x0b, 0x82, 0xe0, 0x21, + 0x5b, 0x5f, 0x9d, 0x50, 0x2b, 0x83, 0xe4, 0x48, + 0xcc, 0x39, 0xe5, 0xfb, 0x13, 0x7b, 0x6f, 0x81 + }; + + static const PRUint8 rsa_prime0[FIPS_RSA_PRIME0_LENGTH] = { + 0xe4, 0xbf, 0x21, 0x62, 0x9b, 0xa9, 0x77, 0x40, + 0x8d, 0x2a, 0xce, 0xa1, 0x67, 0x5a, 0x4c, 0x96, + 0x45, 0x98, 0x67, 0xbd, 0x75, 0x22, 0x33, 0x6f, + 0xe6, 0xcb, 0x77, 0xde, 0x9e, 0x97, 0x7d, 0x96, + 0x8c, 0x5e, 0x5d, 0x34, 0xfb, 0x27, 0xfc, 0x6d, + 0x74, 0xdb, 0x9d, 0x2e, 0x6d, 0xf6, 0xea, 0xfc, + 0xce, 0x9e, 0xda, 0xa7, 0x25, 0xa2, 0xf4, 0x58, + 0x6d, 0x0a, 0x3f, 0x01, 0xc2, 0xb4, 0xab, 0x38, + 0xc1, 0x14, 0x85, 0xb6, 0xfa, 0x94, 0xc3, 0x85, + 0xf9, 0x3c, 0x2e, 0x96, 0x56, 0x01, 0xe7, 0xd6, + 0x14, 0x71, 0x4f, 0xfb, 0x4c, 0x85, 0x52, 0xc4, + 0x61, 0x1e, 0xa5, 0x1e, 0x96, 0x13, 0x0d, 0x8f, + 0x66, 0xae, 0xa0, 0xcd, 0x7d, 0x25, 0x66, 0x19, + 0x15, 0xc2, 0xcf, 0xc3, 0x12, 0x3c, 0xe8, 0xa4, + 0x52, 0x4c, 0xcb, 0x28, 0x3c, 0xc4, 0xbf, 0x95, + 0x33, 0xe3, 0x81, 0xea, 0x0c, 0x6c, 0xa2, 0x05 + }; + static const PRUint8 rsa_prime1[FIPS_RSA_PRIME1_LENGTH] = { + 0xce, 0x03, 0x94, 0xf4, 0xa9, 0x2c, 0x1e, 0x06, + 0xe7, 0x40, 0x30, 0x01, 0xf7, 0xbb, 0x68, 0x8c, + 0x27, 0xd2, 0x15, 0xe3, 0x28, 0x49, 0x5b, 0xa8, + 0xc1, 0x9a, 0x42, 0x7e, 0x31, 0xf9, 0x08, 0x34, + 0x81, 0xa2, 0x0f, 0x04, 0x61, 0x34, 0xe3, 0x36, + 0x92, 0xb1, 0x09, 0x2b, 0xe9, 0xef, 0x84, 0x88, + 0xbe, 0x9c, 0x98, 0x60, 0xa6, 0x60, 0x84, 0xe9, + 0x75, 0x6f, 0xcc, 0x81, 0xd1, 0x96, 0xef, 0xdd, + 0x2e, 0xca, 0xc4, 0xf5, 0x42, 0xfb, 0x13, 0x2b, + 0x57, 0xbf, 0x14, 0x5e, 0xc2, 0x7f, 0x77, 0x35, + 0x29, 0xc4, 0xe5, 0xe0, 0xf9, 0x6d, 0x15, 0x4a, + 0x42, 0x56, 0x1c, 0x3e, 0x0c, 0xc5, 0xce, 0x70, + 0x08, 0x63, 0x1e, 0x73, 0xdb, 0x7e, 0x74, 0x05, + 0x32, 0x01, 0xc6, 0x36, 0x32, 0x75, 0x6b, 0xed, + 0x9d, 0xfe, 0x7c, 0x7e, 0xa9, 0x57, 0xb4, 0xe9, + 0x22, 0xe4, 0xe7, 0xfe, 0x36, 0x07, 0x9b, 0xdf + }; + static const PRUint8 rsa_exponent0[FIPS_RSA_EXPONENT0_LENGTH] = { + 0x04, 0x5a, 0x3a, 0xa9, 0x64, 
0xaa, 0xd9, 0xd1, + 0x09, 0x9e, 0x99, 0xe5, 0xea, 0x50, 0x86, 0x8a, + 0x89, 0x72, 0x77, 0xee, 0xdb, 0xee, 0xb5, 0xa9, + 0xd8, 0x6b, 0x60, 0xb1, 0x84, 0xb4, 0xff, 0x37, + 0xc1, 0x1d, 0xfe, 0x8a, 0x06, 0x89, 0x61, 0x3d, + 0x37, 0xef, 0x01, 0xd3, 0xa3, 0x56, 0x02, 0x6c, + 0xa3, 0x05, 0xd4, 0xc5, 0x3f, 0x6b, 0x15, 0x59, + 0x25, 0x61, 0xff, 0x86, 0xea, 0x0c, 0x84, 0x01, + 0x85, 0x72, 0xfd, 0x84, 0x58, 0xca, 0x41, 0xda, + 0x27, 0xbe, 0xe4, 0x68, 0x09, 0xe4, 0xe9, 0x63, + 0x62, 0x6a, 0x31, 0x8a, 0x67, 0x8f, 0x55, 0xde, + 0xd4, 0xb6, 0x3f, 0x90, 0x10, 0x6c, 0xf6, 0x62, + 0x17, 0x23, 0x15, 0x7e, 0x33, 0x76, 0x65, 0xb5, + 0xee, 0x7b, 0x11, 0x76, 0xf5, 0xbe, 0xe0, 0xf2, + 0x57, 0x7a, 0x8c, 0x97, 0x0c, 0x68, 0xf5, 0xf8, + 0x41, 0xcf, 0x7f, 0x66, 0x53, 0xac, 0x31, 0x7d + }; + static const PRUint8 rsa_exponent1[FIPS_RSA_EXPONENT1_LENGTH] = { + 0x93, 0x54, 0x14, 0x6e, 0x73, 0x9d, 0x4d, 0x4b, + 0xfa, 0x8c, 0xf8, 0xc8, 0x2f, 0x76, 0x22, 0xea, + 0x38, 0x80, 0x11, 0x8f, 0x05, 0xfc, 0x90, 0x44, + 0x3b, 0x50, 0x2a, 0x45, 0x3d, 0x4f, 0xaf, 0x02, + 0x7d, 0xc2, 0x7b, 0xa2, 0xd2, 0x31, 0x94, 0x5c, + 0x2e, 0xc3, 0xd4, 0x9f, 0x47, 0x09, 0x37, 0x6a, + 0xe3, 0x85, 0xf1, 0xa3, 0x0c, 0xd8, 0xf1, 0xb4, + 0x53, 0x7b, 0xc4, 0x71, 0x02, 0x86, 0x42, 0xbb, + 0x96, 0xff, 0x03, 0xa3, 0xb2, 0x67, 0x03, 0xea, + 0x77, 0x31, 0xfb, 0x4b, 0x59, 0x24, 0xf7, 0x07, + 0x59, 0xfb, 0xa9, 0xba, 0x1e, 0x26, 0x58, 0x97, + 0x66, 0xa1, 0x56, 0x49, 0x39, 0xb1, 0x2c, 0x55, + 0x0a, 0x6a, 0x78, 0x18, 0xba, 0xdb, 0xcf, 0xf4, + 0xf7, 0x32, 0x35, 0xa2, 0x04, 0xab, 0xdc, 0xa7, + 0x6d, 0xd9, 0xd5, 0x06, 0x6f, 0xec, 0x7d, 0x40, + 0x4c, 0xe8, 0x0e, 0xd0, 0xc9, 0xaa, 0xdf, 0x59 + }; + static const PRUint8 rsa_coefficient[FIPS_RSA_COEFFICIENT_LENGTH] = { + 0x17, 0xd7, 0xf5, 0x0a, 0xf0, 0x68, 0x97, 0x96, + 0xc4, 0x29, 0x18, 0x77, 0x9a, 0x1f, 0xe3, 0xf3, + 0x12, 0x13, 0x0f, 0x7e, 0x7b, 0xb9, 0xc1, 0x91, + 0xf9, 0xc7, 0x08, 0x56, 0x5c, 0xa4, 0xbc, 0x83, + 0x71, 0xf9, 0x78, 0xd9, 0x2b, 0xec, 0xfe, 0x6b, + 0xdc, 0x2f, 0x63, 0xc9, 0xcd, 
0x50, 0x14, 0x5b, + 0xd3, 0x6e, 0x85, 0x4d, 0x0c, 0xa2, 0x0b, 0xa0, + 0x09, 0xb6, 0xca, 0x34, 0x9c, 0xc2, 0xc1, 0x4a, + 0xb0, 0xbc, 0x45, 0x93, 0xa5, 0x7e, 0x99, 0xb5, + 0xbd, 0xe4, 0x69, 0x29, 0x08, 0x28, 0xd2, 0xcd, + 0xab, 0x24, 0x78, 0x48, 0x41, 0x26, 0x0b, 0x37, + 0xa3, 0x43, 0xd1, 0x95, 0x1a, 0xd6, 0xee, 0x22, + 0x1c, 0x00, 0x0b, 0xc2, 0xb7, 0xa4, 0xa3, 0x21, + 0xa9, 0xcd, 0xe4, 0x69, 0xd3, 0x45, 0x02, 0xb1, + 0xb7, 0x3a, 0xbf, 0x51, 0x35, 0x1b, 0x78, 0xc2, + 0xcf, 0x0c, 0x0d, 0x60, 0x09, 0xa9, 0x44, 0x02 + }; + + /* RSA Known Plaintext Message (1024-bits). */ + static const PRUint8 rsa_known_plaintext_msg[FIPS_RSA_MESSAGE_LENGTH] = { + "Known plaintext message utilized" + "for RSA Encryption & Decryption" + "blocks SHA256, SHA384 and " + "SHA512 RSA Signature KAT tests. " + "Known plaintext message utilized" + "for RSA Encryption & Decryption" + "blocks SHA256, SHA384 and " + "SHA512 RSA Signature KAT tests." + }; + + /* RSA Known Ciphertext (2048-bits). */ + static const PRUint8 rsa_known_ciphertext[] = { + 0x04, 0x12, 0x46, 0xe3, 0x6a, 0xee, 0xde, 0xdd, + 0x49, 0xa1, 0xd9, 0x83, 0xf7, 0x35, 0xf9, 0x70, + 0x88, 0x03, 0x2d, 0x01, 0x8b, 0xd1, 0xbf, 0xdb, + 0xe5, 0x1c, 0x85, 0xbe, 0xb5, 0x0b, 0x48, 0x45, + 0x7a, 0xf0, 0xa0, 0xe3, 0xa2, 0xbb, 0x4b, 0xf6, + 0x27, 0xd0, 0x1b, 0x12, 0xe3, 0x77, 0x52, 0x34, + 0x9e, 0x8e, 0x03, 0xd2, 0xf8, 0x79, 0x6e, 0x39, + 0x79, 0x53, 0x3c, 0x44, 0x14, 0x94, 0xbb, 0x8d, + 0xaa, 0x14, 0x44, 0xa0, 0x7b, 0xa5, 0x8c, 0x93, + 0x5f, 0x99, 0xa4, 0xa3, 0x6e, 0x7a, 0x38, 0x40, + 0x78, 0xfa, 0x36, 0x91, 0x5e, 0x9a, 0x9c, 0xba, + 0x1e, 0xd4, 0xf9, 0xda, 0x4b, 0x0f, 0xa8, 0xa3, + 0x1c, 0xf3, 0x3a, 0xd1, 0xa5, 0xb4, 0x51, 0x16, + 0xed, 0x4b, 0xcf, 0xec, 0x93, 0x7b, 0x90, 0x21, + 0xbc, 0x3a, 0xf4, 0x0b, 0xd1, 0x3a, 0x2b, 0xba, + 0xa6, 0x7d, 0x5b, 0x53, 0xd8, 0x64, 0xf9, 0x29, + 0x7b, 0x7f, 0x77, 0x3e, 0x51, 0x4c, 0x9a, 0x94, + 0xd2, 0x4b, 0x4a, 0x8d, 0x61, 0x74, 0x97, 0xae, + 0x53, 0x6a, 0xf4, 0x90, 0xc2, 0x2c, 0x49, 0xe2, + 0xfa, 0xeb, 0x91, 
0xc5, 0xe5, 0x83, 0x13, 0xc9, + 0x44, 0x4b, 0x95, 0x2c, 0x57, 0x70, 0x15, 0x5c, + 0x64, 0x8d, 0x1a, 0xfd, 0x2a, 0xc7, 0xb2, 0x9c, + 0x5c, 0x99, 0xd3, 0x4a, 0xfd, 0xdd, 0xf6, 0x82, + 0x87, 0x8c, 0x5a, 0xc4, 0xa8, 0x0d, 0x2a, 0xef, + 0xc3, 0xa2, 0x7e, 0x8e, 0x67, 0x9f, 0x6f, 0x63, + 0xdb, 0xbb, 0x1d, 0x31, 0xc4, 0xbb, 0xbc, 0x13, + 0x3f, 0x54, 0xc6, 0xf6, 0xc5, 0x28, 0x32, 0xab, + 0x96, 0x42, 0x10, 0x36, 0x40, 0x92, 0xbb, 0x57, + 0x55, 0x38, 0xf5, 0x43, 0x7e, 0x43, 0xc4, 0x65, + 0x47, 0x64, 0xaa, 0x0f, 0x4c, 0xe9, 0x49, 0x16, + 0xec, 0x6a, 0x50, 0xfd, 0x14, 0x49, 0xca, 0xdb, + 0x44, 0x54, 0xca, 0xbe, 0xa3, 0x0e, 0x5f, 0xef + }; + + static const RSAPublicKey bl_public_key = { + NULL, + { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus, + FIPS_RSA_MODULUS_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent, + FIPS_RSA_PUBLIC_EXPONENT_LENGTH } + }; + static const RSAPrivateKey bl_private_key = { + NULL, + { FIPS_RSA_TYPE, (unsigned char *)rsa_version, + FIPS_RSA_PRIVATE_VERSION_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus, + FIPS_RSA_MODULUS_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent, + FIPS_RSA_PUBLIC_EXPONENT_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_private_exponent, + FIPS_RSA_PRIVATE_EXPONENT_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_prime0, + FIPS_RSA_PRIME0_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_prime1, + FIPS_RSA_PRIME1_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent0, + FIPS_RSA_EXPONENT0_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent1, + FIPS_RSA_EXPONENT1_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_coefficient, + FIPS_RSA_COEFFICIENT_LENGTH } + }; + + /* RSA variables. 
*/ + SECStatus rsa_status; + RSAPublicKey rsa_public_key; + RSAPrivateKey rsa_private_key; + + PRUint8 rsa_computed_ciphertext[FIPS_RSA_ENCRYPT_LENGTH]; + PRUint8 rsa_computed_plaintext[FIPS_RSA_DECRYPT_LENGTH]; + + rsa_public_key = bl_public_key; + rsa_private_key = bl_private_key; + + /**************************************************/ + /* RSA Single-Round Known Answer Encryption Test. */ + /**************************************************/ + + /* Apply the RSA public key operation to the known plaintext; the output + * must equal the known ciphertext byte for byte. */ + rsa_status = RSA_PublicKeyOp(&rsa_public_key, + rsa_computed_ciphertext, + rsa_known_plaintext_msg); + + if ((rsa_status != SECSuccess) || + (PORT_Memcmp(rsa_computed_ciphertext, rsa_known_ciphertext, + FIPS_RSA_ENCRYPT_LENGTH) != 0)) + goto rsa_loser; + + /**************************************************/ + /* RSA Single-Round Known Answer Decryption Test. */ + /**************************************************/ + + /* Apply the RSA private key operation to the known ciphertext; the output + * must equal the known plaintext byte for byte. */ + rsa_status = RSA_PrivateKeyOp(&rsa_private_key, + rsa_computed_plaintext, + rsa_known_ciphertext); + + if ((rsa_status != SECSuccess) || + (PORT_Memcmp(rsa_computed_plaintext, rsa_known_plaintext_msg, + FIPS_RSA_DECRYPT_LENGTH) != 0)) + goto rsa_loser; + + return (SECSuccess); + +rsa_loser: + + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); +} +/* + * ECDSA known-answer test for the curve described by ecparams. + * + * Derives a key pair from a fixed seed, validates the public value, signs + * the SHA-256 digest of a fixed message using the same fixed seed, requires + * the result to equal knownSignature (knownSignatureLen bytes), and finally + * verifies that signature. + * + * Returns SECSuccess, or SECFailure with SEC_ERROR_LIBRARY_FAILURE set. 
+ */ +static SECStatus +freebl_fips_ECDSA_Test(ECParams *ecparams, + const PRUint8 *knownSignature, + unsigned int knownSignatureLen) +{ + + /* ECDSA Known Seed info for curves nistp256 and nistk283. The same bytes + * seed both key generation and signing below. */ + static const PRUint8 ecdsa_Known_Seed[] = { + 0x6a, 0x9b, 0xf6, 0xf7, 0xce, 0xed, 0x79, 0x11, + 0xf0, 0xc7, 0xc8, 0x9a, 0xa5, 0xd1, 0x57, 0xb1, + 0x7b, 0x5a, 0x3b, 0x76, 0x4e, 0x7b, 0x7c, 0xbc, + 0xf2, 0x76, 0x1c, 0x1c, 0x7f, 0xc5, 0x53, 0x2f + }; + + static const PRUint8 msg[] = { + "Firefox and ThunderBird are awesome!"
+ }; + + unsigned char sha256[SHA256_LENGTH]; /* SHA-256 hash (256 bits) */ + unsigned char sig[2 * MAX_ECKEY_LEN]; + SECItem signature, digest; + ECPrivateKey *ecdsa_private_key = NULL; + ECPublicKey ecdsa_public_key; + SECStatus ecdsaStatus = SECSuccess; + + /* Generates a new EC key pair. The private key is a supplied + * random value (in seed) and the public key is the result of + * performing a scalar point multiplication of that value with + * the curve's base point. + * On failure this returns directly instead of jumping to loser, + * because loser dereferences ecdsa_private_key. + */ + ecdsaStatus = EC_NewKeyFromSeed(ecparams, &ecdsa_private_key, + ecdsa_Known_Seed, + sizeof(ecdsa_Known_Seed)); + if (ecdsaStatus != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /* Construct the public key by struct-copying the curve parameters and + * public value out of the private key; nothing is allocated here. 
*/ + ecdsa_public_key.ecParams = ecdsa_private_key->ecParams; + ecdsa_public_key.publicValue = ecdsa_private_key->publicValue; + + /* validate public key value */ + ecdsaStatus = EC_ValidatePublicKey(&ecdsa_public_key.ecParams, + &ecdsa_public_key.publicValue); + if (ecdsaStatus != SECSuccess) { + goto loser; + } + + /* validate the public key value again, this time through the copy + * held in the private key structure */ + ecdsaStatus = EC_ValidatePublicKey(&ecdsa_private_key->ecParams, + &ecdsa_private_key->publicValue); + if (ecdsaStatus != SECSuccess) { + goto loser; + } + + /***************************************************/ + /* ECDSA Single-Round Known Answer Signature Test.
*/ + /***************************************************/ +/* msg is an array sized by its string initializer, so sizeof msg covers + * the text plus its terminating NUL; the NUL is part of the hashed input. */ + ecdsaStatus = SHA256_HashBuf(sha256, msg, sizeof msg); + if (ecdsaStatus != SECSuccess) { + goto loser; + } + digest.type = siBuffer; + digest.data = sha256; + digest.len = SHA256_LENGTH; + + memset(sig, 0, sizeof sig); + signature.type = siBuffer; + signature.data = sig; + signature.len = sizeof sig; + + ecdsaStatus = ECDSA_SignDigestWithSeed(ecdsa_private_key, &signature, + &digest, ecdsa_Known_Seed, sizeof ecdsa_Known_Seed); + if (ecdsaStatus != SECSuccess) { + goto loser; + } + + if ((signature.len != knownSignatureLen) || + (PORT_Memcmp(signature.data, knownSignature, + knownSignatureLen) != 0)) { + ecdsaStatus = SECFailure; + goto loser; + } + + /******************************************************/ + /* ECDSA Single-Round Known Answer Verification Test. */ + /******************************************************/ + + /* Verify the signature produced above against the same digest. */ + ecdsaStatus = ECDSA_VerifyDigest(&ecdsa_public_key, &signature, &digest); + +loser: + /* Free the private key's arena. Every path reaching this label has a + * non-NULL ecdsa_private_key, since key generation failure returns early. */ + PORT_FreeArena(ecdsa_private_key->ecParams.arena, PR_FALSE); + + if (ecdsaStatus != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + return (SECSuccess); +} +/* + * ECDH known-answer test for the curve described by ecparams. + * + * Derives a shared secret from a fixed public key and a fixed private key, + * hashes it with SHA-256 and compares the hash with a known result. + * + * Returns SECSuccess, or SECFailure with SEC_ERROR_LIBRARY_FAILURE set. 
+ */ +static SECStatus +freebl_fips_ECDH_Test(ECParams *ecparams) +{ + + /* ECDH Known result (reused old CAVS vector) */ + static const PRUint8 ecdh_known_pub_key_1[] = { + EC_POINT_FORM_UNCOMPRESSED, + /* pubX */ + 0x16, 0x81, 0x32, 0x86, 0xc8, 0xe4, 0x3a, 0x1f, + 0x5d, 0xe3, 0x06, 0x22, 0x8b, 0x99, 0x14, 0x25, + 0xf7, 0x9c, 0x5b, 0x1e, 0x96, 0x84, 0x85, 0x3b, + 0x17, 0xfe, 0xf3, 0x1c, 0x0e, 0xed, 0xc4, 0xce, + /* pubY */ + 0x7a, 0x44, 0xfe, 0xbd, 0x91, 0x71, 0x7d, 0x73, + 0xd9, 0x45, 0xea, 0xae, 0x66, 0x78, 0xfa, 0x6e, + 0x46, 0xcd, 0xfa, 0x95, 0x15, 0x47, 0x62, 0x5d, + 0xbb, 0x1b, 0x9f, 0xe6, 0x39, 0xfc, 0xfd, 0x47 + }; + static const PRUint8 ecdh_known_priv_key_2[] = { + 0xb4, 0x2a, 0xe3, 0x69, 0x19, 0xec, 0xf0,
0x42, + 0x6d, 0x45, 0x8c, 0x94, 0x4a, 0x26, 0xa7, 0x5c, + 0xea, 0x9d, 0xd9, 0x0f, 0x59, 0xe0, 0x1a, 0x9d, + 0x7c, 0xb7, 0x1c, 0x04, 0x53, 0xb8, 0x98, 0x5a + }; + static const PRUint8 ecdh_known_hash_result[] = { + 0x16, 0xf3, 0x85, 0xa2, 0x41, 0xf3, 0x7f, 0xc4, + 0x0b, 0x56, 0x47, 0xee, 0xa7, 0x74, 0xb9, 0xdb, + 0xe1, 0xfa, 0x22, 0xe9, 0x04, 0xf1, 0xb6, 0x12, + 0x4b, 0x44, 0x8a, 0xbb, 0xbc, 0x08, 0x2b, 0xa7 + }; + + SECItem ecdh_priv_2, ecdh_pub_1; + SECItem ZZ = { 0, 0, 0 }; + SECStatus ecdhStatus = SECSuccess; + PRUint8 computed_hash_result[HASH_LENGTH_MAX]; + + ecdh_priv_2.data = (PRUint8 *)ecdh_known_priv_key_2; + ecdh_priv_2.len = sizeof(ecdh_known_priv_key_2); + ecdh_pub_1.data = (PRUint8 *)ecdh_known_pub_key_1; + ecdh_pub_1.len = sizeof(ecdh_known_pub_key_1); + + /* Derive the shared secret ZZ from known public key 1 and known + * private key 2 on the supplied curve. ZZ is not compared directly; + * its SHA-256 hash is checked against the known hash result below. + * No key pair is generated in this test. 
+ */ + ecdhStatus = ECDH_Derive(&ecdh_pub_1, ecparams, &ecdh_priv_2, PR_FALSE, &ZZ); + if (ecdhStatus != SECSuccess) { + goto loser; + } + ecdhStatus = SHA256_HashBuf(computed_hash_result, ZZ.data, ZZ.len); + if (ecdhStatus != SECSuccess) { + goto loser; + } + + if (PORT_Memcmp(computed_hash_result, ecdh_known_hash_result, + sizeof(ecdh_known_hash_result)) != 0) { + ecdhStatus = SECFailure; + goto loser; + } + +loser: + if (ZZ.data) { + SECITEM_FreeItem(&ZZ, PR_FALSE); + } + + if (ecdhStatus != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + return (SECSuccess); +} +/* + * EC power-up self test: runs the ECDSA and then the ECDH known-answer + * test on the P-256 parameters defined below. Returns SECSuccess only if + * both pass, SECFailure otherwise. + */ +static SECStatus +freebl_fips_EC_PowerUpSelfTest(void) +{ + + /* EC Known curve nistp256 == ECCCurve_X9_62_PRIME_256V1 params */ + static const unsigned char p256_prime[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF + }; + static const
unsigned char p256_a[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC + }; + static const unsigned char p256_b[] = { + 0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, 0xB3, 0xEB, 0xBD, 0x55, 0x76, + 0x98, 0x86, 0xBC, 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, 0x3B, 0xCE, + 0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B + }; + static const unsigned char p256_base[] = { + 0x04, + 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63, + 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, + 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96, + 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, + 0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, + 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5 + }; + static const unsigned char p256_order[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, + 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51 + }; + static const unsigned char p256_encoding[] = { + 0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07 + }; + static const ECParams ec_known_P256_Params = { + NULL, + ec_params_named, /* arena, type */ + /* fieldID */ + { 256, ec_field_GFp, /* size and type */ + { { siBuffer, (unsigned char *)p256_prime, sizeof(p256_prime) } }, /* u.prime */ + 0, + 0, + 0 }, + /* curve */ + { /* a = curvea b = curveb */ + /* curve.a */ + { siBuffer, (unsigned char *)p256_a, sizeof(p256_a) }, + /* curve.b */ + { siBuffer, (unsigned char *)p256_b, sizeof(p256_b) }, + /* curve.seed */ + { siBuffer, NULL, 0 } }, + /* base = 04xy*/ + { siBuffer, (unsigned char *)p256_base, sizeof(p256_base) }, + /* order */ + { siBuffer, (unsigned char *)p256_order, sizeof(p256_order) }, + 1, /* cofactor */ + /* DEREncoding */ + { siBuffer, 
(unsigned char *)p256_encoding, sizeof(p256_encoding) }, + ECCurve_X9_62_PRIME_256V1, + /* curveOID: the DER encoding above without its two leading bytes */ + { siBuffer, (unsigned char *)(p256_encoding) + 2, sizeof(p256_encoding) - 2 }, + }; + + static const PRUint8 ecdsa_known_P256_signature[] = { + 0x07, 0xb1, 0xcb, 0x57, 0x20, 0xa7, 0x10, 0xd6, + 0x9d, 0x37, 0x4b, 0x1c, 0xdc, 0x35, 0x90, 0xff, + 0x1a, 0x2d, 0x98, 0x95, 0x1b, 0x2f, 0xeb, 0x7f, + 0xbb, 0x81, 0xca, 0xc0, 0x69, 0x75, 0xea, 0xc5, + 0xa7, 0xd2, 0x20, 0xdd, 0x45, 0xf9, 0x2b, 0xdd, + 0xda, 0x98, 0x99, 0x5b, 0x1c, 0x02, 0x3a, 0x27, + 0x8b, 0x7d, 0xb6, 0xed, 0x0e, 0xe0, 0xa7, 0xac, + 0xaa, 0x36, 0x2c, 0xfa, 0x1a, 0xdf, 0x0d, 0xe1 + }; + + ECParams ecparams; + + SECStatus rv; + + /* ECDSA GF(p) prime field curve test, run on a local copy of the + * constant P-256 parameters */ + ecparams = ec_known_P256_Params; + rv = freebl_fips_ECDSA_Test(&ecparams, + ecdsa_known_P256_signature, + sizeof ecdsa_known_P256_signature); + if (rv != SECSuccess) { + return (SECFailure); + } + /* ECDH GF(p) prime field curve test, using the same parameter copy */ + rv = freebl_fips_ECDH_Test(&ecparams); + if (rv != SECSuccess) { + return (SECFailure); + } + + return (SECSuccess); +} +/* + * DSA known-answer test. + * + * Generates a key pair from fixed P, Q, G and a fixed key block, signs a + * fixed digest with a fixed signature block, requires the signature to equal + * the known signature, and then verifies it. + * + * Returns SECSuccess. On failure returns SECFailure with + * SEC_ERROR_NO_MEMORY (key generation) or SEC_ERROR_LIBRARY_FAILURE set. + */ +static SECStatus +freebl_fips_DSA_PowerUpSelfTest(void) +{ + /* DSA Known P (1024-bits), Q (160-bits), and G (1024-bits) Values.
*/ + static const PRUint8 dsa_P[] = { + 0x80, 0xb0, 0xd1, 0x9d, 0x6e, 0xa4, 0xf3, 0x28, + 0x9f, 0x24, 0xa9, 0x8a, 0x49, 0xd0, 0x0c, 0x63, + 0xe8, 0x59, 0x04, 0xf9, 0x89, 0x4a, 0x5e, 0xc0, + 0x6d, 0xd2, 0x67, 0x6b, 0x37, 0x81, 0x83, 0x0c, + 0xfe, 0x3a, 0x8a, 0xfd, 0xa0, 0x3b, 0x08, 0x91, + 0x1c, 0xcb, 0xb5, 0x63, 0xb0, 0x1c, 0x70, 0xd0, + 0xae, 0xe1, 0x60, 0x2e, 0x12, 0xeb, 0x54, 0xc7, + 0xcf, 0xc6, 0xcc, 0xae, 0x97, 0x52, 0x32, 0x63, + 0xd3, 0xeb, 0x55, 0xea, 0x2f, 0x4c, 0xd5, 0xd7, + 0x3f, 0xda, 0xec, 0x49, 0x27, 0x0b, 0x14, 0x56, + 0xc5, 0x09, 0xbe, 0x4d, 0x09, 0x15, 0x75, 0x2b, + 0xa3, 0x42, 0x0d, 0x03, 0x71, 0xdf, 0x0f, 0xf4, + 0x0e, 0xe9, 0x0c, 0x46, 0x93, 0x3d, 0x3f, 0xa6, + 0x6c, 0xdb, 0xca, 0xe5, 0xac, 0x96, 0xc8, 0x64, + 0x5c, 0xec, 0x4b, 0x35, 0x65, 0xfc, 0xfb, 0x5a, + 0x1b, 0x04, 0x1b, 0xa1, 0x0e, 0xfd, 0x88, 0x15 + }; + + static const PRUint8 dsa_Q[] = { + 0xad, 0x22, 0x59, 0xdf, 0xe5, 0xec, 0x4c, 0x6e, + 0xf9, 0x43, 0xf0, 0x4b, 0x2d, 0x50, 0x51, 0xc6, + 0x91, 0x99, 0x8b, 0xcf + }; + + static const PRUint8 dsa_G[] = { + 0x78, 0x6e, 0xa9, 0xd8, 0xcd, 0x4a, 0x85, 0xa4, + 0x45, 0xb6, 0x6e, 0x5d, 0x21, 0x50, 0x61, 0xf6, + 0x5f, 0xdf, 0x5c, 0x7a, 0xde, 0x0d, 0x19, 0xd3, + 0xc1, 0x3b, 0x14, 0xcc, 0x8e, 0xed, 0xdb, 0x17, + 0xb6, 0xca, 0xba, 0x86, 0xa9, 0xea, 0x51, 0x2d, + 0xc1, 0xa9, 0x16, 0xda, 0xf8, 0x7b, 0x59, 0x8a, + 0xdf, 0xcb, 0xa4, 0x67, 0x00, 0x44, 0xea, 0x24, + 0x73, 0xe5, 0xcb, 0x4b, 0xaf, 0x2a, 0x31, 0x25, + 0x22, 0x28, 0x3f, 0x16, 0x10, 0x82, 0xf7, 0xeb, + 0x94, 0x0d, 0xdd, 0x09, 0x22, 0x14, 0x08, 0x79, + 0xba, 0x11, 0x0b, 0xf1, 0xff, 0x2d, 0x67, 0xac, + 0xeb, 0xb6, 0x55, 0x51, 0x69, 0x97, 0xa7, 0x25, + 0x6b, 0x9c, 0xa0, 0x9b, 0xd5, 0x08, 0x9b, 0x27, + 0x42, 0x1c, 0x7a, 0x69, 0x57, 0xe6, 0x2e, 0xed, + 0xa9, 0x5b, 0x25, 0xe8, 0x1f, 0xd2, 0xed, 0x1f, + 0xdf, 0xe7, 0x80, 0x17, 0xba, 0x0d, 0x4d, 0x38 + }; + + /* DSA Known Random Values (known random key block is 160-bits) */ + /* and (known random signature block is 160-bits). 
*/ + static const PRUint8 dsa_known_random_key_block[] = { + "Mozilla Rules World!" + }; + static const PRUint8 dsa_known_random_signature_block[] = { + "Random DSA Signature" + }; + + /* DSA Known Digest (160-bits): the text itself is used as the digest + * value, nothing is hashed in this test */ + static const PRUint8 dsa_known_digest[] = { "DSA Signature Digest" }; + + /* DSA Known Signature (320-bits). */ + static const PRUint8 dsa_known_signature[] = { + 0x25, 0x7c, 0x3a, 0x79, 0x32, 0x45, 0xb7, 0x32, + 0x70, 0xca, 0x62, 0x63, 0x2b, 0xf6, 0x29, 0x2c, + 0x22, 0x2a, 0x03, 0xce, 0x48, 0x15, 0x11, 0x72, + 0x7b, 0x7e, 0xf5, 0x7a, 0xf3, 0x10, 0x3b, 0xde, + 0x34, 0xc1, 0x9e, 0xd7, 0x27, 0x9e, 0x77, 0x38 + }; + + /* DSA variables. */ + DSAPrivateKey *dsa_private_key; + SECStatus dsa_status; + SECItem dsa_signature_item; + SECItem dsa_digest_item; + DSAPublicKey dsa_public_key; + PRUint8 dsa_computed_signature[FIPS_DSA_SIGNATURE_LENGTH]; + static const PQGParams dsa_pqg = { + NULL, + { FIPS_DSA_TYPE, (unsigned char *)dsa_P, FIPS_DSA_PRIME_LENGTH }, + { FIPS_DSA_TYPE, (unsigned char *)dsa_Q, FIPS_DSA_SUBPRIME_LENGTH }, + { FIPS_DSA_TYPE, (unsigned char *)dsa_G, FIPS_DSA_BASE_LENGTH } + }; + + /*******************************************/ + /* Generate a DSA public/private key pair. */ + /*******************************************/ + + /* Derive the key pair from the fixed P, Q, G and the known key block. */ + dsa_status = DSA_NewKeyFromSeed(&dsa_pqg, dsa_known_random_key_block, + &dsa_private_key); + + if (dsa_status != SECSuccess) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + /* Construct the public key by struct-copying the parameters and public + * value out of the private key. */ + dsa_public_key.params = dsa_private_key->params; + dsa_public_key.publicValue = dsa_private_key->publicValue; + + /*************************************************/ + /* DSA Single-Round Known Answer Signature Test.
*/ + /*************************************************/ + + dsa_signature_item.data = dsa_computed_signature; + dsa_signature_item.len = sizeof dsa_computed_signature; + + dsa_digest_item.data = (unsigned char *)dsa_known_digest; + dsa_digest_item.len = SHA1_LENGTH; + + /* Sign the known digest using the known signature block as the seed. */ + dsa_status = DSA_SignDigestWithSeed(dsa_private_key, + &dsa_signature_item, + &dsa_digest_item, + dsa_known_random_signature_block); + + if ((dsa_status != SECSuccess) || + (dsa_signature_item.len != FIPS_DSA_SIGNATURE_LENGTH) || + (PORT_Memcmp(dsa_computed_signature, dsa_known_signature, + FIPS_DSA_SIGNATURE_LENGTH) != 0)) { + dsa_status = SECFailure; + } else { + + /****************************************************/ + /* DSA Single-Round Known Answer Verification Test. */ + /****************************************************/ + + /* Verify the signature just produced against the same digest. */ + dsa_status = DSA_VerifyDigest(&dsa_public_key, + &dsa_signature_item, + &dsa_digest_item); + } + + PORT_FreeArena(dsa_private_key->params.arena, PR_TRUE); + /* Don't free public key, it uses same arena as private key */ + + /* Report failure if signing, the known-answer comparison, or + * verification failed.
*/ + if (dsa_status != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + return (SECSuccess); +} + +static SECStatus +freebl_fips_DH_PowerUpSelfTest(void) +{ + /* DH Known P (2048-bits) */ + static const PRUint8 dh_known_P[] = { + 0xc2, 0x79, 0xbb, 0x76, 0x32, 0x0d, 0x43, 0xfd, + 0x1b, 0x8c, 0xa2, 0x3c, 0x00, 0xdd, 0x6d, 0xef, + 0xf8, 0x1a, 0xd9, 0xc1, 0xa2, 0xf5, 0x73, 0x2b, + 0xdb, 0x1a, 0x3e, 0x84, 0x90, 0xeb, 0xe7, 0x8e, + 0x5f, 0x5c, 0x6b, 0xb6, 0x61, 0x89, 0xd1, 0x03, + 0xb0, 0x5f, 0x91, 0xe4, 0xd2, 0x82, 0x90, 0xfc, + 0x3c, 0x49, 0x69, 0x59, 0xc1, 0x51, 0x6a, 0x85, + 0x71, 0xe7, 0x5d, 0x72, 0x5a, 0x45, 0xad, 0x01, + 0x6f, 0x82, 0xae, 0xec, 0x91, 0x08, 0x2e, 0x7c, + 0x64, 0x93, 0x46, 0x1c, 0x68, 0xef, 0xc2, 0x03, + 0x28, 0x1d, 0x75, 0x3a, 0xeb, 0x9c, 0x46, 0xf0, + 0xc9, 0xdb, 0x99, 0x95, 0x13, 0x66, 0x4d, 0xd5, + 0x1a, 0x78, 0x92, 0x51, 0x89, 0x72, 0x28, 0x7f, + 0x20, 0x70, 0x41, 0x49, 0xa2, 0x86, 0xe9, 0xf9, + 0x78, 0x5f, 0x8d, 0x2e, 0x5d, 0xfa, 0xdb, 0x57, + 0xd4, 0x71, 0xdf, 0x66, 0xe3, 0x9e, 0x88, 0x70, + 0xa4, 0x21, 0x44, 0x6a, 0xc7, 0xae, 0x30, 0x2c, + 0x9c, 0x1f, 0x91, 0x57, 0xc8, 0x24, 0x34, 0x2d, + 0x7a, 0x4a, 0x43, 0xc2, 0x5f, 0xab, 0x64, 0x2e, + 0xaa, 0x28, 0x32, 0x95, 0x42, 0x7b, 0xa0, 0xcc, + 0xdf, 0xfd, 0x22, 0xc8, 0x56, 0x84, 0xc1, 0x62, + 0x15, 0xb2, 0x77, 0x86, 0x81, 0xfc, 0xa5, 0x12, + 0x3c, 0xca, 0x28, 0x17, 0x8f, 0x03, 0x16, 0x6e, + 0xb8, 0x24, 0xfa, 0x1b, 0x15, 0x02, 0xfd, 0x8b, + 0xb6, 0x0a, 0x1a, 0xf7, 0x47, 0x41, 0xc5, 0x2b, + 0x37, 0x3e, 0xa1, 0xbf, 0x68, 0xda, 0x1c, 0x55, + 0x44, 0xc3, 0xee, 0xa1, 0x63, 0x07, 0x11, 0x3b, + 0x5f, 0x00, 0x84, 0xb4, 0xc4, 0xe4, 0xa7, 0x97, + 0x29, 0xf8, 0xce, 0xab, 0xfc, 0x27, 0x3e, 0x34, + 0xe4, 0xc7, 0x81, 0x52, 0x32, 0x0e, 0x27, 0x3c, + 0xa6, 0x70, 0x3f, 0x4a, 0x54, 0xda, 0xdd, 0x60, + 0x26, 0xb3, 0x6e, 0x45, 0x26, 0x19, 0x41, 0x6f + }; + + static const PRUint8 dh_known_Y_1[] = { + 0xb4, 0xc7, 0x85, 0xba, 0xa6, 0x98, 0xb3, 0x77, + 0x41, 0x2b, 0xd9, 0x9a, 0x72, 
0x90, 0xa4, 0xac, + 0xc4, 0xf7, 0xc2, 0x23, 0x9a, 0x68, 0xe2, 0x7d, + 0x3a, 0x54, 0x45, 0x91, 0xc1, 0xd7, 0x8a, 0x17, + 0x54, 0xd3, 0x37, 0xaa, 0x0c, 0xcd, 0x0b, 0xe2, + 0xf2, 0x34, 0x0f, 0x17, 0xa8, 0x07, 0x88, 0xaf, + 0xed, 0xc1, 0x02, 0xd4, 0xdb, 0xdc, 0x0f, 0x22, + 0x51, 0x23, 0x40, 0xb9, 0x65, 0x6d, 0x39, 0xf4, + 0xe1, 0x8b, 0x57, 0x7d, 0xb6, 0xd3, 0xf2, 0x6b, + 0x02, 0xa9, 0x36, 0xf0, 0x0d, 0xe3, 0xdb, 0x9a, + 0xbf, 0x20, 0x00, 0x4d, 0xec, 0x6f, 0x68, 0x95, + 0xee, 0x59, 0x4e, 0x3c, 0xb6, 0xda, 0x7b, 0x19, + 0x08, 0x9a, 0xef, 0x61, 0x43, 0xf5, 0xfb, 0x25, + 0x70, 0x19, 0xc1, 0x5f, 0x0e, 0x0f, 0x6a, 0x63, + 0x44, 0xe9, 0xcf, 0x33, 0xce, 0x13, 0x4f, 0x34, + 0x3c, 0x94, 0x40, 0x8d, 0xf2, 0x65, 0x42, 0xef, + 0x70, 0x54, 0xdd, 0x5f, 0xc1, 0xd7, 0x0b, 0xa6, + 0x06, 0xd5, 0xa6, 0x47, 0xae, 0x2c, 0x1f, 0x5a, + 0xa6, 0xb3, 0xc1, 0x38, 0x3a, 0x3b, 0x60, 0x94, + 0xa2, 0x95, 0xab, 0xb2, 0x86, 0x82, 0xc5, 0x3b, + 0xb8, 0x6f, 0x3e, 0x55, 0x86, 0x84, 0xe0, 0x00, + 0xe5, 0xef, 0xca, 0x5c, 0xec, 0x7e, 0x38, 0x0f, + 0x82, 0xa2, 0xb1, 0xee, 0x48, 0x1b, 0x32, 0xbb, + 0x5a, 0x33, 0xa5, 0x01, 0xba, 0xca, 0xa6, 0x64, + 0x61, 0xb6, 0xe5, 0x5c, 0x0e, 0x5f, 0x2c, 0x66, + 0x0d, 0x01, 0x6a, 0x20, 0x04, 0x70, 0x68, 0x82, + 0x93, 0x29, 0x15, 0x3b, 0x7a, 0x06, 0xb2, 0x92, + 0x61, 0xcd, 0x7e, 0xa4, 0xc1, 0x15, 0x64, 0x3b, + 0x3c, 0x51, 0x10, 0x4c, 0x87, 0xa6, 0xaf, 0x07, + 0xce, 0x46, 0x82, 0x75, 0xf3, 0x90, 0xf3, 0x21, + 0x55, 0x74, 0xc2, 0xe4, 0x96, 0x7d, 0xc3, 0xe6, + 0x33, 0xa5, 0xc6, 0x51, 0xef, 0xec, 0x90, 0x08 + }; + + static const PRUint8 dh_known_x_2[] = { + 0x9e, 0x9b, 0xc3, 0x25, 0x53, 0xf9, 0xfc, 0x92, + 0xb6, 0xae, 0x54, 0x8e, 0x23, 0x4c, 0x94, 0xba, + 0x41, 0xe6, 0x29, 0x33, 0xb9, 0xdb, 0xff, 0x6d, + 0xa8, 0xb8, 0x48, 0x49, 0x66, 0x11, 0xa6, 0x13 + }; + + static const PRUint8 dh_known_hash_result[] = { + 0x93, 0xa2, 0x89, 0x1c, 0x8a, 0xc3, 0x70, 0xbf, + 0xa7, 0xdf, 0xb6, 0xd7, 0x82, 0xfb, 0x87, 0x81, + 0x09, 0x47, 0xf3, 0x9f, 0x5a, 0xbf, 0x4f, 0x3f, + 0x8e, 0x5e, 0x06, 0xca, 
0x30, 0xa7, 0xaf, 0x10 + }; + + /* DH variables. */ + SECStatus dhStatus; + SECItem dh_prime; + SECItem dh_pub_key_1; + SECItem dh_priv_key_2; + SECItem ZZ = { 0, 0, 0 }; + PRUint8 computed_hash_result[HASH_LENGTH_MAX]; + + dh_prime.data = (PRUint8 *)dh_known_P; + dh_prime.len = sizeof(dh_known_P); + dh_pub_key_1.data = (PRUint8 *)dh_known_Y_1; + dh_pub_key_1.len = sizeof(dh_known_Y_1); + dh_priv_key_2.data = (PRUint8 *)dh_known_x_2; + dh_priv_key_2.len = sizeof(dh_known_x_2); + + /* Derive the shared secret ZZ from the known public value 1 and the + * known private value 2. The final argument is the prime's length; + * presumably the requested secret length -- confirm against DH_Derive. */ + dhStatus = DH_Derive(&dh_pub_key_1, &dh_prime, &dh_priv_key_2, &ZZ, dh_prime.len); + if (dhStatus != SECSuccess) { + goto loser; + } + + dhStatus = SHA256_HashBuf(computed_hash_result, ZZ.data, ZZ.len); + if (dhStatus != SECSuccess) { + goto loser; + } + + if (PORT_Memcmp(computed_hash_result, dh_known_hash_result, + sizeof(dh_known_hash_result)) != 0) { + dhStatus = SECFailure; + goto loser; + } + +loser: + if (ZZ.data) { + SECITEM_FreeItem(&ZZ, PR_FALSE); + } + + if (dhStatus != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + return (SECSuccess); +} +/* + * RNG power-up self test. + * + * Runs PRNGTEST_RunHealthTests(), then calls + * FIPS186Change_ReduceModQForDSA() on the fixed GENX and Q below and + * compares the output with rng_known_DSAX. + * + * Returns SECSuccess, or SECFailure with SEC_ERROR_LIBRARY_FAILURE set. 
+ */ +static SECStatus +freebl_fips_RNG_PowerUpSelfTest(void) +{ + static const PRUint8 Q[] = { + 0x85, 0x89, 0x9c, 0x77, 0xa3, 0x79, 0xff, 0x1a, + 0x86, 0x6f, 0x2f, 0x3e, 0x2e, 0xf9, 0x8c, 0x9c, + 0x9d, 0xef, 0xeb, 0xed + }; + static const PRUint8 GENX[] = { + 0x65, 0x48, 0xe3, 0xca, 0xac, 0x64, 0x2d, 0xf7, + 0x7b, 0xd3, 0x4e, 0x79, 0xc9, 0x7d, 0xa6, 0xa8, + 0xa2, 0xc2, 0x1f, 0x8f, 0xe9, 0xb9, 0xd3, 0xa1, + 0x3f, 0xf7, 0x0c, 0xcd, 0xa6, 0xca, 0xbf, 0xce, + 0x84, 0x0e, 0xb6, 0xf1, 0x0d, 0xbe, 0xa9, 0xa3 + }; + static const PRUint8 rng_known_DSAX[] = { + 0x7a, 0x86, 0xf1, 0x7f, 0xbd, 0x4e, 0x6e, 0xd9, + 0x0a, 0x26, 0x21, 0xd0, 0x19, 0xcb, 0x86, 0x73, + 0x10, 0x1f, 0x60, 0xd7 + }; + + SECStatus rng_status = SECSuccess; + PRUint8 DSAX[FIPS_DSA_SUBPRIME_LENGTH]; + + /*******************************************/ + /* Run the SP 800-90 Health tests */ +
/*******************************************/ + rng_status = PRNGTEST_RunHealthTests(); + if (rng_status != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + /*******************************************/ + /* Generate DSAX for given Q. */ + /*******************************************/ + + rng_status = FIPS186Change_ReduceModQForDSA(GENX, Q, DSAX); + + /* Verify DSAX to perform the RNG integrity check */ + if ((rng_status != SECSuccess) || + (PORT_Memcmp(DSAX, rng_known_DSAX, + (FIPS_DSA_SUBPRIME_LENGTH)) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + return (SECSuccess); +} +/* + * Software integrity check: returns SECSuccess if BLAPI_VerifySelf(libname) + * succeeds and SECFailure otherwise. No error code is set here. + */ +static SECStatus +freebl_fipsSoftwareIntegrityTest(const char *libname) +{ + SECStatus rv = SECSuccess; + + /* make sure that our check file signatures are OK */ + if (!BLAPI_VerifySelf(libname)) { + rv = SECFailure; + } + return rv; +} +/* Bit flags for freebl_fipsPowerUpSelfTest(): which group of tests to run. */ +#define DO_FREEBL 1 +#define DO_REST 2 +/* + * Run the power-up self tests selected by the DO_FREEBL / DO_REST bits in + * tests. Stops at the first failing test and returns its status; returns + * SECSuccess when every selected test passed. + */ +static SECStatus +freebl_fipsPowerUpSelfTest(unsigned int tests) +{ + SECStatus rv; + + /* + * stand alone freebl. Only the hash (SHA) tests run here; the RNG test + * runs with the rest below. + */ + if (tests & DO_FREEBL) { + + /* SHA-X Power-Up SelfTest(s). */ + rv = freebl_fips_SHA_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + } + + /* + * test the rest of the algorithms not accessed through freebl + * standalone */ + if (tests & DO_REST) { + + /* RNG Power-Up SelfTest(s). */ + rv = freebl_fips_RNG_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + + /* DES3 Power-Up SelfTest(s). 
*/ + rv = freebl_fips_DES3_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + + /* AES Power-Up SelfTest(s) for 128-bit key. */ + rv = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_128_KEY_SIZE); + + if (rv != SECSuccess) + return rv; + + /* AES Power-Up SelfTest(s) for 192-bit key. */ + rv = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_192_KEY_SIZE); + + if (rv != SECSuccess) + return rv; + + /* AES Power-Up SelfTest(s) for 256-bit key.
*/ + rv = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_256_KEY_SIZE); + + if (rv != SECSuccess) + return rv; + + /* HMAC SHA-X Power-Up SelfTest(s). */ + rv = freebl_fips_HMAC_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + + /* TLS PRF Power-Up SelfTest(s). */ + rv = freebl_fips_TLS_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + + /* NOTE: RSA can only be tested in full freebl. It requires access to + * the locking primitives */ + /* RSA Power-Up SelfTest(s). */ + rv = freebl_fips_RSA_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + + /* DSA Power-Up SelfTest(s). */ + rv = freebl_fips_DSA_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + + /* DH Power-Up SelfTest(s). */ + rv = freebl_fips_DH_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + + /* EC Power-Up SelfTest(s). */ + rv = freebl_fips_EC_PowerUpSelfTest(); + + if (rv != SECSuccess) + return rv; + } + /* Passed Power-Up SelfTest(s). */ + return (SECSuccess); +} + +/* + * state variables. NOTE: freebl has two uses: a standalone use which + * provides limited access to the hash functions through the NSSLOWHASH_ + * interface and a joint use from softoken, using the function pointer + * table. The standalone use can operate without nspr or nss-util, while + * the joint use requires both to be loaded. Certain functions (like RSA) + * need locking from NSPR, for instance. + * + * At load time, we need to handle the two uses separately. If nspr and + * nss-util are loaded, then we can run all the selftests, but if nspr and + * nss-util are not loaded, then we can't run all the selftests, and we need + * to prevent the softoken function pointer table from operating until the + * libraries are loaded and we try to use them.
+ */
+static PRBool self_tests_freebl_ran = PR_FALSE;     /* bl_startup_tests() has been entered */
+static PRBool self_tests_ran = PR_FALSE;            /* the full (DO_REST) suite has been started */
+static PRBool self_tests_freebl_success = PR_FALSE; /* startup tests and integrity check passed */
+static PRBool self_tests_success = PR_FALSE;        /* the full suite passed */
+
+/*
+ * accessors for freebl
+ *
+ * BL_POSTRan reports whether the power-on self tests have been run, not
+ * whether they passed. It returns PR_FALSE only when the on-load tests never
+ * ran. If the caller needs more than standalone freebl (freebl_only is
+ * false) and the remaining tests have not been run yet, they are run here
+ * and their outcome is recorded in self_tests_success; the return value is
+ * PR_TRUE either way.
+ */
+PRBool
+BL_POSTRan(PRBool freebl_only)
+{
+    SECStatus rv;
+    /* if the freebl self tests didn't run, there is something wrong with
+     * our on load tests */
+    if (!self_tests_freebl_ran) {
+        return PR_FALSE;
+    }
+    /* if all the self tests have run, we are good */
+    if (self_tests_ran) {
+        return PR_TRUE;
+    }
+    /* if we only care about the freebl tests, we are good */
+    if (freebl_only) {
+        return PR_TRUE;
+    }
+    /* run the rest of the self tests */
+    /* We could get here if freebl was loaded without the rest of the support
+     * libraries, but now we want to use more than just a standalone freebl.
+     * This requires the other libraries to be loaded.
+     * If they are now loaded, try to run the rest of the selftests,
+     * otherwise fail (disabling access to these algorithms) */
+    self_tests_ran = PR_TRUE; /* set before running, so the tests are attempted only once */
+    BL_Init();     /* required by RSA */
+    RNG_RNGInit(); /* required by RSA */
+    rv = freebl_fipsPowerUpSelfTest(DO_REST);
+    if (rv == SECSuccess) {
+        self_tests_success = PR_TRUE;
+    }
+    return PR_TRUE;
+}
+
+#include "blname.c"
+
+/*
+ * This function is called at dll load time; the code that makes this
+ * happen is platform specific and defined above.
+ */ +static void +bl_startup_tests(void) +{ + const char *libraryName; + PRBool freebl_only = PR_FALSE; + SECStatus rv; + + PORT_Assert(self_tests_freebl_ran == PR_FALSE); + PORT_Assert(self_tests_success == PR_FALSE); + self_tests_freebl_ran = PR_TRUE; /* we are running the tests */ + self_tests_success = PR_FALSE; /* force it just in case */ + self_tests_freebl_success = PR_FALSE; /* force it just in case */ + +#ifdef FREEBL_NO_DEPEND + rv = FREEBL_InitStubs(); + if (rv != SECSuccess) { + freebl_only = PR_TRUE; + } +#endif + + self_tests_freebl_ran = PR_TRUE; /* we are running the tests */ + + if (!freebl_only) { + self_tests_ran = PR_TRUE; /* we're running all the tests */ + BL_Init(); /* needs to be called before RSA can be used */ + RNG_RNGInit(); + } + + /* always run the post tests */ + rv = freebl_fipsPowerUpSelfTest(freebl_only ? DO_FREEBL : DO_FREEBL | DO_REST); + if (rv != SECSuccess) { + return; + } + + libraryName = getLibName(); + rv = freebl_fipsSoftwareIntegrityTest(libraryName); + if (rv != SECSuccess) { + return; + } + + /* posts are happy, allow the fips module to function now */ + self_tests_freebl_success = PR_TRUE; /* we always test the freebl stuff */ + if (!freebl_only) { + self_tests_success = PR_TRUE; + } +} + +/* + * this is called from the freebl init entry points that controll access to + * all other freebl functions. This prevents freebl from operating if our + * power on selftest failed. + */ +SECStatus +BL_FIPSEntryOK(PRBool freebl_only, PRBool rerun) +{ +#ifdef NSS_NO_INIT_SUPPORT + /* this should only be set on platforms that can't handle one of the INIT + * schemes. This code allows those platforms to continue to function, + * though they don't meet the strict NIST requirements. 
If NSS_NO_INIT_SUPPORT + * is not set, and init support has not been properly enabled, freebl + * will always fail because of the test below + */ + if (!self_tests_freebl_ran) { + bl_startup_tests(); + } +#endif + if (rerun) { + /* reset the flags */ + self_tests_freebl_ran = PR_FALSE; + self_tests_success = PR_FALSE; + self_tests_success = PR_FALSE; + self_tests_freebl_success = PR_FALSE; + bl_startup_tests(); + } + /* if the general self tests succeeded, we're done */ + if (self_tests_success) { + return SECSuccess; + } + /* standalone freebl can initialize */ + if (freebl_only && self_tests_freebl_success) { + return SECSuccess; + } + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} +#endif diff --git a/security/nss/lib/freebl/freebl.def b/security/nss/lib/freebl/freebl.def new file mode 100644 index 0000000000..164c843fdc --- /dev/null +++ b/security/nss/lib/freebl/freebl.def @@ -0,0 +1,26 @@ +;+# +;+# This Source Code Form is subject to the terms of the Mozilla Public +;+# License, v. 2.0. If a copy of the MPL was not distributed with this +;+# file, You can obtain one at http://mozilla.org/MPL/2.0/. +;+# +;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS +;+# 1. For all unix platforms, the string ";-" means "remove this line" +;+# 2. For all unix platforms, the string " DATA " will be removed from any +;+# line on which it occurs. +;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX. +;+# On AIX, lines containing ";+" will be removed. +;+# 4. For all unix platforms, the string ";;" will thave the ";;" removed. +;+# 5. For all unix platforms, after the above processing has taken place, +;+# all characters after the first ";" on the line will be removed. +;+# And for AIX, the first ";" will also be removed. +;+# This file is passed directly to windows. 
Since ';' is a comment, all UNIX +;+# directives are hidden behind ";", ";+", and ";-" +;+ +;+NSSprivate_3.11 { # NSS 3.11 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +FREEBL_GetVector; +;+ local: +;+ *; +;+}; diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp new file mode 100644 index 0000000000..65f9a8013c --- /dev/null +++ b/security/nss/lib/freebl/freebl.gyp @@ -0,0 +1,954 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +{ + 'includes': [ + '../../coreconf/config.gypi' + ], + 'targets': [ + { + 'target_name': 'intel-gcm-s_lib', + 'type': 'static_library', + 'sources': [ + 'intel-aes.s', + 'intel-gcm.s', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'cc_is_clang==1 and force_integrated_as!=1', { + 'cflags': [ + '-no-integrated-as', + ], + 'cflags_mozilla': [ + '-no-integrated-as', + ], + 'asflags_mozilla': [ + '-no-integrated-as', + ], + }], + ], + }, + { + 'target_name': 'intel-gcm-wrap_c_lib', + 'type': 'static_library', + 'sources': [ + 'intel-gcm-wrap.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ '(OS=="linux" or OS=="android") and target_arch=="x64"', { + 'dependencies': [ + 'intel-gcm-s_lib', + ], + }], + ], + 'cflags': [ + '-mssse3', + ], + 'cflags_mozilla': [ + '-mssse3' + ], + }, + { + 'target_name': 'hw-acc-crypto-avx', + 'type': 'static_library', + # 'sources': [ + # All AVX hardware accelerated crypto currently requires x64 + # ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'target_arch=="x64"', { + 'cflags': [ + '-mssse3', + '-msse4.1', + '-msse4.2' + ], + 'cflags_mozilla': [ + '-mssse3', + '-msse4.1', + '-msse4.2', + '-mpclmul', + '-maes', + '-mavx', + ], + # GCC doesn't define this. 
+ 'defines': [ + '__SSSE3__', + ], + }], + [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or \ + OS=="netbsd" or OS=="openbsd"', { + 'cflags': [ + '-mpclmul', + '-maes', + '-mavx', + ], + }], + # macOS build doesn't use cflags. + [ 'OS=="mac" or OS=="ios"', { + 'xcode_settings': { + 'OTHER_CFLAGS': [ + '-mssse3', + '-msse4.1', + '-msse4.2', + '-mpclmul', + '-maes', + '-mavx', + ], + }, + }], + [ 'target_arch=="arm"', { + # Gecko doesn't support non-NEON platform on Android, but tier-3 + # platform such as Linux/arm will need it + 'cflags_mozilla': [ + '-mfpu=neon' + ], + }], + [ 'target_arch=="x64"', { + 'sources': [ + 'verified/Hacl_Poly1305_128.c', + 'verified/Hacl_Chacha20_Vec128.c', + 'verified/Hacl_Chacha20Poly1305_128.c', + ], + }], + ], + }, + { + 'target_name': 'hw-acc-crypto-avx2', + 'type': 'static_library', + # 'sources': [ + # All AVX2 hardware accelerated crypto currently requires x64 + # ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'target_arch=="x64"', { + 'cflags': [ + '-mssse3', + '-msse4.1', + '-msse4.2' + ], + 'cflags_mozilla': [ + '-mssse3', + '-msse4.1', + '-msse4.2', + '-mpclmul', + '-maes', + '-mavx', + '-mavx2', + ], + # GCC doesn't define this. + 'defines': [ + '__SSSE3__', + ], + }], + [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or \ + OS=="netbsd" or OS=="openbsd"', { + 'cflags': [ + '-mpclmul', + '-maes', + '-mavx', + '-mavx2', + ], + }], + # macOS build doesn't use cflags. 
+ [ 'OS=="mac" or OS=="ios"', { + 'xcode_settings': { + 'OTHER_CFLAGS': [ + '-mssse3', + '-msse4.1', + '-msse4.2', + '-mpclmul', + '-maes', + '-mavx', + '-mavx2', + ], + }, + }], + [ 'target_arch=="arm"', { + # Gecko doesn't support non-NEON platform on Android, but tier-3 + # platform such as Linux/arm will need it + 'cflags_mozilla': [ + '-mfpu=neon' + ], + }], + [ 'target_arch=="x64"', { + 'sources': [ + 'verified/Hacl_Poly1305_256.c', + 'verified/Hacl_Chacha20_Vec256.c', + 'verified/Hacl_Chacha20Poly1305_256.c', + ], + }], + ], + }, + { + 'target_name': 'gcm-aes-x86_c_lib', + 'type': 'static_library', + 'sources': [ + 'gcm-x86.c', 'aes-x86.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + # Enable isa option for pclmul and aes-ni; supported since gcc 4.4. + # This is only supported by x84/x64. It's not needed for Windows, + # unless clang-cl is used. + 'cflags_mozilla': [ + '-mpclmul', '-maes' + ], + 'conditions': [ + [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', { + 'cflags': [ + '-mpclmul', '-maes' + ], + }], + # macOS build doesn't use cflags. + [ 'OS=="mac" or OS=="ios"', { + 'xcode_settings': { + 'OTHER_CFLAGS': [ + '-mpclmul', '-maes' + ], + }, + }] + ] + }, + { + 'target_name': 'sha-x86_c_lib', + 'type': 'static_library', + 'sources': [ + 'sha256-x86.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'cflags': [ + '-msha', + '-mssse3', + '-msse4.1' + ], + 'cflags_mozilla': [ + '-msha', + '-mssse3', + '-msse4.1' + ], + 'conditions': [ + # macOS build doesn't use cflags. 
+ [ 'OS=="mac" or OS=="ios"', { + 'xcode_settings': { + 'OTHER_CFLAGS': [ + '-msha', + '-mssse3', + '-msse4.1' + ], + }, + }] + ] + }, + { + 'target_name': 'gcm-aes-arm32-neon_c_lib', + 'type': 'static_library', + 'sources': [ + 'gcm-arm32-neon.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'cflags': [ + '-march=armv7', + '-mfpu=neon', + '<@(softfp_cflags)', + ], + 'cflags_mozilla': [ + '-mfpu=neon', + '<@(softfp_cflags)', + ] + }, + { + 'target_name': 'gcm-aes-aarch64_c_lib', + 'type': 'static_library', + 'sources': [ + 'gcm-aarch64.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'cflags': [ + '-march=armv8-a+crypto' + ], + 'cflags_mozilla': [ + '-march=armv8-a+crypto' + ] + }, + { + 'target_name': 'gcm-aes-ppc_c_lib', + 'type': 'static_library', + 'sources': [ + 'gcm-ppc.c', + 'sha512-p8.s', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'disable_crypto_vsx==0', { + 'cflags': [ + '-mcrypto', + '-maltivec' + ], + 'cflags_mozilla': [ + '-mcrypto', + '-maltivec' + ], + }, 'disable_crypto_vsx==1', { + 'cflags': [ + '-maltivec' + ], + 'cflags_mozilla': [ + '-maltivec' + ], + }] + ] + }, + { + 'target_name': 'gcm-aes-ppc_lib', + 'type': 'static_library', + 'sources': [ + 'ppc-gcm.s', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'cc_is_clang==1 and force_integrated_as!=1', { + 'cflags': [ + '-no-integrated-as', + ], + 'cflags_mozilla': [ + '-no-integrated-as', + ], + 'asflags_mozilla': [ + '-no-integrated-as', + ], + }], + ], + }, + { + 'target_name': 'ppc-gcm-wrap-nodepend_c_lib', + 'type': 'static_library', + 'sources': [ + 'ppc-gcm-wrap.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'gcm-aes-ppc_lib', + ], + }, + { + 'target_name': 'ppc-gcm-wrap_c_lib', + 'type': 'static_library', + 'sources': [ + 'ppc-gcm-wrap.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'gcm-aes-ppc_lib', + ], + 
'defines!': [ + 'FREEBL_NO_DEPEND', + ], + }, + { + 'target_name': 'gcm-sha512-nodepend-ppc_c_lib', + 'type': 'static_library', + 'sources': [ + 'sha512.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'disable_crypto_vsx==0', { + 'cflags': [ + '-mcrypto', + '-maltivec', + '-mvsx', + '-funroll-loops', + '-fpeel-loops' + ], + 'cflags_mozilla': [ + '-mcrypto', + '-maltivec', + '-mvsx', + '-funroll-loops', + '-fpeel-loops' + ], + }, 'disable_crypto_vsx==1', { + 'cflags': [ + '-maltivec', + '-funroll-loops', + '-fpeel-loops' + ], + 'cflags_mozilla': [ + '-maltivec', + '-funroll-loops', + '-fpeel-loops' + ], + }] + ] + }, + { + 'target_name': 'gcm-sha512-ppc_c_lib', + 'type': 'static_library', + 'sources': [ + 'sha512.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'disable_crypto_vsx==0', { + 'cflags': [ + '-mcrypto', + '-maltivec', + '-mvsx', + '-funroll-loops', + '-fpeel-loops' + ], + 'cflags_mozilla': [ + '-mcrypto', + '-maltivec', + '-mvsx', + '-funroll-loops', + '-fpeel-loops' + ], + }, 'disable_crypto_vsx==1', { + 'cflags': [ + '-maltivec', + '-funroll-loops', + '-fpeel-loops' + ], + 'cflags_mozilla': [ + '-maltivec', + '-funroll-loops', + '-fpeel-loops' + ], + }] + ], + 'defines!': [ + 'FREEBL_NO_DEPEND', + ], + }, + { + 'target_name': 'chacha20-ppc_lib', + 'type': 'static_library', + 'sources': [ + 'chacha20poly1305-ppc.c', + 'chacha20-ppc64le.S', + ] + }, + { + 'target_name': 'armv8_c_lib', + 'type': 'static_library', + 'sources': [ + 'aes-armv8.c', + 'sha1-armv8.c', + 'sha256-armv8.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'target_arch=="arm"', { + 'cflags': [ + '-march=armv8-a', + '-mfpu=crypto-neon-fp-armv8', + '<@(softfp_cflags)', + ], + 'cflags_mozilla': [ + '-march=armv8-a', + '-mfpu=crypto-neon-fp-armv8', + '<@(softfp_cflags)', + ], + }, 'target_arch=="arm64" or target_arch=="aarch64"', { + 'cflags': [ + 
'-march=armv8-a+crypto' + ], + 'cflags_mozilla': [ + '-march=armv8-a+crypto' + ], + }] + ] + }, + { + 'target_name': 'freebl', + 'type': 'static_library', + 'sources': [ + 'loader.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ] + }, + # Build a static freebl library so we can statically link it into + # the binary. This way we don't have to dlopen() the shared lib + # but can directly call freebl functions. + { + 'target_name': 'freebl_static', + 'type': 'static_library', + 'includes': [ + 'freebl_base.gypi', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'hw-acc-crypto-avx', + 'hw-acc-crypto-avx2', + ], + 'conditions': [ + [ 'target_arch=="ia32" or target_arch=="x64"', { + 'dependencies': [ + 'gcm-aes-x86_c_lib', + ], + }, '(disable_arm_hw_aes==0 or disable_arm_hw_sha1==0 or disable_arm_hw_sha2==0) and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', { + 'dependencies': [ + 'armv8_c_lib' + ], + }], + [ '(target_arch=="ia32" or target_arch=="x64") and disable_intel_hw_sha==0', { + 'dependencies': [ + 'sha-x86_c_lib', + ], + }], + [ 'disable_arm32_neon==0 and target_arch=="arm"', { + 'dependencies': [ + 'gcm-aes-arm32-neon_c_lib', + ], + }], + [ 'disable_arm32_neon==1 and target_arch=="arm"', { + 'defines!': [ + 'NSS_DISABLE_ARM32_NEON', + ], + }], + [ 'target_arch=="arm64" or target_arch=="aarch64"', { + 'dependencies': [ + 'gcm-aes-aarch64_c_lib', + ], + }], + [ 'disable_altivec==0 and target_arch=="ppc64"', { + 'dependencies': [ + 'gcm-aes-ppc_c_lib', + 'gcm-sha512-ppc_c_lib', + ], + }], + [ 'disable_altivec==0 and target_arch=="ppc64le"', { + 'dependencies': [ + 'gcm-aes-ppc_c_lib', + 'gcm-sha512-ppc_c_lib', + 'chacha20-ppc_lib', + 'ppc-gcm-wrap_c_lib', + ], + }], + [ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', { + 'defines!': [ + 'NSS_DISABLE_ALTIVEC', + ], + }], + [ 'disable_crypto_vsx==1 and (target_arch=="ppc" or target_arch=="ppc64" or target_arch=="ppc64le")', { + 
'defines!': [ + 'NSS_DISABLE_CRYPTO_VSX', + ], + }], + [ 'OS=="linux"', { + 'defines!': [ + 'FREEBL_NO_DEPEND', + 'FREEBL_LOWHASH', + 'USE_HW_AES', + 'INTEL_GCM', + 'PPC_GCM', + ], + 'conditions': [ + [ 'target_arch=="x64"', { + # The AES assembler code doesn't work in static builds. + # The linker complains about non-relocatable code, and I + # currently don't know how to fix this properly. + 'sources!': [ + 'intel-aes.s', + 'intel-gcm.s', + ], + }], + ], + }], + ], + }, + { + 'target_name': '<(freebl_name)', + 'type': 'shared_library', + 'includes': [ + 'freebl_base.gypi', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'hw-acc-crypto-avx', + 'hw-acc-crypto-avx2', + ], + 'conditions': [ + [ 'target_arch=="ia32" or target_arch=="x64"', { + 'dependencies': [ + 'gcm-aes-x86_c_lib', + ] + }, 'target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64"', { + 'dependencies': [ + 'armv8_c_lib', + ], + }], + [ '(target_arch=="ia32" or target_arch=="x64") and disable_intel_hw_sha==0', { + 'dependencies': [ + 'sha-x86_c_lib', + ], + }], + [ 'disable_arm32_neon==0 and target_arch=="arm"', { + 'dependencies': [ + 'gcm-aes-arm32-neon_c_lib', + ], + }], + [ 'disable_arm32_neon==1 and target_arch=="arm"', { + 'defines!': [ + 'NSS_DISABLE_ARM32_NEON', + ], + }], + [ 'target_arch=="arm64" or target_arch=="aarch64"', { + 'dependencies': [ + 'gcm-aes-aarch64_c_lib', + ], + }], + [ 'disable_altivec==0', { + 'conditions': [ + [ 'target_arch=="ppc64"', { + 'dependencies': [ + 'gcm-aes-ppc_c_lib', + 'gcm-sha512-nodepend-ppc_c_lib', + ], + }, 'target_arch=="ppc64le"', { + 'dependencies': [ + 'gcm-aes-ppc_c_lib', + 'gcm-sha512-nodepend-ppc_c_lib', + 'ppc-gcm-wrap-nodepend_c_lib', + ], + }], + ], + }], + [ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', { + 'defines!': [ + 'NSS_DISABLE_ALTIVEC', + ], + }], + [ 'disable_crypto_vsx==1 and (target_arch=="ppc" or target_arch=="ppc64" or target_arch=="ppc64le")', { + 'defines!': [ + 
'NSS_DISABLE_CRYPTO_VSX', + ], + }], + [ 'OS!="linux"', { + 'conditions': [ + [ 'moz_fold_libs==0', { + 'dependencies': [ + '<(DEPTH)/lib/util/util.gyp:nssutil3', + ], + }, { + 'libraries': [ + '<(moz_folded_library_name)', + ], + }], + ], + }], + [ '(OS=="linux" or OS=="android") and target_arch=="x64"', { + 'dependencies': [ + 'intel-gcm-wrap_c_lib', + ], + }], + [ 'OS=="win" and (target_arch=="ia32" or target_arch=="x64") and cc_is_clang==1', { + 'dependencies': [ + 'intel-gcm-wrap_c_lib', + ], + }], + [ 'OS=="linux"', { + 'sources': [ + 'nsslowhash.c', + 'stubs.c', + ], + }], + ], + 'variables': { + 'conditions': [ + [ 'OS=="linux"', { + 'mapfile': 'freebl_hash_vector.def', + }, { + 'mapfile': 'freebl.def', + }], + ] + }, + }, + { + 'target_name': 'freebl_64int_3', + 'includes': [ + 'freebl_base.gypi', + ], + 'type': 'shared_library', + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'hw-acc-crypto-avx', + 'hw-acc-crypto-avx2', + ], + }, + { + 'target_name': 'freebl_64fpu_3', + 'includes': [ + 'freebl_base.gypi', + ], + 'type': 'shared_library', + 'sources': [ + 'mpi/mpi_sparc.c', + 'mpi/mpv_sparcv9.s', + 'mpi/montmulfv9.s', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'hw-acc-crypto-avx', + 'hw-acc-crypto-avx2', + ], + 'asflags_mozilla': [ + '-mcpu=v9', '-Wa,-xarch=v9a' + ], + 'defines': [ + 'MP_NO_MP_WORD', + 'MP_USE_UINT_DIGIT', + 'MP_ASSEMBLY_MULTIPLY', + 'MP_USING_MONT_MULF', + 'MP_MONT_USE_MP_MUL', + ], + }, + ], + 'conditions': [ + [ 'OS=="linux"', { + # stub build + 'targets': [ + { + 'target_name': 'freebl3', + 'type': 'shared_library', + 'defines': [ + 'FREEBL_NO_DEPEND', + ], + 'sources': [ + 'lowhash_vector.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'variables': { + 'mapfile': 'freebl_hash.def' + } + }, + ], + }], + ], + 'target_defaults': { + 'include_dirs': [ + 'mpi', + 'ecl', + 'verified', + 'verified/internal', + 'verified/karamel/include', + 'verified/karamel/krmllib/dist/minimal', + 
'deprecated', + ], + 'defines': [ + 'SHLIB_SUFFIX=\"<(dll_suffix)\"', + 'SHLIB_PREFIX=\"<(dll_prefix)\"', + 'SHLIB_VERSION=\"3\"', + 'SOFTOKEN_SHLIB_VERSION=\"3\"', + 'RIJNDAEL_INCLUDE_TABLES', + 'MP_API_COMPATIBLE' + ], + 'conditions': [ + [ 'OS=="win" and target_arch=="ia32"', { + 'msvs_settings': { + 'VCCLCompilerTool': { + #TODO: -Ox optimize flags + 'PreprocessorDefinitions': [ + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_ASSEMBLY_DIV_2DX1D', + 'MP_USE_UINT_DIGIT', + 'MP_NO_MP_WORD', + 'USE_HW_AES', + 'INTEL_GCM', + ], + }, + }, + }], + [ 'OS=="win" and target_arch=="x64"', { + 'msvs_settings': { + 'VCCLCompilerTool': { + #TODO: -Ox optimize flags + 'PreprocessorDefinitions': [ + # Should be copied to mingw defines below + 'MP_IS_LITTLE_ENDIAN', + 'NSS_BEVAND_ARCFOUR', + 'MPI_AMD64', + 'MP_ASSEMBLY_MULTIPLY', + 'NSS_USE_COMBA', + 'USE_HW_AES', + 'INTEL_GCM', + ], + }, + }, + }], + [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="ia32" or target_arch=="x64") and disable_intel_hw_sha==0', { + 'defines': [ + 'USE_HW_SHA2', + ], + }], + [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_aes==0', { + 'defines': [ + 'USE_HW_AES', + ], + }], + [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha1==0', { + 'defines': [ + 'USE_HW_SHA1', + ], + }], + [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha2==0', { + 'defines': [ + 'USE_HW_SHA2', + ], + }], + [ 'cc_use_gnu_ld==1 and OS=="win" and target_arch=="x64"', { + # mingw x64 + 'defines': [ + 'MP_IS_LITTLE_ENDIAN', + ], + }], + # Poly1305_256 requires the flag to run + ['target_arch=="x64"', { + 'defines':[ + 'HACL_CAN_COMPILE_VEC128', + 'HACL_CAN_COMPILE_VEC256', + ], + }], + # MSVC has no __int128 type. Use emulated int128 and leave + # have_int128_support as-is for Curve25519 impl. selection. 
+ [ 'have_int128_support==1 and (OS!="win" or cc_is_clang==1 or cc_is_gcc==1)', { + 'defines': [ + # The Makefile does version-tests on GCC, but we're not doing that here. + 'HAVE_INT128_SUPPORT', + ], + }, { + 'defines': [ + 'KRML_VERIFIED_UINT128', + ], + }], + [ 'OS=="linux"', { + 'defines': [ + 'FREEBL_LOWHASH', + 'FREEBL_NO_DEPEND', + ], + 'conditions': [ + [ 'disable_altivec==0 and target_arch=="ppc64le"', { + 'defines': [ + 'PPC_GCM', + ], + }], + ], + }], + [ 'supports_vale_curve25519==1', { + 'defines': [ + # The Makefile does version-tests on GCC, but we're not doing that here. + 'HACL_CAN_COMPILE_INLINE_ASM', + ], + }], + [ 'OS=="linux" or OS=="android"', { + 'conditions': [ + [ 'target_arch=="x64"', { + 'defines': [ + 'MP_IS_LITTLE_ENDIAN', + 'NSS_BEVAND_ARCFOUR', + 'MPI_AMD64', + 'MP_ASSEMBLY_MULTIPLY', + 'NSS_USE_COMBA', + ], + }], + [ 'target_arch=="x64"', { + 'defines': [ + 'USE_HW_AES', + 'INTEL_GCM', + ], + }], + [ 'target_arch=="ia32"', { + 'defines': [ + 'MP_IS_LITTLE_ENDIAN', + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_ASSEMBLY_DIV_2DX1D', + 'MP_USE_UINT_DIGIT', + ], + }], + [ 'target_arch=="arm"', { + 'defines': [ + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_USE_UINT_DIGIT', + 'SHA_NO_LONG_LONG', + 'ARMHF', + ], + }], + [ 'disable_intel_hw_sha==0 and (target_arch=="ia32" or target_arch=="x64")', { + 'defines': [ + 'USE_HW_SHA2', + ], + }], + [ 'disable_arm_hw_aes==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', { + 'defines': [ + 'USE_HW_AES', + ], + }], + [ 'disable_arm_hw_sha1==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', { + 'defines': [ + 'USE_HW_SHA1', + ], + }], + [ 'disable_arm_hw_sha2==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', { + 'defines': [ + 'USE_HW_SHA2', + ], + }], + ], + }], + ], + }, + 'variables': { + 'module': 'nss', + 'conditions': [ + [ 'target_arch=="x64" and cc_is_gcc==1', { + 
'supports_vale_curve25519%': 1, + }, { + 'supports_vale_curve25519%': 0, + }], + [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', { + 'have_int128_support%': 1, + }, { + 'have_int128_support%': 0, + }], + [ 'target_arch=="arm"', { + # When the compiler uses the softfloat ABI, we want to use the compatible softfp ABI when enabling NEON for these objects. + # Confusingly, __SOFTFP__ is the name of the define for the softfloat ABI, not for the softfp ABI. + 'softfp_cflags': ' /dev/null && echo -mfloat-abi=softfp || true)', + }], + ], + } +} diff --git a/security/nss/lib/freebl/freebl.rc b/security/nss/lib/freebl/freebl.rc new file mode 100644 index 0000000000..444ae5d03f --- /dev/null +++ b/security/nss/lib/freebl/freebl.rc @@ -0,0 +1,68 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "softkver.h" +#include + +#define MY_LIBNAME "freebl" +#define MY_FILEDESCRIPTION "NSS freebl Library" + +#define STRINGIZE(x) #x +#define STRINGIZE2(x) STRINGIZE(x) +#define SOFTOKEN_VMAJOR_STR STRINGIZE2(SOFTOKEN_VMAJOR) + +#ifdef _DEBUG +#define MY_DEBUG_STR " (debug)" +#define MY_FILEFLAGS_1 VS_FF_DEBUG +#else +#define MY_DEBUG_STR "" +#define MY_FILEFLAGS_1 0x0L +#endif +#if SOFTOKEN_BETA +#define MY_FILEFLAGS_2 MY_FILEFLAGS_1|VS_FF_PRERELEASE +#else +#define MY_FILEFLAGS_2 MY_FILEFLAGS_1 +#endif + +#ifdef WINNT +#define MY_FILEOS VOS_NT_WINDOWS32 +#else +#define MY_FILEOS VOS__WINDOWS32 +#endif + +#define MY_INTERNAL_NAME MY_LIBNAME SOFTOKEN_VMAJOR_STR + +///////////////////////////////////////////////////////////////////////////// +// +// Version-information resource +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD + PRODUCTVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD + FILEFLAGSMASK 
VS_FFI_FILEFLAGSMASK + FILEFLAGS MY_FILEFLAGS_2 + FILEOS MY_FILEOS + FILETYPE VFT_DLL + FILESUBTYPE 0x0L // not used + +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904B0" // Lang=US English, CharSet=Unicode + BEGIN + VALUE "CompanyName", "Mozilla Foundation\0" + VALUE "FileDescription", MY_FILEDESCRIPTION MY_DEBUG_STR "\0" + VALUE "FileVersion", SOFTOKEN_VERSION "\0" + VALUE "InternalName", MY_INTERNAL_NAME "\0" + VALUE "OriginalFilename", MY_INTERNAL_NAME ".dll\0" + VALUE "ProductName", "Network Security Services\0" + VALUE "ProductVersion", SOFTOKEN_VERSION "\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END diff --git a/security/nss/lib/freebl/freebl_base.gypi b/security/nss/lib/freebl/freebl_base.gypi new file mode 100644 index 0000000000..d198c442b2 --- /dev/null +++ b/security/nss/lib/freebl/freebl_base.gypi @@ -0,0 +1,236 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+{ + 'sources': [ + 'aeskeywrap.c', + 'cmac.c', + 'alghmac.c', + 'arcfive.c', + 'arcfour.c', + 'blake2b.c', + 'camellia.c', + 'chacha20poly1305.c', + 'crypto_primitives.c', + 'ctr.c', + 'cts.c', + 'des.c', + 'desblapi.c', + 'dh.c', + 'drbg.c', + 'dsa.c', + 'ec.c', + 'ecdecode.c', + 'ecl/ec_naf.c', + 'ecl/ecl.c', + 'ecl/ecl_gf.c', + 'ecl/ecl_mult.c', + 'ecl/ecp_25519.c', + 'ecl/ecp_256.c', + 'ecl/ecp_256_32.c', + 'ecl/ecp_384.c', + 'ecl/ecp_521.c', + 'ecl/ecp_aff.c', + 'ecl/ecp_jac.c', + 'ecl/ecp_jm.c', + 'ecl/ecp_mont.c', + 'ecl/ecp_secp384r1.c', + 'ecl/ecp_secp521r1.c', + 'fipsfreebl.c', + 'blinit.c', + 'freeblver.c', + 'gcm.c', + 'hmacct.c', + 'jpake.c', + 'ldvector.c', + 'md2.c', + 'md5.c', + 'mpi/mp_gf2m.c', + 'mpi/mpcpucache.c', + 'mpi/mpi.c', + 'mpi/mplogic.c', + 'mpi/mpmontg.c', + 'mpi/mpprime.c', + 'pqg.c', + 'rawhash.c', + 'rijndael.c', + 'rsa.c', + 'rsapkcs.c', + 'sha_fast.c', + 'shvfy.c', + 'sysrand.c', + 'tlsprfalg.c', + 'secmpi.c', + ], + 'conditions': [ + [ 'OS=="linux" or OS=="android"', { + 'conditions': [ + [ 'target_arch=="x64"', { + 'sources': [ + 'arcfour-amd64-gas.s', + 'mpi/mpi_amd64.c', + 'mpi/mpi_amd64_common.S', + 'mpi/mp_comba.c', + ], + 'conditions': [ + [ 'cc_is_clang==1 and fuzz!=1 and coverage!=1 and force_integrated_as!=1', { + 'cflags': [ + '-no-integrated-as', + ], + 'cflags_mozilla': [ + '-no-integrated-as', + ], + 'asflags_mozilla': [ + '-no-integrated-as', + ], + }], + ], + }], + [ 'target_arch=="ia32"', { + 'sources': [ + 'mpi/mpi_x86.s', + ], + }], + [ 'target_arch=="arm"', { + 'sources': [ + 'mpi/mpi_arm.c', + ], + }], + [ 'target_arch=="ppc64le"', { + 'sources': [ + 'chacha20poly1305-ppc.c', + 'chacha20-ppc64le.S', + ], + }] + ], + }], + [ 'OS=="win"', { + 'libraries': [ + '-ladvapi32', + ], + 'conditions': [ + [ 'cc_use_gnu_ld!=1 and target_arch=="x64"', { + 'sources': [ + 'arcfour-amd64-masm.asm', + 'mpi/mpi_amd64.c', + 'mpi/mpi_amd64_masm.asm', + 'mpi/mp_comba_amd64_masm.asm', + 'intel-aes-x64-masm.asm', + 
'intel-gcm-x64-masm.asm', + ], + }], + [ 'cc_use_gnu_ld!=1 and target_arch=="ia32"', { + 'sources': [ + 'mpi/mpi_x86_asm.c', + 'intel-aes-x86-masm.asm', + 'intel-gcm-x86-masm.asm', + ], + }], + [ 'cc_use_gnu_ld==1', { + # mingw + 'sources': [ + ], + }], + [ 'cc_is_clang!=1', { + # MSVC + 'sources': [ + 'intel-gcm-wrap.c', + ], + }], + ], + }], + ['have_int128_support==1', { + 'sources': [ + # All intel x64 and 64-bit ARM architectures get the 64 bit version. + 'ecl/curve25519_64.c', + 'verified/Hacl_Curve25519_51.c', + ], + }, { + 'sources': [ + # All other architectures get the generic 32 bit implementation. + 'ecl/curve25519_32.c', + ], + }], + ['supports_vale_curve25519==1', { + 'sources': [ + 'verified/Hacl_Curve25519_64.c', + ], + }], + ['(target_arch!="ppc64" and target_arch!="ppc64le") or disable_altivec==1', { + 'sources': [ + # Gyp does not support per-file cflags, so working around like this. + # ppc performance greatly benefits from specific flags. + 'sha512.c', + ], + }], + [ 'disable_chachapoly==0', { + # The ChaCha20 code is linked in through the static ssse3-crypto lib on + # all platforms that support SSSE3. There are runtime checks in place to + # choose the correct ChaCha implementation at runtime. 
+ 'sources': [ + 'verified/Hacl_Chacha20.c', + 'verified/Hacl_Chacha20Poly1305_32.c', + 'verified/Hacl_Poly1305_32.c', + ], + }], + [ 'disable_deprecated_seed==0', { + 'sources': [ + 'deprecated/seed.c', + ], + }], + [ 'disable_deprecated_rc2==0', { + 'sources': [ + 'deprecated/alg2268.c', + ], + }], + [ 'fuzz==1', { + 'sources!': [ 'drbg.c' ], + 'sources': [ 'det_rng.c' ], + }], + [ 'fuzz_tls==1', { + 'defines': [ + 'UNSAFE_FUZZER_MODE', + ], + }], + [ 'ct_verif==1', { + 'defines': [ + 'CT_VERIF', + ], + }], + [ 'only_dev_random==1', { + 'defines': [ + 'SEED_ONLY_DEV_URANDOM', + ] + }], + [ 'OS=="mac"', { + 'conditions': [ + [ 'target_arch=="ia32"', { + 'sources': [ + 'mpi/mpi_sse2.s', + ], + 'defines': [ + 'MP_USE_UINT_DIGIT', + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_ASSEMBLY_DIV_2DX1D', + ], + }, 'target_arch=="x64"', { + 'sources': [ + 'mpi/mpi_amd64.c', + 'mpi/mpi_amd64_common.S', + 'mpi/mp_comba.c', + ], + 'defines': [ + 'MP_IS_LITTLE_ENDIAN', + 'MPI_AMD64', + 'MP_ASSEMBLY_MULTIPLY', + 'NSS_USE_COMBA', + ], + }], + ], + }], + ], + 'ldflags': [ + '-Wl,-Bsymbolic' + ], +} diff --git a/security/nss/lib/freebl/freebl_hash.def b/security/nss/lib/freebl/freebl_hash.def new file mode 100644 index 0000000000..9fd27367e1 --- /dev/null +++ b/security/nss/lib/freebl/freebl_hash.def @@ -0,0 +1,39 @@ +;+# +;+# This Source Code Form is subject to the terms of the Mozilla Public +;+# License, v. 2.0. If a copy of the MPL was not distributed with this +;+# file, You can obtain one at http://mozilla.org/MPL/2.0/. +;+# +;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS +;+# 1. For all unix platforms, the string ";-" means "remove this line" +;+# 2. For all unix platforms, the string " DATA " will be removed from any +;+# line on which it occurs. +;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX. +;+# On AIX, lines containing ";+" will be removed. +;+# 4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+# 5. For all unix platforms, after the above processing has taken place, +;+# all characters after the first ";" on the line will be removed. +;+# And for AIX, the first ";" will also be removed. +;+# This file is passed directly to windows. Since ';' is a comment, all UNIX +;+# directives are hidden behind ";", ";+", and ";-" +;+ +;+NSSprivate_3.11 { # NSS 3.11 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +FREEBL_GetVector; +;+ local: +;+ *; +;+}; +;+NSSRAWHASH_3.12.3 { # NSS 3.12.3 release +;+ global: +NSSLOW_Init; +NSSLOW_Shutdown; +NSSLOWHASH_Length; +NSSLOWHASH_Begin; +NSSLOWHASH_Destroy; +NSSLOWHASH_End; +NSSLOWHASH_NewContext; +NSSLOWHASH_Update; +;+ local: +;+ *; +;+}; diff --git a/security/nss/lib/freebl/freebl_hash_vector.def b/security/nss/lib/freebl/freebl_hash_vector.def new file mode 100644 index 0000000000..9d7d07d545 --- /dev/null +++ b/security/nss/lib/freebl/freebl_hash_vector.def @@ -0,0 +1,34 @@ +;+# +;+# This Source Code Form is subject to the terms of the Mozilla Public +;+# License, v. 2.0. If a copy of the MPL was not distributed with this +;+# file, You can obtain one at http://mozilla.org/MPL/2.0/. +;+# +;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS +;+# 1. For all unix platforms, the string ";-" means "remove this line" +;+# 2. For all unix platforms, the string " DATA " will be removed from any +;+# line on which it occurs. +;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX. +;+# On AIX, lines containing ";+" will be removed. +;+# 4. For all unix platforms, the string ";;" will have the ";;" removed. +;+# 5. For all unix platforms, after the above processing has taken place, +;+# all characters after the first ";" on the line will be removed. +;+# And for AIX, the first ";" will also be removed. +;+# This file is passed directly to windows.
Since ';' is a comment, all UNIX +;+# directives are hidden behind ";", ";+", and ";-" +;+ +;+NSSprivate_3.11 { # NSS 3.11 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +FREEBL_GetVector; +;+ local: +;+ *; +;+}; +;+NSSprivate_3.16 { # NSS 3.16 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +NSSLOW_GetVector; +;+ local: +;+ *; +;+}; diff --git a/security/nss/lib/freebl/freeblver.c b/security/nss/lib/freebl/freeblver.c new file mode 100644 index 0000000000..9136f0b0bd --- /dev/null +++ b/security/nss/lib/freebl/freeblver.c @@ -0,0 +1,18 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Library identity and versioning */ + +#include "softkver.h" + +#if defined(DEBUG) +#define _DEBUG_STRING " (debug)" +#else +#define _DEBUG_STRING "" +#endif + +/* + * Version information + */ +const char __nss_freebl_version[] = "Version: NSS " SOFTOKEN_VERSION _DEBUG_STRING; diff --git a/security/nss/lib/freebl/gcm-aarch64.c b/security/nss/lib/freebl/gcm-aarch64.c new file mode 100644 index 0000000000..3f3c046d75 --- /dev/null +++ b/security/nss/lib/freebl/gcm-aarch64.c @@ -0,0 +1,96 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "gcm.h" +#include "secerr.h" + +/* old gcc doesn't support some poly64x2_t intrinsic */ +#if defined(__aarch64__) && defined(IS_LITTLE_ENDIAN) && \ + (defined(__clang__) || defined(__GNUC__) && __GNUC__ > 6) + +#include <arm_neon.h> + +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + uint8x16_t ci = vrbitq_u8(vreinterpretq_u8_u64(ghash->x)); + vst1q_u8(outbuf, ci); + return SECSuccess; +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + const poly64x2_t p = vdupq_n_p64(0x87); + const uint8x16_t zero = vdupq_n_u8(0); + const uint64x2_t h = ghash->h; + uint64x2_t ci = ghash->x; + unsigned int i; + uint8x16_t z_low, z_high; + uint8x16_t t_low, t_high; + poly64x2_t t1; + uint8x16_t t2; + + for (i = 0; i < count; i++, buf += 16) { + ci = vreinterpretq_u64_u8(veorq_u8(vreinterpretq_u8_u64(ci), + vrbitq_u8(vld1q_u8(buf)))); + + /* Do binary mult ghash->X = Ci * ghash->H.
*/ + z_low = vreinterpretq_u8_p128( + vmull_p64((poly64_t)vget_low_p64(vreinterpretq_p64_u64(ci)), + (poly64_t)vget_low_p64(vreinterpretq_p64_u64(h)))); + z_high = vreinterpretq_u8_p128( + vmull_high_p64(vreinterpretq_p64_u64(ci), vreinterpretq_p64_u64(h))); + t1 = vreinterpretq_p64_u8( + vextq_u8(vreinterpretq_u8_u64(h), vreinterpretq_u8_u64(h), 8)); + t_low = vreinterpretq_u8_p128( + vmull_p64((poly64_t)vget_low_p64(vreinterpretq_p64_u64(ci)), + (poly64_t)vget_low_p64(t1))); + t_high = vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u64(ci), t1)); + t2 = veorq_u8(t_high, t_low); + z_low = veorq_u8(z_low, vextq_u8(zero, t2, 8)); + z_high = veorq_u8(z_high, vextq_u8(t2, zero, 8)); + + /* polynomial reduction */ + t2 = vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(z_high), p)); + z_high = veorq_u8(z_high, vextq_u8(t2, zero, 8)); + z_low = veorq_u8(z_low, vextq_u8(zero, t2, 8)); + ci = veorq_u64(vreinterpretq_u64_u8(z_low), + vreinterpretq_u64_p128( + vmull_p64((poly64_t)vget_low_p64(vreinterpretq_p64_u8(z_high)), + (poly64_t)vget_low_p64(p)))); + } + + ghash->x = ci; + return SECSuccess; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + /* Workaround of "used uninitialized in this function" error */ + uint64x2_t h = vdupq_n_u64(0); + + ghash->ghash_mul = gcm_HashMult_hw; + ghash->x = vdupq_n_u64(0); + h = vsetq_lane_u64(__builtin_bswap64(ghash->h_low), h, 1); + h = vsetq_lane_u64(__builtin_bswap64(ghash->h_high), h, 0); + h = vreinterpretq_u64_u8(vrbitq_u8(vreinterpretq_u8_u64(h))); + ghash->h = h; + ghash->hw = PR_TRUE; + return SECSuccess; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + ghash->x = vdupq_n_u64(0); + return SECSuccess; +} + +#endif /* defined(__clang__) || (defined(__GNUC__) && __GNUC__ > 6) */ diff --git a/security/nss/lib/freebl/gcm-arm32-neon.c b/security/nss/lib/freebl/gcm-arm32-neon.c new file mode 100644 index 0000000000..be04247701 --- /dev/null +++ b/security/nss/lib/freebl/gcm-arm32-neon.c @@ -0,0 
+1,202 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "blapii.h" +#include "blapit.h" +#include "gcm.h" +#include "secerr.h" +#include "prtypes.h" + +#if defined(IS_LITTLE_ENDIAN) + +#include <arm_neon.h> + +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + vst1_u8(outbuf, vrev64_u8(vcreate_u8(ghash->x_high))); + vst1_u8(outbuf + 8, vrev64_u8(vcreate_u8(ghash->x_low))); + return SECSuccess; +} + +/* Carry-less multiplication. a * b = ret. */ +static inline uint8x16_t +clmul(const uint8x8_t a, const uint8x8_t b) +{ + uint8x16_t d, e, f, g, h, i, j, k, l, m, n; + uint8x8_t t_high, t_low; + uint8x16_t t0, t1, t2, t3; + const uint8x8_t k16 = vcreate_u8(0xffff); + const uint8x8_t k32 = vcreate_u8(0xffffffff); + const uint8x8_t k48 = vcreate_u8(0xffffffffffff); + + // D = A * B + d = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a), + vreinterpret_p8_u8(b))); + // E = A * B1 + e = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a), + vreinterpret_p8_u8(vext_u8(b, b, 1)))); + // F = A1 * B + f = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(vext_u8(a, a, 1)), + vreinterpret_p8_u8(b))); + // G = A * B2 + g = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a), + vreinterpret_p8_u8(vext_u8(b, b, 2)))); + // H = A2 * B + h = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(vext_u8(a, a, 2)), + vreinterpret_p8_u8(b))); + // I = A * B3 + i = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a), + vreinterpret_p8_u8(vext_u8(b, b, 3)))); + // J = A3 * B + j = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(vext_u8(a, a, 3)), + vreinterpret_p8_u8(b))); + // K = A * B4 + k = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a), + vreinterpret_p8_u8(vext_u8(b, b, 4)))); + // L = E + F + l = veorq_u8(e, f); + // M = G + H + m =
veorq_u8(g, h); + // N = I + J + n = veorq_u8(i, j); + + // t0 = (L) (P0 + P1) << 8 + t_high = vget_high_u8(l); + t_low = vget_low_u8(l); + t_low = veor_u8(t_low, t_high); + t_high = vand_u8(t_high, k48); + t_low = veor_u8(t_low, t_high); + t0 = vcombine_u8(t_low, t_high); + t0 = vextq_u8(t0, t0, 15); + + // t1 = (M) (P2 + P3) << 16 + t_high = vget_high_u8(m); + t_low = vget_low_u8(m); + t_low = veor_u8(t_low, t_high); + t_high = vand_u8(t_high, k32); + t_low = veor_u8(t_low, t_high); + t1 = vcombine_u8(t_low, t_high); + t1 = vextq_u8(t1, t1, 14); + + // t2 = (N) (P4 + P5) << 24 + t_high = vget_high_u8(n); + t_low = vget_low_u8(n); + t_low = veor_u8(t_low, t_high); + t_high = vand_u8(t_high, k16); + t_low = veor_u8(t_low, t_high); + t2 = vcombine_u8(t_low, t_high); + t2 = vextq_u8(t2, t2, 13); + + // t3 = (K) (P6 + P7) << 32 + t_high = vget_high_u8(k); + t_low = vget_low_u8(k); + t_low = veor_u8(t_low, t_high); + t_high = vdup_n_u8(0); + t3 = vcombine_u8(t_low, t_high); + t3 = vextq_u8(t3, t3, 12); + + t0 = veorq_u8(t0, t1); + t2 = veorq_u8(t2, t3); + return veorq_u8(veorq_u8(d, t0), t2); +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + const uint8x8_t h_low = vcreate_u8(ghash->h_low); + const uint8x8_t h_high = vcreate_u8(ghash->h_high); + uint8x16_t ci; + uint8x8_t ci_low; + uint8x8_t ci_high; + uint8x16_t z0, z2, z1a; + uint8x16_t z_high, z_low; + uint8x16_t t; + int64x2_t t1, t2, t3; + uint64x2_t z_low_l, z_low_r, z_high_l, z_high_r; + size_t i; + + ci = vcombine_u8(vcreate_u8(ghash->x_low), vcreate_u8(ghash->x_high)); + + for (i = 0; i < count; i++, buf += 16) { + ci = veorq_u8(ci, vcombine_u8(vrev64_u8(vld1_u8(buf + 8)), + vrev64_u8(vld1_u8(buf)))); + ci_high = vget_high_u8(ci); + ci_low = vget_low_u8(ci); + + /* Do binary mult ghash->X = C * ghash->H (Karatsuba). 
*/ + z0 = clmul(ci_low, h_low); + z2 = clmul(ci_high, h_high); + z1a = clmul(veor_u8(ci_high, ci_low), veor_u8(h_high, h_low)); + z1a = veorq_u8(z0, z1a); + z1a = veorq_u8(z2, z1a); + z_high = vcombine_u8(veor_u8(vget_low_u8(z2), vget_high_u8(z1a)), + vget_high_u8(z2)); + z_low = vcombine_u8(vget_low_u8(z0), + veor_u8(vget_high_u8(z0), vget_low_u8(z1a))); + + /* Shift one (multiply by x) as gcm spec is stupid. */ + z_low_l = vshlq_n_u64(vreinterpretq_u64_u8(z_low), 1); + z_low_r = vshrq_n_u64(vreinterpretq_u64_u8(z_low), 63); + z_high_l = vshlq_n_u64(vreinterpretq_u64_u8(z_high), 1); + z_high_r = vshrq_n_u64(vreinterpretq_u64_u8(z_high), 63); + z_low = vreinterpretq_u8_u64( + vcombine_u64(vget_low_u64(z_low_l), + vorr_u64(vget_high_u64(z_low_l), + vget_low_u64(z_low_r)))); + z_high = vreinterpretq_u8_u64( + vcombine_u64(vorr_u64(vget_low_u64(z_high_l), + vget_high_u64(z_low_r)), + vorr_u64(vget_high_u64(z_high_l), + vget_low_u64(z_high_r)))); + + /* Reduce */ + t1 = vshlq_n_s64(vreinterpretq_s64_u8(z_low), 57); + t2 = vshlq_n_s64(vreinterpretq_s64_u8(z_low), 62); + t3 = vshlq_n_s64(vreinterpretq_s64_u8(z_low), 63); + t = vreinterpretq_u8_s64(veorq_s64(t1, veorq_s64(t2, t3))); + + z_low = vcombine_u8(vget_low_u8(z_low), + veor_u8(vget_high_u8(z_low), vget_low_u8(t))); + z_high = vcombine_u8(veor_u8(vget_low_u8(z_high), vget_high_u8(t)), + vget_high_u8(z_high)); + + t = vreinterpretq_u8_u64(vshrq_n_u64(vreinterpretq_u64_u8(z_low), 1)); + z_high = veorq_u8(z_high, z_low); + z_low = veorq_u8(z_low, t); + t = vreinterpretq_u8_u64(vshrq_n_u64(vreinterpretq_u64_u8(t), 6)); + z_low = vreinterpretq_u8_u64( + vshrq_n_u64(vreinterpretq_u64_u8(z_low), 1)); + z_low = veorq_u8(z_low, z_high); + ci = veorq_u8(z_low, t); + } + + vst1_u8((uint8_t *)&ghash->x_high, vget_high_u8(ci)); + vst1_u8((uint8_t *)&ghash->x_low, vget_low_u8(ci)); + return SECSuccess; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + ghash->ghash_mul = gcm_HashMult_hw; + ghash->x_low = 0; + 
ghash->x_high = 0; + ghash->hw = PR_TRUE; + return SECSuccess; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + ghash->x_low = 0; + ghash->x_high = 0; + return SECSuccess; +} + +#endif /* IS_LITTLE_ENDIAN */ diff --git a/security/nss/lib/freebl/gcm-ppc.c b/security/nss/lib/freebl/gcm-ppc.c new file mode 100644 index 0000000000..9bd4f29569 --- /dev/null +++ b/security/nss/lib/freebl/gcm-ppc.c @@ -0,0 +1,109 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "gcm.h" +#include "secerr.h" + +#if defined(USE_PPC_CRYPTO) + +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + vec_xst_be((vec_u8)ghash->x, 0, outbuf); + return SECSuccess; +} + +static vec_u64 +vpmsumd(const vec_u64 a, const vec_u64 b) +{ +#if defined(__clang__) + /* Clang uses a different name */ + return __builtin_altivec_crypto_vpmsumd(a, b); +#elif (__GNUC__ >= 10) || (__GNUC__ == 9 && __GNUC_MINOR__ >= 3) || \ + (__GNUC__ == 8 && __GNUC_MINOR__ >= 4) || \ + (__GNUC__ == 7 && __GNUC_MINOR__ >= 5) + /* GCC versions not affected by https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91275 */ + return __builtin_crypto_vpmsumd(a, b); +#else + /* GCC versions where this builtin is buggy */ + vec_u64 vr; + __asm("vpmsumd %0, %1, %2" + : "=v"(vr) + : "v"(a), "v"(b)); + return vr; +#endif +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + const vec_u8 leftshift = vec_splat_u8(1); + const vec_u64 onebit = (vec_u64){ 1, 0 }; + const unsigned long long pd = 0xc2LLU << 56; + + vec_u64 ci, v, r0, r1; + vec_u64 hibit; + unsigned i; + + ci = ghash->x; + + for (i = 0; i < count; i++, buf += 16) { + /* clang needs the following cast away from const; maybe a bug in 7.0.0 */ + v = (vec_u64)vec_xl_be(0, (unsigned 
char *)buf); + ci ^= v; + + /* Do binary mult ghash->X = C * ghash->H (Karatsuba). */ + r0 = vpmsumd((vec_u64){ ci[0], 0 }, (vec_u64){ ghash->h[0], 0 }); + r1 = vpmsumd((vec_u64){ ci[1], 0 }, (vec_u64){ ghash->h[1], 0 }); + v = (vec_u64){ ci[0] ^ ci[1], ghash->h[0] ^ ghash->h[1] }; + v = vpmsumd((vec_u64){ v[0], 0 }, (vec_u64){ v[1], 0 }); + v ^= r0; + v ^= r1; + r0 ^= (vec_u64){ 0, v[0] }; + r1 ^= (vec_u64){ v[1], 0 }; + + /* Shift one (multiply by x) as gcm spec is stupid. */ + hibit = (vec_u64)vec_splat((vec_u8)r0, 15); + hibit = (vec_u64)vec_rl((vec_u8)hibit, leftshift); + hibit &= onebit; + r0 = vec_sll(r0, leftshift); + r1 = vec_sll(r1, leftshift); + r1 |= hibit; + + /* Reduce */ + v = vpmsumd((vec_u64){ r0[0], 0 }, (vec_u64){ pd, 0 }); + r0 ^= (vec_u64){ 0, v[0] }; + r1 ^= (vec_u64){ v[1], 0 }; + v = vpmsumd((vec_u64){ r0[1], 0 }, (vec_u64){ pd, 0 }); + r1 ^= v; + ci = r0 ^ r1; + } + + ghash->x = ci; + + return SECSuccess; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + ghash->x = (vec_u64)vec_splat_u32(0); + ghash->h = (vec_u64){ ghash->h_low, ghash->h_high }; + ghash->ghash_mul = gcm_HashMult_hw; + ghash->hw = PR_TRUE; + return SECSuccess; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + ghash->x = (vec_u64)vec_splat_u32(0); + return SECSuccess; +} + +#endif /* defined(USE_PPC_CRYPTO) */ diff --git a/security/nss/lib/freebl/gcm-x86.c b/security/nss/lib/freebl/gcm-x86.c new file mode 100644 index 0000000000..e34d633943 --- /dev/null +++ b/security/nss/lib/freebl/gcm-x86.c @@ -0,0 +1,127 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "gcm.h" +#include "secerr.h" + +#include <wmmintrin.h> /* clmul */ + +#define WRITE64(x, bytes) \ + (bytes)[0] = (x) >> 56; \ + (bytes)[1] = (x) >> 48; \ + (bytes)[2] = (x) >> 40; \ + (bytes)[3] = (x) >> 32; \ + (bytes)[4] = (x) >> 24; \ + (bytes)[5] = (x) >> 16; \ + (bytes)[6] = (x) >> 8; \ + (bytes)[7] = (x); + +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + uint64_t tmp_out[2]; + _mm_storeu_si128((__m128i *)tmp_out, ghash->x); + /* maxout must be larger than 16 byte (checked by the caller). */ + WRITE64(tmp_out[0], outbuf + 8); + WRITE64(tmp_out[1], outbuf); + return SECSuccess; +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + size_t i; + pre_align __m128i z_high post_align; + pre_align __m128i z_low post_align; + pre_align __m128i C post_align; + pre_align __m128i D post_align; + pre_align __m128i E post_align; + pre_align __m128i F post_align; + pre_align __m128i bin post_align; + pre_align __m128i Ci post_align; + pre_align __m128i tmp post_align; + + for (i = 0; i < count; i++, buf += 16) { + bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], + ((uint16_t)buf[2] << 8) | buf[3], + ((uint16_t)buf[4] << 8) | buf[5], + ((uint16_t)buf[6] << 8) | buf[7], + ((uint16_t)buf[8] << 8) | buf[9], + ((uint16_t)buf[10] << 8) | buf[11], + ((uint16_t)buf[12] << 8) | buf[13], + ((uint16_t)buf[14] << 8) | buf[15]); + Ci = _mm_xor_si128(bin, ghash->x); + + /* Do binary mult ghash->X = Ci * ghash->H.
*/ + C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); + D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); + E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); + F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); + tmp = _mm_xor_si128(E, F); + z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); + z_high = _mm_unpackhi_epi64(z_high, D); + z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); + z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); + + /* Shift one to the left (multiply by x) as gcm spec is stupid. */ + C = _mm_slli_si128(z_low, 8); + E = _mm_srli_epi64(C, 63); + D = _mm_slli_si128(z_high, 8); + F = _mm_srli_epi64(D, 63); + /* Carry over */ + C = _mm_srli_si128(z_low, 8); + D = _mm_srli_epi64(C, 63); + z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); + z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); + + /* Reduce */ + C = _mm_slli_si128(z_low, 8); + /* D = z_low << 127 */ + D = _mm_slli_epi64(C, 63); + /* E = z_low << 126 */ + E = _mm_slli_epi64(C, 62); + /* F = z_low << 121 */ + F = _mm_slli_epi64(C, 57); + /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ + z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); + C = _mm_srli_si128(z_low, 8); + /* D = z_low >> 1 */ + D = _mm_slli_epi64(C, 63); + D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); + /* E = z_low >> 2 */ + E = _mm_slli_epi64(C, 62); + E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); + /* F = z_low >> 7 */ + F = _mm_slli_epi64(C, 57); + F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); + /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ + ghash->x = _mm_xor_si128(_mm_xor_si128( + _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), + F); + } + return SECSuccess; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + ghash->ghash_mul = gcm_HashMult_hw; + ghash->x = _mm_setzero_si128(); + /* MSVC requires __m64 to load epi64. 
*/ + ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, + ghash->h_low >> 32, (uint32_t)ghash->h_low); + ghash->hw = PR_TRUE; + return SECSuccess; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + ghash->x = _mm_setzero_si128(); + return SECSuccess; +} diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c new file mode 100644 index 0000000000..2dae72419c --- /dev/null +++ b/security/nss/lib/freebl/gcm.c @@ -0,0 +1,1171 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* Thanks to Thomas Pornin for the ideas how to implement the constant time + * binary multiplication. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "blapii.h" +#include "blapit.h" +#include "blapi.h" +#include "gcm.h" +#include "ctr.h" +#include "secerr.h" +#include "prtypes.h" +#include "pkcs11t.h" + +#include + +/* old gcc doesn't support some poly64x2_t intrinsic */ +#if defined(__aarch64__) && defined(IS_LITTLE_ENDIAN) && \ + (defined(__clang__) || defined(__GNUC__) && __GNUC__ > 6) +#define USE_ARM_GCM +#elif defined(__arm__) && defined(IS_LITTLE_ENDIAN) && \ + !defined(NSS_DISABLE_ARM32_NEON) +/* We don't test on big endian platform, so disable this on big endian.
*/ +#define USE_ARM_GCM +#endif + +/* Forward declarations */ +SECStatus gcm_HashInit_hw(gcmHashContext *ghash); +SECStatus gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf); +SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count); +SECStatus gcm_HashZeroX_hw(gcmHashContext *ghash); +SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count); +SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count); + +/* Stub definitions for the above *_hw functions, which shouldn't be used + * unless NSS_X86_OR_X64, USE_ARM_GCM or USE_PPC_CRYPTO is defined */ +#if !defined(NSS_X86_OR_X64) && !defined(USE_ARM_GCM) && !defined(USE_PPC_CRYPTO) +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} +#endif /* !NSS_X86_OR_X64 && !USE_ARM_GCM && !USE_PPC_CRYPTO */ + +uint64_t +get64(const unsigned char *bytes) +{ + return ((uint64_t)bytes[0]) << 56 | + ((uint64_t)bytes[1]) << 48 | + ((uint64_t)bytes[2]) << 40 | + ((uint64_t)bytes[3]) << 32 | + ((uint64_t)bytes[4]) << 24 | + ((uint64_t)bytes[5]) << 16 | + ((uint64_t)bytes[6]) << 8 | + ((uint64_t)bytes[7]); +} + +/* Initialize a gcmHashContext */ +SECStatus +gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) +{ + SECStatus rv = SECSuccess; + + ghash->cLen = 0; + ghash->bufLen = 0; + PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf)); + + ghash->h_low = get64(H + 8); + ghash->h_high =
get64(H); +#ifdef USE_ARM_GCM +#if defined(__aarch64__) + if (arm_pmull_support() && !sw) { +#else + if (arm_neon_support() && !sw) { +#endif +#elif defined(USE_PPC_CRYPTO) + if (ppc_crypto_support() && !sw) { +#else + if (clmul_support() && !sw) { +#endif + rv = gcm_HashInit_hw(ghash); + } else { +/* We fall back to the software implementation if we can't use / don't + * want to use pclmul. */ +#ifdef HAVE_INT128_SUPPORT + ghash->ghash_mul = gcm_HashMult_sftw; +#else + ghash->ghash_mul = gcm_HashMult_sftw32; +#endif + ghash->x_high = ghash->x_low = 0; + ghash->hw = PR_FALSE; + } + return rv; +} + +#ifdef HAVE_INT128_SUPPORT +/* Binary multiplication x * y = r_high << 64 | r_low. */ +void +bmul(uint64_t x, uint64_t y, uint64_t *r_high, uint64_t *r_low) +{ + uint128_t x1, x2, x3, x4, x5; + uint128_t y1, y2, y3, y4, y5; + uint128_t r, z; + + uint128_t m1 = (uint128_t)0x2108421084210842 << 64 | 0x1084210842108421; + uint128_t m2 = (uint128_t)0x4210842108421084 << 64 | 0x2108421084210842; + uint128_t m3 = (uint128_t)0x8421084210842108 << 64 | 0x4210842108421084; + uint128_t m4 = (uint128_t)0x0842108421084210 << 64 | 0x8421084210842108; + uint128_t m5 = (uint128_t)0x1084210842108421 << 64 | 0x0842108421084210; + + x1 = x & m1; + y1 = y & m1; + x2 = x & m2; + y2 = y & m2; + x3 = x & m3; + y3 = y & m3; + x4 = x & m4; + y4 = y & m4; + x5 = x & m5; + y5 = y & m5; + + z = (x1 * y1) ^ (x2 * y5) ^ (x3 * y4) ^ (x4 * y3) ^ (x5 * y2); + r = z & m1; + z = (x1 * y2) ^ (x2 * y1) ^ (x3 * y5) ^ (x4 * y4) ^ (x5 * y3); + r |= z & m2; + z = (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y5) ^ (x5 * y4); + r |= z & m3; + z = (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1) ^ (x5 * y5); + r |= z & m4; + z = (x1 * y5) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2) ^ (x5 * y1); + r |= z & m5; + + *r_high = (uint64_t)(r >> 64); + *r_low = (uint64_t)r; +} + +SECStatus +gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + uint64_t ci_low, ci_high; + size_t i; + uint64_t 
z2_low, z2_high, z0_low, z0_high, z1a_low, z1a_high; + uint128_t z_high = 0, z_low = 0; + + ci_low = ghash->x_low; + ci_high = ghash->x_high; + for (i = 0; i < count; i++, buf += 16) { + ci_low ^= get64(buf + 8); + ci_high ^= get64(buf); + + /* Do binary mult ghash->X = C * ghash->H (Karatsuba). */ + bmul(ci_high, ghash->h_high, &z2_high, &z2_low); + bmul(ci_low, ghash->h_low, &z0_high, &z0_low); + bmul(ci_high ^ ci_low, ghash->h_high ^ ghash->h_low, &z1a_high, &z1a_low); + z1a_high ^= z2_high ^ z0_high; + z1a_low ^= z2_low ^ z0_low; + z_high = ((uint128_t)z2_high << 64) | (z2_low ^ z1a_high); + z_low = (((uint128_t)z0_high << 64) | z0_low) ^ (((uint128_t)z1a_low) << 64); + + /* Shift one (multiply by x) as gcm spec is stupid. */ + z_high = (z_high << 1) | (z_low >> 127); + z_low <<= 1; + + /* Reduce */ + z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); + z_high ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); + ci_low = (uint64_t)z_high; + ci_high = (uint64_t)(z_high >> 64); + } + ghash->x_low = ci_low; + ghash->x_high = ci_high; + return SECSuccess; +} +#else +/* Binary multiplication x * y = r_high << 32 | r_low. 
*/ +void +bmul32(uint32_t x, uint32_t y, uint32_t *r_high, uint32_t *r_low) +{ + uint32_t x0, x1, x2, x3; + uint32_t y0, y1, y2, y3; + uint32_t m1 = (uint32_t)0x11111111; + uint32_t m2 = (uint32_t)0x22222222; + uint32_t m4 = (uint32_t)0x44444444; + uint32_t m8 = (uint32_t)0x88888888; + uint64_t z0, z1, z2, z3; + uint64_t z; + + x0 = x & m1; + x1 = x & m2; + x2 = x & m4; + x3 = x & m8; + y0 = y & m1; + y1 = y & m2; + y2 = y & m4; + y3 = y & m8; + z0 = ((uint64_t)x0 * y0) ^ ((uint64_t)x1 * y3) ^ + ((uint64_t)x2 * y2) ^ ((uint64_t)x3 * y1); + z1 = ((uint64_t)x0 * y1) ^ ((uint64_t)x1 * y0) ^ + ((uint64_t)x2 * y3) ^ ((uint64_t)x3 * y2); + z2 = ((uint64_t)x0 * y2) ^ ((uint64_t)x1 * y1) ^ + ((uint64_t)x2 * y0) ^ ((uint64_t)x3 * y3); + z3 = ((uint64_t)x0 * y3) ^ ((uint64_t)x1 * y2) ^ + ((uint64_t)x2 * y1) ^ ((uint64_t)x3 * y0); + z0 &= ((uint64_t)m1 << 32) | m1; + z1 &= ((uint64_t)m2 << 32) | m2; + z2 &= ((uint64_t)m4 << 32) | m4; + z3 &= ((uint64_t)m8 << 32) | m8; + z = z0 | z1 | z2 | z3; + *r_high = (uint32_t)(z >> 32); + *r_low = (uint32_t)z; +} + +SECStatus +gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + size_t i; + uint64_t ci_low, ci_high; + uint64_t z_high_h, z_high_l, z_low_h, z_low_l; + uint32_t ci_high_h, ci_high_l, ci_low_h, ci_low_l; + uint32_t b_a_h, b_a_l, a_a_h, a_a_l, b_b_h, b_b_l; + uint32_t a_b_h, a_b_l, b_c_h, b_c_l, a_c_h, a_c_l, c_c_h, c_c_l; + uint32_t ci_highXlow_h, ci_highXlow_l, c_a_h, c_a_l, c_b_h, c_b_l; + + uint32_t h_high_h = (uint32_t)(ghash->h_high >> 32); + uint32_t h_high_l = (uint32_t)ghash->h_high; + uint32_t h_low_h = (uint32_t)(ghash->h_low >> 32); + uint32_t h_low_l = (uint32_t)ghash->h_low; + uint32_t h_highXlow_h = h_high_h ^ h_low_h; + uint32_t h_highXlow_l = h_high_l ^ h_low_l; + uint32_t h_highX_xored = h_highXlow_h ^ h_highXlow_l; + + for (i = 0; i < count; i++, buf += 16) { + ci_low = ghash->x_low ^ get64(buf + 8); + ci_high = ghash->x_high ^ get64(buf); + ci_low_h = 
(uint32_t)(ci_low >> 32); + ci_low_l = (uint32_t)ci_low; + ci_high_h = (uint32_t)(ci_high >> 32); + ci_high_l = (uint32_t)ci_high; + ci_highXlow_h = ci_high_h ^ ci_low_h; + ci_highXlow_l = ci_high_l ^ ci_low_l; + + /* Do binary mult ghash->X = C * ghash->H (recursive Karatsuba). */ + bmul32(ci_high_h, h_high_h, &a_a_h, &a_a_l); + bmul32(ci_high_l, h_high_l, &a_b_h, &a_b_l); + bmul32(ci_high_h ^ ci_high_l, h_high_h ^ h_high_l, &a_c_h, &a_c_l); + a_c_h ^= a_a_h ^ a_b_h; + a_c_l ^= a_a_l ^ a_b_l; + a_a_l ^= a_c_h; + a_b_h ^= a_c_l; + /* ci_high * h_high = a_a_h:a_a_l:a_b_h:a_b_l */ + + bmul32(ci_low_h, h_low_h, &b_a_h, &b_a_l); + bmul32(ci_low_l, h_low_l, &b_b_h, &b_b_l); + bmul32(ci_low_h ^ ci_low_l, h_low_h ^ h_low_l, &b_c_h, &b_c_l); + b_c_h ^= b_a_h ^ b_b_h; + b_c_l ^= b_a_l ^ b_b_l; + b_a_l ^= b_c_h; + b_b_h ^= b_c_l; + /* ci_low * h_low = b_a_h:b_a_l:b_b_h:b_b_l */ + + bmul32(ci_highXlow_h, h_highXlow_h, &c_a_h, &c_a_l); + bmul32(ci_highXlow_l, h_highXlow_l, &c_b_h, &c_b_l); + bmul32(ci_highXlow_h ^ ci_highXlow_l, h_highX_xored, &c_c_h, &c_c_l); + c_c_h ^= c_a_h ^ c_b_h; + c_c_l ^= c_a_l ^ c_b_l; + c_a_l ^= c_c_h; + c_b_h ^= c_c_l; + /* (ci_high ^ ci_low) * (h_high ^ h_low) = c_a_h:c_a_l:c_b_h:c_b_l */ + + c_a_h ^= b_a_h ^ a_a_h; + c_a_l ^= b_a_l ^ a_a_l; + c_b_h ^= b_b_h ^ a_b_h; + c_b_l ^= b_b_l ^ a_b_l; + z_high_h = ((uint64_t)a_a_h << 32) | a_a_l; + z_high_l = (((uint64_t)a_b_h << 32) | a_b_l) ^ + (((uint64_t)c_a_h << 32) | c_a_l); + z_low_h = (((uint64_t)b_a_h << 32) | b_a_l) ^ + (((uint64_t)c_b_h << 32) | c_b_l); + z_low_l = ((uint64_t)b_b_h << 32) | b_b_l; + + /* Shift one (multiply by x) as gcm spec is stupid. 
*/ + z_high_h = z_high_h << 1 | z_high_l >> 63; + z_high_l = z_high_l << 1 | z_low_h >> 63; + z_low_h = z_low_h << 1 | z_low_l >> 63; + z_low_l <<= 1; + + /* Reduce */ + z_low_h ^= (z_low_l << 63) ^ (z_low_l << 62) ^ (z_low_l << 57); + z_high_h ^= z_low_h ^ (z_low_h >> 1) ^ (z_low_h >> 2) ^ (z_low_h >> 7); + z_high_l ^= z_low_l ^ (z_low_l >> 1) ^ (z_low_l >> 2) ^ (z_low_l >> 7) ^ + (z_low_h << 63) ^ (z_low_h << 62) ^ (z_low_h << 57); + ghash->x_high = z_high_h; + ghash->x_low = z_high_l; + } + return SECSuccess; +} +#endif /* HAVE_INT128_SUPPORT */ + +static SECStatus +gcm_zeroX(gcmHashContext *ghash) +{ + SECStatus rv = SECSuccess; + + if (ghash->hw) { + rv = gcm_HashZeroX_hw(ghash); + } + + ghash->x_high = ghash->x_low = 0; + return rv; +} + +/* + * implement GCM GHASH using the freebl GHASH function. The gcm_HashMult + * function always takes AES_BLOCK_SIZE lengths of data. gcmHash_Update will + * format the data properly. + */ +SECStatus +gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, + unsigned int len) +{ + unsigned int blocks; + SECStatus rv; + + ghash->cLen += (len * PR_BITS_PER_BYTE); + + /* first deal with the current buffer of data. 
Try to fill it out so + * we can hash it */ + if (ghash->bufLen) { + unsigned int needed = PR_MIN(len, AES_BLOCK_SIZE - ghash->bufLen); + if (needed != 0) { + PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed); + } + buf += needed; + len -= needed; + ghash->bufLen += needed; + if (len == 0) { + /* didn't add enough to hash the data, nothing more do do */ + return SECSuccess; + } + PORT_Assert(ghash->bufLen == AES_BLOCK_SIZE); + /* hash the buffer and clear it */ + rv = ghash->ghash_mul(ghash, ghash->buffer, 1); + PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE); + ghash->bufLen = 0; + if (rv != SECSuccess) { + return SECFailure; + } + } + /* now hash any full blocks remaining in the data stream */ + blocks = len / AES_BLOCK_SIZE; + if (blocks) { + rv = ghash->ghash_mul(ghash, buf, blocks); + if (rv != SECSuccess) { + return SECFailure; + } + buf += blocks * AES_BLOCK_SIZE; + len -= blocks * AES_BLOCK_SIZE; + } + + /* save any remainder in the buffer to be hashed with the next call */ + if (len != 0) { + PORT_Memcpy(ghash->buffer, buf, len); + ghash->bufLen = len; + } + return SECSuccess; +} + +/* + * write out any partial blocks zero padded through the GHASH engine, + * save the lengths for the final completion of the hash + */ +static SECStatus +gcmHash_Sync(gcmHashContext *ghash) +{ + int i; + SECStatus rv; + + /* copy the previous counter to the upper block */ + PORT_Memcpy(ghash->counterBuf, &ghash->counterBuf[GCM_HASH_LEN_LEN], + GCM_HASH_LEN_LEN); + /* copy the current counter in the lower block */ + for (i = 0; i < GCM_HASH_LEN_LEN; i++) { + ghash->counterBuf[GCM_HASH_LEN_LEN + i] = + (ghash->cLen >> ((GCM_HASH_LEN_LEN - 1 - i) * PR_BITS_PER_BYTE)) & 0xff; + } + ghash->cLen = 0; + + /* now zero fill the buffer and hash the last block */ + if (ghash->bufLen) { + PORT_Memset(ghash->buffer + ghash->bufLen, 0, AES_BLOCK_SIZE - ghash->bufLen); + rv = ghash->ghash_mul(ghash, ghash->buffer, 1); + PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE); + ghash->bufLen = 0; 
+ if (rv != SECSuccess) { + return SECFailure; + } + } + return SECSuccess; +} + +#define WRITE64(x, bytes) \ + (bytes)[0] = (x) >> 56; \ + (bytes)[1] = (x) >> 48; \ + (bytes)[2] = (x) >> 40; \ + (bytes)[3] = (x) >> 32; \ + (bytes)[4] = (x) >> 24; \ + (bytes)[5] = (x) >> 16; \ + (bytes)[6] = (x) >> 8; \ + (bytes)[7] = (x); + +/* + * This does the final sync, hashes the lengths, then returns + * "T", the hashed output. + */ +SECStatus +gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout) +{ + unsigned char T[MAX_BLOCK_SIZE]; + SECStatus rv; + + rv = gcmHash_Sync(ghash); + if (rv != SECSuccess) { + goto cleanup; + } + + rv = ghash->ghash_mul(ghash, ghash->counterBuf, + (GCM_HASH_LEN_LEN * 2) / AES_BLOCK_SIZE); + if (rv != SECSuccess) { + goto cleanup; + } + + if (ghash->hw) { + rv = gcm_HashWrite_hw(ghash, T); + if (rv != SECSuccess) { + goto cleanup; + } + } else { + WRITE64(ghash->x_low, T + 8); + WRITE64(ghash->x_high, T); + } + + if (maxout > AES_BLOCK_SIZE) { + maxout = AES_BLOCK_SIZE; + } + PORT_Memcpy(outbuf, T, maxout); + *outlen = maxout; + rv = SECSuccess; + +cleanup: + PORT_Memset(T, 0, sizeof(T)); + return rv; +} + +SECStatus +gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, + unsigned int AADLen) +{ + SECStatus rv; + + // Limit AADLen in accordance with SP800-38D + if (sizeof(AADLen) >= 8) { + unsigned long long AADLen_ull = AADLen; + if (AADLen_ull > (1ULL << 61) - 1) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + + ghash->cLen = 0; + PORT_Memset(ghash->counterBuf, 0, GCM_HASH_LEN_LEN * 2); + ghash->bufLen = 0; + rv = gcm_zeroX(ghash); + if (rv != SECSuccess) { + return rv; + } + + /* now kick things off by hashing the Additional Authenticated Data */ + if (AADLen != 0) { + rv = gcmHash_Update(ghash, AAD, AADLen); + if (rv != SECSuccess) { + return SECFailure; + } + rv = gcmHash_Sync(ghash); + if (rv != SECSuccess) { + return SECFailure; + } + } + return 
SECSuccess; +} + +/************************************************************************** + * Now implement the GCM using gcmHash and CTR * + **************************************************************************/ + +/* state to handle the full GCM operation (hash and counter) */ +struct GCMContextStr { + gcmHashContext *ghash_context; + CTRContext ctr_context; + freeblCipherFunc cipher; + void *cipher_context; + unsigned long tagBits; + unsigned char tagKey[MAX_BLOCK_SIZE]; + PRBool ctr_context_init; + gcmIVContext gcm_iv; +}; + +SECStatus gcm_InitCounter(GCMContext *gcm, const unsigned char *iv, + unsigned int ivLen, unsigned int tagBits, + const unsigned char *aad, unsigned int aadLen); + +GCMContext * +GCM_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *params) +{ + GCMContext *gcm = NULL; + gcmHashContext *ghash = NULL; + unsigned char H[MAX_BLOCK_SIZE]; + unsigned int tmp; + const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params; + SECStatus rv; +#ifdef DISABLE_HW_GCM + const PRBool sw = PR_TRUE; +#else + const PRBool sw = PR_FALSE; +#endif + + gcm = PORT_ZNew(GCMContext); + if (gcm == NULL) { + return NULL; + } + gcm->cipher = cipher; + gcm->cipher_context = context; + ghash = PORT_ZNewAligned(gcmHashContext, 16, mem); + + /* first plug in the ghash context */ + gcm->ghash_context = ghash; + PORT_Memset(H, 0, AES_BLOCK_SIZE); + rv = (*cipher)(context, H, &tmp, AES_BLOCK_SIZE, H, AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + goto loser; + } + rv = gcmHash_InitContext(ghash, H, sw); + if (rv != SECSuccess) { + goto loser; + } + + gcm_InitIVContext(&gcm->gcm_iv); + gcm->ctr_context_init = PR_FALSE; + + /* if gcmPara/ms is NULL, then we are creating an PKCS #11 MESSAGE + * style context, in which we initialize the key once, then do separate + * iv/aad's for each message. In that case we only initialize the key + * and ghash. 
We initialize the counter in each separate message */ + if (gcmParams == NULL) { + /* OK we are finished with init, if we are doing MESSAGE interface, + * return from here */ + return gcm; + } + + rv = gcm_InitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, gcmParams->pAAD, + gcmParams->ulAADLen); + if (rv != SECSuccess) { + goto loser; + } + PORT_Memset(H, 0, AES_BLOCK_SIZE); + gcm->ctr_context_init = PR_TRUE; + return gcm; + +loser: + PORT_Memset(H, 0, AES_BLOCK_SIZE); + if (ghash && ghash->mem) { + void *mem = ghash->mem; + PORT_Memset(ghash, 0, sizeof(gcmHashContext)); + PORT_Free(mem); + } + if (gcm) { + PORT_ZFree(gcm, sizeof(GCMContext)); + } + return NULL; +} + +SECStatus +gcm_InitCounter(GCMContext *gcm, const unsigned char *iv, unsigned int ivLen, + unsigned int tagBits, const unsigned char *aad, + unsigned int aadLen) +{ + gcmHashContext *ghash = gcm->ghash_context; + unsigned int tmp; + PRBool freeCtr = PR_FALSE; + CK_AES_CTR_PARAMS ctrParams; + SECStatus rv; + + /* Verify our parameters here */ + if (ivLen == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto loser; + } + + if (tagBits != 128 && tagBits != 120 && + tagBits != 112 && tagBits != 104 && + tagBits != 96 && tagBits != 64 && + tagBits != 32) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto loser; + } + + /* fill in the Counter context */ + ctrParams.ulCounterBits = 32; + PORT_Memset(ctrParams.cb, 0, sizeof(ctrParams.cb)); + if (ivLen == 12) { + PORT_Memcpy(ctrParams.cb, iv, ivLen); + ctrParams.cb[AES_BLOCK_SIZE - 1] = 1; + } else { + rv = gcmHash_Reset(ghash, NULL, 0); + if (rv != SECSuccess) { + goto loser; + } + rv = gcmHash_Update(ghash, iv, ivLen); + if (rv != SECSuccess) { + goto loser; + } + rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + goto loser; + } + } + rv = CTR_InitContext(&gcm->ctr_context, gcm->cipher_context, gcm->cipher, + (unsigned char *)&ctrParams); + if (rv != SECSuccess) { + goto loser; + } + 
freeCtr = PR_TRUE; + + /* fill in the gcm structure */ + gcm->tagBits = tagBits; /* save for final step */ + /* calculate the final tag key. NOTE: gcm->tagKey is zero to start with. + * if this assumption changes, we would need to explicitly clear it here */ + PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey)); + rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, AES_BLOCK_SIZE, + gcm->tagKey, AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + goto loser; + } + + /* finally mix in the AAD data */ + rv = gcmHash_Reset(ghash, aad, aadLen); + if (rv != SECSuccess) { + goto loser; + } + + PORT_Memset(&ctrParams, 0, sizeof ctrParams); + return SECSuccess; + +loser: + PORT_Memset(&ctrParams, 0, sizeof ctrParams); + if (freeCtr) { + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + } + return SECFailure; +} + +void +GCM_DestroyContext(GCMContext *gcm, PRBool freeit) +{ + void *mem = gcm->ghash_context->mem; + /* ctr_context is statically allocated and will be freed when we free + * gcm. 
call their destroy functions to free up any locally + * allocated data (like mp_int's) */ + if (gcm->ctr_context_init) { + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + } + PORT_Memset(gcm->ghash_context, 0, sizeof(gcmHashContext)); + PORT_Free(mem); + PORT_Memset(&gcm->tagBits, 0, sizeof(gcm->tagBits)); + PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey)); + if (freeit) { + PORT_Free(gcm); + } +} + +static SECStatus +gcm_GetTag(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout) +{ + unsigned int tagBytes; + unsigned int extra; + unsigned int i; + SECStatus rv; + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + extra = tagBytes * PR_BITS_PER_BYTE - gcm->tagBits; + + if (outbuf == NULL) { + *outlen = tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + if (maxout < tagBytes) { + *outlen = tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + maxout = tagBytes; + rv = gcmHash_Final(gcm->ghash_context, outbuf, outlen, maxout); + if (rv != SECSuccess) { + return SECFailure; + } + + for (i = 0; i < *outlen; i++) { + outbuf[i] ^= gcm->tagKey[i]; + } + /* mask off any extra bits we got */ + if (extra) { + outbuf[tagBytes - 1] &= ~((1 << extra) - 1); + } + return SECSuccess; +} + +/* + * See The Galois/Counter Mode of Operation, McGrew and Viega. + * GCM is basically counter mode with a specific initialization and + * built in macing operation. 
+ */ +SECStatus +GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + SECStatus rv; + unsigned int tagBytes; + unsigned int len; + + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + if (UINT_MAX - inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen + tagBytes) { + *outlen = inlen + tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, + inbuf, inlen, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + return SECFailure; + } + rv = gcmHash_Update(gcm->ghash_context, outbuf, *outlen); + if (rv != SECSuccess) { + PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ + *outlen = 0; + return SECFailure; + } + rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen); + if (rv != SECSuccess) { + PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ + *outlen = 0; + return SECFailure; + }; + *outlen += len; + return SECSuccess; +} + +/* + * See The Galois/Counter Mode of Operation, McGrew and Viega. + * GCM is basically counter mode with a specific initialization and + * built in macing operation. NOTE: the only difference between Encrypt + * and Decrypt is when we calculate the mac. That is because the mac must + * always be calculated on the cipher text, not the plain text, so for + * encrypt, we do the CTR update first and for decrypt we do the mac first. 
+ */ +SECStatus +GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + SECStatus rv; + unsigned int tagBytes; + unsigned char tag[MAX_BLOCK_SIZE]; + const unsigned char *intag; + unsigned int len; + + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + /* get the authentication block */ + if (inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + inlen -= tagBytes; + intag = inbuf + inlen; + + /* verify the block */ + rv = gcmHash_Update(gcm->ghash_context, inbuf, inlen); + if (rv != SECSuccess) { + return SECFailure; + } + rv = gcm_GetTag(gcm, tag, &len, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + return SECFailure; + } + /* Don't decrypt if we can't authenticate the encrypted data! + * This assumes that if tagBits is not a multiple of 8, intag will + * preserve the masked off missing bits. */ + if (NSS_SecureMemcmp(tag, intag, tagBytes) != 0) { + /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */ + PORT_SetError(SEC_ERROR_BAD_DATA); + PORT_Memset(tag, 0, sizeof(tag)); + return SECFailure; + } + PORT_Memset(tag, 0, sizeof(tag)); + /* finish the decryption */ + return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, + inbuf, inlen, AES_BLOCK_SIZE); +} + +void +gcm_InitIVContext(gcmIVContext *gcmIv) +{ + gcmIv->counter = 0; + gcmIv->max_count = 0; + gcmIv->ivGen = CKG_GENERATE; + gcmIv->ivLen = 0; + gcmIv->fixedBits = 0; +} + +/* + * generate the IV on the fly and return it to the application. 
+ * This function keeps a counter, which may be used in the IV + * generation, or may be used simply to make sure we don't + * generate too many IV's from this same key. + * PKCS #11 defines 4 generating values: + * 1) CKG_NO_GENERATE: just use the passed in IV as is. + * 2) CKG_GENERATE: the application doesn't care what generation + * scheme is used (we default to counter in this code). + * 3) CKG_GENERATE_COUNTER: The IV is the value of a counter. + * 4) CKG_GENERATE_RANDOM: The IV is randomly generated. + * We add a fifth rule: + * 5) CKG_GENERATE_COUNTER_XOR: The Counter value is xor'ed with + * the IV. + * The value fixedBits specifies the number of bits that will be passed + * on from the original IV. The counter or the random data is loaded + * in the remainder of the IV not covered by fixedBits, overwriting any + * data there. In the xor case the counter is xor'ed with the data in the + * IV. In all cases only bits outside of fixedBits are modified. + * The number of IV's we can generate is restricted by the size of the + * variable part of the IV and the generation algorithm used. Because of + * this, we require subsequent calls on this context to use the same + * generator, IV len, and fixed bits as the first call.
+ */ +SECStatus +gcm_GenerateIV(gcmIVContext *gcmIv, unsigned char *iv, unsigned int ivLen, + unsigned int fixedBits, CK_GENERATOR_FUNCTION ivGen) +{ + unsigned int i; + unsigned int flexBits; + unsigned int ivOffset; + unsigned int ivNewCount; + unsigned char ivMask; + unsigned char ivSave; + SECStatus rv; + + if (gcmIv->counter != 0) { + /* If we've already generated a message, make sure all subsequent + * messages are using the same generator */ + if ((gcmIv->ivGen != ivGen) || (gcmIv->fixedBits != fixedBits) || + (gcmIv->ivLen != ivLen)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + } else { + /* remember these values */ + gcmIv->ivGen = ivGen; + gcmIv->fixedBits = fixedBits; + gcmIv->ivLen = ivLen; + /* now calculate how may bits of IV we have to supply */ + flexBits = ivLen * PR_BITS_PER_BYTE; /* bytes->bits */ + /* first make sure we aren't going to overflow */ + if (flexBits < fixedBits) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + flexBits -= fixedBits; + /* if we are generating a random number reduce the acceptable bits to + * avoid birthday attacks */ + if (ivGen == CKG_GENERATE_RANDOM) { + if (flexBits <= GCMIV_RANDOM_BIRTHDAY_BITS) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* see freebl/blapit.h for how we calculate + * GCMIV_RANDOM_BIRTHDAY_BITS */ + flexBits -= GCMIV_RANDOM_BIRTHDAY_BITS; + flexBits = flexBits >> 1; + } + if (flexBits == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* Turn those bits into the number of IV's we can safely return */ + if (flexBits >= sizeof(gcmIv->max_count) * PR_BITS_PER_BYTE) { + gcmIv->max_count = PR_UINT64(0xffffffffffffffff); + } else { + gcmIv->max_count = PR_UINT64(1) << flexBits; + } + } + + /* no generate, accept the IV from the source */ + if (ivGen == CKG_NO_GENERATE) { + gcmIv->counter = 1; + return SECSuccess; + } + + /* make sure we haven't exceeded the number of IVs we can return + * for this key, 
generator, and IV size */ + if (gcmIv->counter >= gcmIv->max_count) { + /* use a unique error from just bad user input */ + PORT_SetError(SEC_ERROR_EXTRA_INPUT); + return SECFailure; + } + + /* build to mask to handle the first byte of the IV */ + ivOffset = fixedBits / PR_BITS_PER_BYTE; + ivMask = 0xff >> ((8 - (fixedBits & 7)) & 7); + ivNewCount = ivLen - ivOffset; + + /* finally generate the IV */ + switch (ivGen) { + case CKG_GENERATE: /* default to counter */ + case CKG_GENERATE_COUNTER: + iv[ivOffset] = (iv[ivOffset] & ~ivMask) | + (PORT_GET_BYTE_BE(gcmIv->counter, 0, ivNewCount) & ivMask); + for (i = 1; i < ivNewCount; i++) { + iv[ivOffset + i] = PORT_GET_BYTE_BE(gcmIv->counter, i, ivNewCount); + } + break; + /* for TLS 1.3 */ + case CKG_GENERATE_COUNTER_XOR: + iv[ivOffset] ^= + (PORT_GET_BYTE_BE(gcmIv->counter, 0, ivNewCount) & ivMask); + for (i = 1; i < ivNewCount; i++) { + iv[ivOffset + i] ^= PORT_GET_BYTE_BE(gcmIv->counter, i, ivNewCount); + } + break; + case CKG_GENERATE_RANDOM: + ivSave = iv[ivOffset] & ~ivMask; + rv = RNG_GenerateGlobalRandomBytes(iv + ivOffset, ivNewCount); + iv[ivOffset] = ivSave | (iv[ivOffset] & ivMask); + if (rv != SECSuccess) { + return rv; + } + break; + } + gcmIv->counter++; + return SECSuccess; +} + +SECStatus +GCM_EncryptAEAD(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + SECStatus rv; + unsigned int tagBytes; + unsigned int len; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + 
PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* if we were initialized with the C_EncryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = gcm_GenerateIV(&gcm->gcm_iv, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulIvFixedBits, gcmParams->ivGenerator); + if (rv != SECSuccess) { + return SECFailure; + } + + rv = gcm_InitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + rv = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, + inbuf, inlen, AES_BLOCK_SIZE); + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + if (rv != SECSuccess) { + return SECFailure; + } + rv = gcmHash_Update(gcm->ghash_context, outbuf, *outlen); + if (rv != SECSuccess) { + PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ + *outlen = 0; + return SECFailure; + } + rv = gcm_GetTag(gcm, gcmParams->pTag, &len, tagBytes); + if (rv != SECSuccess) { + PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ + *outlen = 0; + return SECFailure; + }; + return SECSuccess; +} + +SECStatus +GCM_DecryptAEAD(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + SECStatus rv; + unsigned int tagBytes; + unsigned char tag[MAX_BLOCK_SIZE]; + const unsigned char *intag; + unsigned int len; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + 
return SECFailure; + } + + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* if we were initialized with the C_DecryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = gcm_InitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + intag = gcmParams->pTag; + PORT_Assert(tagBytes != 0); + + /* verify the block */ + rv = gcmHash_Update(gcm->ghash_context, inbuf, inlen); + if (rv != SECSuccess) { + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + return SECFailure; + } + rv = gcm_GetTag(gcm, tag, &len, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + return SECFailure; + } + /* Don't decrypt if we can't authenticate the encrypted data! + * This assumes that if tagBits is may not be a multiple of 8, intag will + * preserve the masked off missing bits. 
*/ + if (NSS_SecureMemcmp(tag, intag, tagBytes) != 0) { + /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */ + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + PORT_SetError(SEC_ERROR_BAD_DATA); + PORT_Memset(tag, 0, sizeof(tag)); + return SECFailure; + } + PORT_Memset(tag, 0, sizeof(tag)); + /* finish the decryption */ + rv = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, + inbuf, inlen, AES_BLOCK_SIZE); + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + return rv; +} diff --git a/security/nss/lib/freebl/gcm.h b/security/nss/lib/freebl/gcm.h new file mode 100644 index 0000000000..21792e0b35 --- /dev/null +++ b/security/nss/lib/freebl/gcm.h @@ -0,0 +1,125 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef GCM_H +#define GCM_H 1 + +#include "blapii.h" +#include "pkcs11t.h" +#include + +#ifdef NSS_X86_OR_X64 +/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */ +#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) +#pragma GCC push_options +#pragma GCC target("sse2") +#undef NSS_DISABLE_SSE2 +#define NSS_DISABLE_SSE2 1 +#endif /* GCC <= 4.8 */ + +#include /* __m128i */ + +#ifdef NSS_DISABLE_SSE2 +#undef NSS_DISABLE_SSE2 +#pragma GCC pop_options +#endif /* NSS_DISABLE_SSE2 */ +#endif + +#ifdef __aarch64__ +#include +#endif + +#if defined(__powerpc64__) +#include "ppc-crypto.h" +#endif + +SEC_BEGIN_PROTOS + +#ifdef HAVE_INT128_SUPPORT +typedef unsigned __int128 uint128_t; +#endif + +typedef struct GCMContextStr GCMContext; + +/* + * The context argument is the inner cipher context to use with cipher. The + * GCMContext does not own context. context needs to remain valid for as long + * as the GCMContext is valid. 
+ * + * The cipher argument is a block cipher in the ECB encrypt mode. + */ +GCMContext *GCM_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *params); +void GCM_DestroyContext(GCMContext *gcm, PRBool freeit); +SECStatus GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +SECStatus GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +SECStatus GCM_EncryptAEAD(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); +SECStatus GCM_DecryptAEAD(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); + +/* These functions are here only so we can test them */ +#define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */ +typedef struct gcmHashContextStr gcmHashContext; +typedef SECStatus (*ghash_t)(gcmHashContext *, const unsigned char *, + unsigned int); +pre_align struct gcmHashContextStr { +#ifdef NSS_X86_OR_X64 + __m128i x, h; +#elif defined(__aarch64__) + uint64x2_t x, h; +#elif defined(USE_PPC_CRYPTO) + vec_u64 x, h; +#endif + uint64_t x_low, x_high, h_high, h_low; + unsigned char buffer[MAX_BLOCK_SIZE]; + unsigned int bufLen; + uint8_t counterBuf[16]; + uint64_t cLen; + ghash_t ghash_mul; + PRBool hw; + gcmHashContext *mem; +} post_align; + +typedef struct gcmIVContextStr gcmIVContext; +struct gcmIVContextStr { + PRUint64 counter; + PRUint64 max_count; + CK_GENERATOR_FUNCTION ivGen; + unsigned 
int fixedBits; + unsigned int ivLen; +}; + +SECStatus gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, + unsigned int len); +SECStatus gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, + PRBool sw); +SECStatus gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, + unsigned int AADLen); +SECStatus gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout); + +void gcm_InitIVContext(gcmIVContext *gcmiv); +SECStatus gcm_GenerateIV(gcmIVContext *gcmIv, unsigned char *iv, + unsigned int ivLen, unsigned int fixedBits, + CK_GENERATOR_FUNCTION ivGen); + +SEC_END_PROTOS + +#endif diff --git a/security/nss/lib/freebl/genload.c b/security/nss/lib/freebl/genload.c new file mode 100644 index 0000000000..832deb58c4 --- /dev/null +++ b/security/nss/lib/freebl/genload.c @@ -0,0 +1,167 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file is meant to be included by other .c files. + * This file takes a "parameter", the scope which includes this + * code shall declare this variable: + * const char *NameOfThisSharedLib; + * + * NameOfThisSharedLib: + * The file name of the shared library that shall be used as the + * "reference library". The loader will attempt to load the requested + * library from the same directory as the reference library. + */ + +#ifdef XP_UNIX +#include +#define BL_MAXSYMLINKS 20 + +/* + * If 'link' is a symbolic link, this function follows the symbolic links + * and returns the pathname of the ultimate source of the symbolic links. + * If 'link' is not a symbolic link, this function returns NULL. + * The caller should call PR_Free to free the string returned by this + * function. 
+ */ +static char* +loader_GetOriginalPathname(const char* link) +{ +#ifdef __GLIBC__ + char* tmp = realpath(link, NULL); + char* resolved; + if (!tmp) + return NULL; + resolved = PR_Malloc(strlen(tmp) + 1); + strcpy(resolved, tmp); /* This is necessary because PR_Free might not be using free() */ + free(tmp); + return resolved; +#else + char* resolved = NULL; + char* input = NULL; + PRUint32 iterations = 0; + PRInt32 len = 0, retlen = 0; + if (!link) { + PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0); + return NULL; + } + len = PR_MAX(1024, strlen(link) + 1); + resolved = PR_Malloc(len); + input = PR_Malloc(len); + if (!resolved || !input) { + if (resolved) { + PR_Free(resolved); + } + if (input) { + PR_Free(input); + } + return NULL; + } + strcpy(input, link); + while ((iterations++ < BL_MAXSYMLINKS) && + ((retlen = readlink(input, resolved, len - 1)) > 0)) { + char* tmp = input; + resolved[retlen] = '\0'; /* NULL termination */ + input = resolved; + resolved = tmp; + } + PR_Free(resolved); + if (iterations == 1 && retlen < 0) { + PR_Free(input); + input = NULL; + } + return input; +#endif +} +#endif /* XP_UNIX */ + +/* + * Load the library with the file name 'name' residing in the same + * directory as the reference library, whose pathname is 'referencePath'. 
+ */ +static PRLibrary* +loader_LoadLibInReferenceDir(const char* referencePath, const char* name) +{ + PRLibrary* dlh = NULL; + char* fullName = NULL; + char* c; + PRLibSpec libSpec; + + /* Remove the trailing filename from referencePath and add the new one */ + c = strrchr(referencePath, PR_GetDirectorySeparator()); + if (c) { + size_t referencePathSize = 1 + c - referencePath; + fullName = (char*)PORT_Alloc(strlen(name) + referencePathSize + 1); + if (fullName) { + memcpy(fullName, referencePath, referencePathSize); + strcpy(fullName + referencePathSize, name); +#ifdef DEBUG_LOADER + PR_fprintf(PR_STDOUT, "\nAttempting to load fully-qualified %s\n", + fullName); +#endif + libSpec.type = PR_LibSpec_Pathname; + libSpec.value.pathname = fullName; + dlh = PR_LoadLibraryWithFlags(libSpec, PR_LD_NOW | PR_LD_LOCAL); + PORT_Free(fullName); + } + } + return dlh; +} + +/* + * We use PR_GetLibraryFilePathname to get the pathname of the loaded + * shared lib that contains this function, and then do a PR_LoadLibrary + * with an absolute pathname for the softoken shared library. + */ + +static PRLibrary* +loader_LoadLibrary(const char* nameToLoad) +{ + PRLibrary* lib = NULL; + char* fullPath = NULL; + PRLibSpec libSpec; + + /* Get the pathname for nameOfAlreadyLoadedLib, i.e. /usr/lib/libnss3.so + * PR_GetLibraryFilePathname works with either the base library name or a + * function pointer, depending on the platform. We can't query an exported + * symbol such as NSC_GetFunctionList, because on some platforms we can't + * find symbols in loaded implicit dependencies. + * But we can just get the address of this function ! + */ + fullPath = PR_GetLibraryFilePathname(NameOfThisSharedLib, + (PRFuncPtr)&loader_LoadLibrary); + + if (fullPath) { + lib = loader_LoadLibInReferenceDir(fullPath, nameToLoad); +#ifdef XP_UNIX + if (!lib) { + /* + * If fullPath is a symbolic link, resolve the symbolic + * link and try again. 
+ */ + char* originalfullPath = loader_GetOriginalPathname(fullPath); + if (originalfullPath) { + PR_Free(fullPath); + fullPath = originalfullPath; + lib = loader_LoadLibInReferenceDir(fullPath, nameToLoad); + } + } +#endif + PR_Free(fullPath); + } + if (!lib) { +#ifdef DEBUG_LOADER + PR_fprintf(PR_STDOUT, "\nAttempting to load %s\n", nameToLoad); +#endif + libSpec.type = PR_LibSpec_Pathname; + libSpec.value.pathname = nameToLoad; + lib = PR_LoadLibraryWithFlags(libSpec, PR_LD_NOW | PR_LD_LOCAL); + } + if (NULL == lib) { +#ifdef DEBUG_LOADER + PR_fprintf(PR_STDOUT, "\nLoading failed : %s.\n", nameToLoad); +#endif + } + return lib; +} diff --git a/security/nss/lib/freebl/hmacct.c b/security/nss/lib/freebl/hmacct.c new file mode 100644 index 0000000000..a1b2ba35a0 --- /dev/null +++ b/security/nss/lib/freebl/hmacct.c @@ -0,0 +1,325 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secport.h" +#include "hasht.h" +#include "blapit.h" +#include "hmacct.h" +#include "secerr.h" + +/* MAX_HASH_BIT_COUNT_BYTES is the maximum number of bytes in the hash's length + * field. (SHA-384/512 have 128-bit length.) */ +#define MAX_HASH_BIT_COUNT_BYTES 16 + +/* constantTimeGE returns 0xff if a>=b and 0x00 otherwise, where a, b < + * MAX_UINT/2. */ +static unsigned char +constantTimeGE(unsigned int a, unsigned int b) +{ + return PORT_CT_GE(a, b); +} + +/* constantTimeEQ8 returns 0xff if a==b and 0x00 otherwise. */ +static unsigned char +constantTimeEQ(unsigned char a, unsigned char b) +{ + return PORT_CT_EQ(a, b); +} + +/* MAC performs a constant time SSLv3/TLS MAC of |dataLen| bytes of |data|, + * where |dataLen| includes both the authenticated bytes and the MAC tag from + * the sender. |dataLen| must be >= the length of the MAC tag. 
+ * + * |dataTotalLen| is >= |dataLen| and also accounts for any padding bytes + * that may follow the sender's MAC. (Only a single block of padding may + * follow in SSLv3, or up to 255 bytes in TLS.) + * + * Since the results of decryption are secret information (otherwise a + * padding-oracle is created), this function is constant-time with respect to + * |dataLen|. + * + * |header| contains either the 13-byte TLS header (containing the sequence + * number, record type etc), or it contains the SSLv3 header with the SSLv3 + * padding bytes etc. + * + * The finished MAC is emitted through hashObj->end() into |mdOut|, with + * |mdOutLen| and |mdOutMax| passed along unchanged. The only return + * statement in the function yields SECSuccess. */ +static SECStatus +MAC(unsigned char *mdOut, + unsigned int *mdOutLen, + unsigned int mdOutMax, + const SECHashObject *hashObj, + const unsigned char *macSecret, + unsigned int macSecretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *data, + unsigned int dataLen, + unsigned int dataTotalLen, + unsigned char isSSLv3) +{ + void *mdState = hashObj->create(); /* NOTE(review): used below without a NULL check - confirm create() cannot fail here */ + const unsigned int mdSize = hashObj->length; + const unsigned int mdBlockSize = hashObj->blocklength; + /* mdLengthSize is the number of bytes in the length field that terminates + * the hash. + * + * This assumes that hash functions with a 64 byte block size use a 64-bit + * length, and otherwise they use a 128-bit length. This is true of {MD5, + * SHA*} (which are all of the hash functions specified for use with TLS + * today). */ + const unsigned int mdLengthSize = mdBlockSize == 64 ? 8 : 16; + + const unsigned int sslv3PadLen = hashObj->type == HASH_AlgMD5 ? 48 : 40; /* length of the SSLv3 pad2 run written into hmacPad further down */ + + /* varianceBlocks is the number of blocks of the hash that we have to + * calculate in constant time because they could be altered by the + * padding value. + * + * In SSLv3, the padding must be minimal so the end of the plaintext + * varies by, at most, 15+20 = 35 bytes. (We conservatively assume that + * the MAC size varies from 0..20 bytes.)
In case the 9 bytes of hash + * termination (0x80 + 64-bit length) don't fit in the final block, we + * say that the final two blocks can vary based on the padding. + * + * TLSv1 has MACs up to 48 bytes long (SHA-384) and the padding is not + * required to be minimal. Therefore we say that the final six blocks + * can vary based on the padding. + * + * Later in the function, if the message is short and there obviously + * cannot be this many blocks then varianceBlocks can be reduced. + * NOTE(review): nothing in this function assigns to varianceBlocks again; + * short messages are handled by leaving numStartingBlocks at 0. */ + unsigned int varianceBlocks = isSSLv3 ? 2 : 6; + /* From now on we're dealing with the MAC, which conceptually has 13 + * bytes of `header' before the start of the data (TLS) or 71/75 bytes + * (SSLv3) */ + const unsigned int len = dataTotalLen + headerLen; + /* maxMACBytes contains the maximum number of bytes in the MAC, including + * |header|, assuming that there's no padding. */ + const unsigned int maxMACBytes = len - mdSize - 1; + /* numBlocks is the maximum number of hash blocks. */ + const unsigned int numBlocks = + (maxMACBytes + 1 + mdLengthSize + mdBlockSize - 1) / mdBlockSize; + /* macEndOffset is the index just past the end of the data to be + * MACed. The unsigned subtraction relies on the precondition that + * |dataLen| is at least the MAC tag length. */ + const unsigned int macEndOffset = dataLen + headerLen - mdSize; + /* c is the index of the 0x80 byte in the final hash block that + * contains application data. */ + const unsigned int c = macEndOffset % mdBlockSize; + /* indexA is the hash block number that contains the 0x80 terminating + * value. */ + const unsigned int indexA = macEndOffset / mdBlockSize; + /* indexB is the hash block number that contains the hash length, in + * bits (the length field is mdLengthSize bytes wide). */ + const unsigned int indexB = (macEndOffset + mdLengthSize) / mdBlockSize; + /* bits is the hash-length in bits. It includes the additional hash + * block for the masked HMAC key, or the whole of |header| in the case of + * SSLv3.
+ */ + unsigned int bits; + /* In order to calculate the MAC in constant time we have to handle + * the final blocks specially because the padding value could cause the + * end to appear somewhere in the final |varianceBlocks| blocks and we + * can't leak where. However, |numStartingBlocks| worth of data can + * be hashed right away because no padding value can affect whether + * they are plaintext. */ + unsigned int numStartingBlocks = 0; + /* k is the starting byte offset into the conceptual header||data where + * we start processing. */ + unsigned int k = 0; + unsigned char lengthBytes[MAX_HASH_BIT_COUNT_BYTES]; /* encoded bit count, merged into block indexB in the final loop */ + /* hmacPad is the masked HMAC key. */ + unsigned char hmacPad[HASH_BLOCK_LENGTH_MAX]; + unsigned char firstBlock[HASH_BLOCK_LENGTH_MAX]; /* scratch block joining the header bytes to the start of |data| */ + unsigned char macOut[HASH_LENGTH_MAX]; /* receives the inner digest picked out in constant time */ + unsigned i, j; + + /* For SSLv3, if we're going to have any starting blocks then we need + * at least two because the header is larger than a single block. */ + if (numBlocks > varianceBlocks + (isSSLv3 ? 1 : 0)) { + numStartingBlocks = numBlocks - varianceBlocks; + k = mdBlockSize * numStartingBlocks; + } + + bits = 8 * macEndOffset; + hashObj->begin(mdState); + if (!isSSLv3) { + /* Compute the initial HMAC block. For SSLv3, the padding and + * secret bytes are included in |header| because they take more + * than a single block. */ + bits += 8 * mdBlockSize; + memset(hmacPad, 0, mdBlockSize); + PORT_Assert(macSecretLen <= sizeof(hmacPad)); /* NOTE(review): this assertion is the only bound on the memcpy below - confirm callers never pass a longer secret */ + memcpy(hmacPad, macSecret, macSecretLen); + for (i = 0; i < mdBlockSize; i++) + hmacPad[i] ^= 0x36; /* 0x36 is the HMAC inner pad (ipad) byte */ + hashObj->update(mdState, hmacPad, mdBlockSize); + } + + j = 0; /* j: base offset into lengthBytes, moved up by 8 when the length field is 16 bytes wide */ + memset(lengthBytes, 0, sizeof(lengthBytes)); + if (mdLengthSize == 16) { + j = 8; + } + if (hashObj->type == HASH_AlgMD5) { + /* MD5 appends a little-endian length. Only four bytes of |bits| are + * stored; the rest of lengthBytes keeps the zeros from the memset. */ + for (i = 0; i < 4; i++) { + lengthBytes[i + j] = bits >> (8 * i); + } + } else { + /* All other TLS hash functions use a big-endian length. Again only + * four bytes are stored, in the last four positions of the 8-byte + * count.
+ */ + for (i = 0; i < 4; i++) { + lengthBytes[4 + i + j] = bits >> (8 * (3 - i)); + } + } + + if (k > 0) { /* hash the numStartingBlocks leading blocks directly, outside the constant-time loop */ + if (isSSLv3) { + /* The SSLv3 header is larger than a single block. + * overhang is the number of bytes beyond a single + * block that the header consumes: either 7 bytes + * (SHA1) or 11 bytes (MD5). */ + const unsigned int overhang = headerLen - mdBlockSize; + hashObj->update(mdState, header, mdBlockSize); + memcpy(firstBlock, header + mdBlockSize, overhang); + memcpy(firstBlock + overhang, data, mdBlockSize - overhang); + hashObj->update(mdState, firstBlock, mdBlockSize); + for (i = 1; i < k / mdBlockSize - 1; i++) { /* one iteration fewer than the TLS branch: two blocks were already hashed above */ + hashObj->update(mdState, data + mdBlockSize * i - overhang, + mdBlockSize); + } + } else { + /* k is a multiple of mdBlockSize. */ + memcpy(firstBlock, header, 13); /* 13 is the TLS header length; NOTE(review): hard-coded rather than taken from headerLen */ + memcpy(firstBlock + 13, data, mdBlockSize - 13); + hashObj->update(mdState, firstBlock, mdBlockSize); + for (i = 1; i < k / mdBlockSize; i++) { + hashObj->update(mdState, data + mdBlockSize * i - 13, + mdBlockSize); + } + } + } + + memset(macOut, 0, sizeof(macOut)); + + /* We now process the final hash blocks. For each block, we construct + * it in constant time. If i == indexA then we'll include the 0x80 + * byte and zero pad etc. For each block we selectively copy it, in + * constant time, to |macOut|. The loop bound is inclusive, so + * varianceBlocks + 1 blocks are processed.
+ */ + for (i = numStartingBlocks; i <= numStartingBlocks + varianceBlocks; i++) { + unsigned char block[HASH_BLOCK_LENGTH_MAX]; + unsigned char isBlockA = constantTimeEQ(i, indexA); /* 0xff only for the block holding the 0x80 byte */ + unsigned char isBlockB = constantTimeEQ(i, indexB); /* 0xff only for the block holding the length */ + for (j = 0; j < mdBlockSize; j++) { + unsigned char isPastC = isBlockA & constantTimeGE(j, c); + unsigned char isPastCPlus1 = isBlockA & constantTimeGE(j, c + 1); + unsigned char b = 0; /* positions beyond header||data read as zero */ + if (k < headerLen) { + b = header[k]; + } else if (k < dataTotalLen + headerLen) { + b = data[k - headerLen]; + } + k++; + + /* If this is the block containing the end of the + * application data, and we are at the offset for the + * 0x80 value, then overwrite b with 0x80. */ + b = (b & ~isPastC) | (0x80 & isPastC); + /* If this is the block containing the end of the + * application data and we're past the 0x80 value then + * just write zero. */ + b = b & ~isPastCPlus1; + /* If this is indexB (the final block), but not + * indexA (the end of the data), then the 64-bit + * length didn't fit into indexA and we're having to + * add an extra block of zeros. */ + b &= ~isBlockB | isBlockA; + + /* The final bytes of one of the blocks contain the length. */ + if (j >= mdBlockSize - mdLengthSize) { + /* If this is indexB, write a length byte. */ + b = (b & ~isBlockB) | + (isBlockB & lengthBytes[j - (mdBlockSize - mdLengthSize)]); + } + block[j] = b; + } + + hashObj->update(mdState, block, mdBlockSize); + hashObj->end_raw(mdState, block, NULL, mdSize); /* |block| is reused to receive the digest state after this update */ + /* If this is indexB, copy the hash value to |macOut|. */ + for (j = 0; j < mdSize; j++) { + macOut[j] |= block[j] & isBlockB; + } + } + + hashObj->begin(mdState); /* restart the same hash state for the second (outer) pass */ + + if (isSSLv3) { + /* We repurpose |hmacPad| to contain the SSLv3 pad2 block. */ + for (i = 0; i < sslv3PadLen; i++) + hmacPad[i] = 0x5c; + + hashObj->update(mdState, macSecret, macSecretLen); + hashObj->update(mdState, hmacPad, sslv3PadLen); + hashObj->update(mdState, macOut, mdSize); + } else { + /* Complete the HMAC in the standard manner.
+ */ + for (i = 0; i < mdBlockSize; i++) + hmacPad[i] ^= 0x6a; /* 0x36 ^ 0x5c == 0x6a: turns the ipad-masked key into the opad-masked key */ + + hashObj->update(mdState, hmacPad, mdBlockSize); + hashObj->update(mdState, macOut, mdSize); + } + + hashObj->end(mdState, mdOut, mdOutLen, mdOutMax); + hashObj->destroy(mdState, PR_TRUE); + + /* Wipe the function-scope buffers that held key- or data-derived bytes. + * NOTE(review): the loop-local |block| buffer is out of scope here and + * is not wiped. */ + PORT_Memset(lengthBytes, 0, sizeof lengthBytes); + PORT_Memset(hmacPad, 0, sizeof hmacPad); + PORT_Memset(firstBlock, 0, sizeof firstBlock); + PORT_Memset(macOut, 0, sizeof macOut); + + return SECSuccess; +} + /* Non-SSLv3 entry point: forwards every argument to MAC() with isSSLv3 = 0. Returns SECFailure up front when the hash object has no end_raw, which MAC() calls. */ +SECStatus +HMAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen) +{ + if (hashObj->end_raw == NULL) + return SECFailure; + return MAC(result, resultLen, maxResultLen, hashObj, secret, secretLen, + header, headerLen, body, bodyLen, bodyTotalLen, + 0 /* not SSLv3 */); +} + /* SSLv3 entry point: identical to HMAC_ConstantTime except that MAC() is called with isSSLv3 = 1. */ +SECStatus +SSLv3_MAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen) +{ + if (hashObj->end_raw == NULL) + return SECFailure; + return MAC(result, resultLen, maxResultLen, hashObj, secret, secretLen, + header, headerLen, body, bodyLen, bodyTotalLen, + 1 /* SSLv3 */); +} diff --git a/security/nss/lib/freebl/hmacct.h b/security/nss/lib/freebl/hmacct.h new file mode 100644 index 0000000000..a773ea89c7 --- /dev/null +++ b/security/nss/lib/freebl/hmacct.h @@ -0,0 +1,38 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */ + +#ifndef _HMACCT_H_ +#define _HMACCT_H_ + +SEC_BEGIN_PROTOS + /* Constant-time MAC for the non-SSLv3 case. |bodyLen| covers the authenticated bytes plus the sender's MAC tag; |bodyTotalLen| additionally covers any padding after it. The definition in hmacct.c returns SECFailure when hashObj->end_raw is NULL. */ +extern SECStatus HMAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen); + /* SSLv3 counterpart of HMAC_ConstantTime, taking the same parameter list; the definition in hmacct.c applies the same end_raw check. */ +extern SECStatus SSLv3_MAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen); + +SEC_END_PROTOS + +#endif diff --git a/security/nss/lib/freebl/intel-aes-x64-masm.asm b/security/nss/lib/freebl/intel-aes-x64-masm.asm new file mode 100644 index 0000000000..fe183bca03 --- /dev/null +++ b/security/nss/lib/freebl/intel-aes-x64-masm.asm @@ -0,0 +1,964 @@ +; LICENSE: +; This submission to NSS is to be made available under the terms of the +; Mozilla Public License, v. 2.0. You can obtain one at http: +; //mozilla.org/MPL/2.0/. +;############################################################################### +; Copyright(c) 2014, Intel Corp.
+; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.DATA +ALIGN 16 +Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh +Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h +Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh +Lcon1 dd 1,1,1,1 +Lcon2 dd 1bh,1bh,1bh,1bh + +.CODE + +ctx textequ +output textequ +input textequ +inputLen textequ + + +aes_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesenc xmm0, xmm8 + aesenc xmm1, xmm8 + aesenc xmm2, xmm8 + aesenc xmm3, xmm8 + aesenc xmm4, xmm8 + aesenc xmm5, xmm8 + aesenc xmm6, xmm8 + aesenc xmm7, xmm8 + ENDM + +aes_last_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesenclast xmm0, xmm8 + aesenclast xmm1, xmm8 + aesenclast xmm2, xmm8 + aesenclast xmm3, xmm8 + aesenclast xmm4, xmm8 + aesenclast xmm5, xmm8 + aesenclast xmm6, xmm8 + aesenclast xmm7, xmm8 + ENDM + +aes_dec_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesdec xmm0, xmm8 + aesdec xmm1, xmm8 + aesdec xmm2, xmm8 + aesdec xmm3, xmm8 + aesdec xmm4, xmm8 + aesdec xmm5, xmm8 + aesdec xmm6, xmm8 + aesdec xmm7, xmm8 + ENDM + +aes_dec_last_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesdeclast xmm0, xmm8 + aesdeclast xmm1, xmm8 + aesdeclast xmm2, xmm8 + aesdeclast xmm3, xmm8 + aesdeclast xmm4, xmm8 + aesdeclast xmm5, xmm8 + aesdeclast xmm6, xmm8 + aesdeclast xmm7, xmm8 + ENDM + + +gen_aes_ecb_func MACRO enc, rnds + +LOCAL loop8 +LOCAL loop1 +LOCAL bail + + xor inputLen, inputLen + mov input, [rsp + 1*8 + 8*4] + mov inputLen, [rsp + 1*8 + 8*5] + + sub rsp, 3*16 + + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + +loop8: + cmp inputLen, 8*16 + jb loop1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + movdqu xmm7, [7*16 + input] + + 
movdqu xmm8, [0*16 + ctx] + pxor xmm0, xmm8 + pxor xmm1, xmm8 + pxor xmm2, xmm8 + pxor xmm3, xmm8 + pxor xmm4, xmm8 + pxor xmm5, xmm8 + pxor xmm6, xmm8 + pxor xmm7, xmm8 + +IF enc eq 1 + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ELSE + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ENDIF + + i = 1 + WHILE i LT rnds + rnd i + i = i+1 + ENDM + lastrnd rnds + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [7*16 + output], xmm7 + + lea input, [8*16 + input] + lea output, [8*16 + output] + sub inputLen, 8*16 + jmp loop8 + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesinst xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aeslastinst xmm0, xmm7 + + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + xor rax, rax + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + ret +ENDM + +intel_aes_encrypt_ecb_128 PROC +gen_aes_ecb_func 1, 10 +intel_aes_encrypt_ecb_128 ENDP + +intel_aes_encrypt_ecb_192 PROC +gen_aes_ecb_func 1, 12 +intel_aes_encrypt_ecb_192 ENDP + +intel_aes_encrypt_ecb_256 PROC +gen_aes_ecb_func 1, 14 +intel_aes_encrypt_ecb_256 ENDP + +intel_aes_decrypt_ecb_128 PROC +gen_aes_ecb_func 0, 10 +intel_aes_decrypt_ecb_128 ENDP + +intel_aes_decrypt_ecb_192 PROC +gen_aes_ecb_func 0, 12 +intel_aes_decrypt_ecb_192 ENDP + +intel_aes_decrypt_ecb_256 PROC +gen_aes_ecb_func 0, 14 +intel_aes_decrypt_ecb_256 ENDP + + +KEY textequ +KS textequ +ITR textequ + +intel_aes_encrypt_init_128 PROC + + movdqu xmm1, [KEY] + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + lea ITR, Lcon1 + movdqa xmm0, [ITR] + lea 
ITR, Lmask + movdqa xmm4, [ITR] + + mov ITR, 8 + +Lenc_128_ks_loop: + lea KS, [16 + KS] + dec ITR + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + jne Lenc_128_ks_loop + + lea ITR, Lcon2 + movdqa xmm0, [ITR] + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [16 + KS], xmm1 + movdqa xmm2, xmm1 + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [32 + KS], xmm1 + movdqa xmm2, xmm1 + + ret +intel_aes_encrypt_init_128 ENDP + + +intel_aes_decrypt_init_128 PROC + + push KS + push KEY + + call intel_aes_encrypt_init_128 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [10*16 + KS] + movdqu [10*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 5 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(10-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(10-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [5*16 + KS] + aesimc xmm0, xmm0 + movdqu [5*16 + KS], xmm0 + ret +intel_aes_decrypt_init_128 ENDP + + +intel_aes_encrypt_init_192 PROC + + sub rsp, 16*2 + movdqu [16*0 + rsp], xmm6 + movdqu [16*1 + rsp], xmm7 + + movdqu xmm1, [KEY] + mov ITR, [16 + KEY] + movd xmm3, ITR + + movdqu [KS], xmm1 + movdqa xmm5, xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask192 + movdqu xmm4, [ITR] + + mov ITR, 4 + +Lenc_192_ks_loop: + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + pxor xmm1, xmm6 + pxor 
xmm3, xmm7 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqa xmm6, xmm1 + shufpd xmm5, xmm1, 00h + shufpd xmm6, xmm3, 01h + + movdqu [16 + KS], xmm5 + movdqu [32 + KS], xmm6 + + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + pxor xmm1, xmm6 + pxor xmm3, xmm7 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqu [48 + KS], xmm1 + movdqa xmm5, xmm3 + + lea KS, [48 + KS] + + dec ITR + jnz Lenc_192_ks_loop + + movdqu [16 + KS], xmm5 + + movdqu xmm7, [16*1 + rsp] + movdqu xmm6, [16*0 + rsp] + add rsp, 16*2 + ret +intel_aes_encrypt_init_192 ENDP + +intel_aes_decrypt_init_192 PROC + push KS + push KEY + + call intel_aes_encrypt_init_192 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [12*16 + KS] + movdqu [12*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 6 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(12-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(12-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [6*16 + KS] + aesimc xmm0, xmm0 + movdqu [6*16 + KS], xmm0 + ret +intel_aes_decrypt_init_192 ENDP + + +intel_aes_encrypt_init_256 PROC + sub rsp, 16*2 + movdqu [16*0 + rsp], xmm6 + movdqu [16*1 + rsp], xmm7 + + movdqu xmm1, [16*0 + KEY] + movdqu xmm3, [16*1 + KEY] + + movdqu [16*0 + KS], xmm1 + movdqu [16*1 + KS], xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask256 + movdqu xmm5, [ITR] + + pxor xmm6, xmm6 + + mov ITR, 6 + +Lenc_256_ks_loop: + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu [16*2 + KS], xmm1 + + 
pshufd xmm2, xmm1, 0ffh + aesenclast xmm2, xmm6 + movdqa xmm4, xmm3 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + movdqu [16*3 + KS], xmm3 + + lea KS, [32 + KS] + dec ITR + jnz Lenc_256_ks_loop + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu [16*2 + KS], xmm1 + + movdqu xmm7, [16*1 + rsp] + movdqu xmm6, [16*0 + rsp] + add rsp, 16*2 + ret + +intel_aes_encrypt_init_256 ENDP + + +intel_aes_decrypt_init_256 PROC + push KS + push KEY + + call intel_aes_encrypt_init_256 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [14*16 + KS] + movdqu [14*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 7 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(14-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(14-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [7*16 + KS] + aesimc xmm0, xmm0 + movdqu [7*16 + KS], xmm0 + ret +intel_aes_decrypt_init_256 ENDP + + + +gen_aes_cbc_enc_func MACRO rnds + +LOCAL loop1 +LOCAL bail + + mov input, [rsp + 1*8 + 8*4] + mov inputLen, [rsp + 1*8 + 8*5] + + sub rsp, 3*16 + + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + + movdqu xmm0, [256+ctx] + + movdqu xmm2, [0*16 + ctx] + movdqu xmm3, [1*16 + ctx] + movdqu xmm4, [2*16 + ctx] + movdqu xmm5, [3*16 + ctx] + movdqu xmm6, [4*16 + ctx] + movdqu xmm7, [5*16 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm1, [input] + pxor xmm1, xmm2 + pxor xmm0, xmm1 + + aesenc xmm0, xmm3 + aesenc xmm0, xmm4 + aesenc xmm0, xmm5 + aesenc xmm0, xmm6 + aesenc xmm0, xmm7 + + i = 6 + WHILE i LT rnds + movdqu xmm8, [i*16 + ctx] + aesenc xmm0, xmm8 + i = i+1 + ENDM + movdqu xmm8, [rnds*16 + ctx] + aesenclast xmm0, xmm8 + + movdqu [output], xmm0 + + lea input, 
[1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + movdqu [256+ctx], xmm0 + + xor rax, rax + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + ret + +ENDM + +gen_aes_cbc_dec_func MACRO rnds + +LOCAL loop8 +LOCAL loop1 +LOCAL dec1 +LOCAL bail + + mov input, [rsp + 1*8 + 8*4] + mov inputLen, [rsp + 1*8 + 8*5] + + sub rsp, 3*16 + + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + +loop8: + cmp inputLen, 8*16 + jb dec1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + movdqu xmm7, [7*16 + input] + + movdqu xmm8, [0*16 + ctx] + pxor xmm0, xmm8 + pxor xmm1, xmm8 + pxor xmm2, xmm8 + pxor xmm3, xmm8 + pxor xmm4, xmm8 + pxor xmm5, xmm8 + pxor xmm6, xmm8 + pxor xmm7, xmm8 + + i = 1 + WHILE i LT rnds + aes_dec_rnd i + i = i+1 + ENDM + aes_dec_last_rnd rnds + + movdqu xmm8, [256 + ctx] + pxor xmm0, xmm8 + movdqu xmm8, [0*16 + input] + pxor xmm1, xmm8 + movdqu xmm8, [1*16 + input] + pxor xmm2, xmm8 + movdqu xmm8, [2*16 + input] + pxor xmm3, xmm8 + movdqu xmm8, [3*16 + input] + pxor xmm4, xmm8 + movdqu xmm8, [4*16 + input] + pxor xmm5, xmm8 + movdqu xmm8, [5*16 + input] + pxor xmm6, xmm8 + movdqu xmm8, [6*16 + input] + pxor xmm7, xmm8 + movdqu xmm8, [7*16 + input] + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [7*16 + output], xmm7 + movdqu [256 + ctx], xmm8 + + lea input, [8*16 + input] + lea output, [8*16 + output] + sub inputLen, 8*16 + jmp loop8 +dec1: + + movdqu xmm3, [256 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + movdqa xmm4, xmm0 + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + 
i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesdec xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesdeclast xmm0, xmm7 + pxor xmm3, xmm0 + + movdqu [output], xmm3 + movdqa xmm3, xmm4 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + movdqu [256 + ctx], xmm3 + xor rax, rax + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + ret +ENDM + +intel_aes_encrypt_cbc_128 PROC +gen_aes_cbc_enc_func 10 +intel_aes_encrypt_cbc_128 ENDP + +intel_aes_encrypt_cbc_192 PROC +gen_aes_cbc_enc_func 12 +intel_aes_encrypt_cbc_192 ENDP + +intel_aes_encrypt_cbc_256 PROC +gen_aes_cbc_enc_func 14 +intel_aes_encrypt_cbc_256 ENDP + +intel_aes_decrypt_cbc_128 PROC +gen_aes_cbc_dec_func 10 +intel_aes_decrypt_cbc_128 ENDP + +intel_aes_decrypt_cbc_192 PROC +gen_aes_cbc_dec_func 12 +intel_aes_decrypt_cbc_192 ENDP + +intel_aes_decrypt_cbc_256 PROC +gen_aes_cbc_dec_func 14 +intel_aes_decrypt_cbc_256 ENDP + + + +ctrCtx textequ +CTR textequ +CTRSave textequ + +gen_aes_ctr_func MACRO rnds + +LOCAL loop8 +LOCAL loop1 +LOCAL enc1 +LOCAL bail + + mov input, [rsp + 8*1 + 4*8] + mov inputLen, [rsp + 8*1 + 5*8] + + mov ctrCtx, ctx + mov ctx, [8+ctrCtx] + + sub rsp, 3*16 + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + + + push rbp + mov rbp, rsp + sub rsp, 8*16 + and rsp, -16 + + + movdqu xmm0, [16+ctrCtx] + mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4] + bswap CTRSave + movdqu xmm1, [ctx + 0*16] + + pxor xmm0, xmm1 + + movdqa [rsp + 0*16], xmm0 + movdqa [rsp + 1*16], xmm0 + movdqa [rsp + 2*16], xmm0 + movdqa [rsp + 3*16], xmm0 + movdqa [rsp + 4*16], xmm0 + movdqa [rsp + 5*16], xmm0 + movdqa [rsp + 6*16], xmm0 + movdqa [rsp + 7*16], xmm0 + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 1*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD 
PTR [rsp + 2*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 3*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 4*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 5*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 6*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 7*16 + 3*4], CTR + + +loop8: + cmp inputLen, 8*16 + jb loop1 + + movdqu xmm0, [0*16 + rsp] + movdqu xmm1, [1*16 + rsp] + movdqu xmm2, [2*16 + rsp] + movdqu xmm3, [3*16 + rsp] + movdqu xmm4, [4*16 + rsp] + movdqu xmm5, [5*16 + rsp] + movdqu xmm6, [6*16 + rsp] + movdqu xmm7, [7*16 + rsp] + + i = 1 + WHILE i LE 8 + aes_rnd i + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR + + i = i+1 + ENDM + WHILE i LT rnds + aes_rnd i + i = i+1 + ENDM + aes_last_rnd rnds + + movdqu xmm8, [0*16 + input] + pxor xmm0, xmm8 + movdqu xmm8, [1*16 + input] + pxor xmm1, xmm8 + movdqu xmm8, [2*16 + input] + pxor xmm2, xmm8 + movdqu xmm8, [3*16 + input] + pxor xmm3, xmm8 + movdqu xmm8, [4*16 + input] + pxor xmm4, xmm8 + movdqu xmm8, [5*16 + input] + pxor xmm5, xmm8 + movdqu xmm8, [6*16 + input] + pxor xmm6, xmm8 + movdqu xmm8, [7*16 + input] + pxor xmm7, xmm8 + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [7*16 + output], xmm7 + + lea input, [8*16 + input] + lea output, [8*16 + output] + sub inputLen, 8*16 + jmp loop8 + + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [rsp] + add rsp, 16 + + i = 1 + WHILE i LT rnds + movdqu xmm7, 
[i*16 + ctx] + aesenc xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesenclast xmm0, xmm7 + + movdqu xmm7, [input] + pxor xmm0, xmm7 + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + + movdqu xmm0, [rsp] + movdqu xmm1, [ctx + 0*16] + pxor xmm0, xmm1 + movdqu [16+ctrCtx], xmm0 + + + xor rax, rax + mov rsp, rbp + pop rbp + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + + ret +ENDM + + +intel_aes_encrypt_ctr_128 PROC +gen_aes_ctr_func 10 +intel_aes_encrypt_ctr_128 ENDP + +intel_aes_encrypt_ctr_192 PROC +gen_aes_ctr_func 12 +intel_aes_encrypt_ctr_192 ENDP + +intel_aes_encrypt_ctr_256 PROC +gen_aes_ctr_func 14 +intel_aes_encrypt_ctr_256 ENDP + + +END diff --git a/security/nss/lib/freebl/intel-aes-x86-masm.asm b/security/nss/lib/freebl/intel-aes-x86-masm.asm new file mode 100644 index 0000000000..790c951e7c --- /dev/null +++ b/security/nss/lib/freebl/intel-aes-x86-masm.asm @@ -0,0 +1,942 @@ +; LICENSE: +; This submission to NSS is to be made available under the terms of the +; Mozilla Public License, v. 2.0. You can obtain one at http: +; //mozilla.org/MPL/2.0/. +;############################################################################### +; Copyright(c) 2014, Intel Corp. 
+; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.MODEL FLAT, C +.XMM + +.DATA +ALIGN 16 +Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh +Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h +Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh +Lcon1 dd 1,1,1,1 +Lcon2 dd 1bh,1bh,1bh,1bh + +.CODE + +ctx textequ +output textequ +input textequ +inputLen textequ + + +aes_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesenc xmm0, xmm7 + aesenc xmm1, xmm7 + aesenc xmm2, xmm7 + aesenc xmm3, xmm7 + aesenc xmm4, xmm7 + aesenc xmm5, xmm7 + aesenc xmm6, xmm7 + ENDM + +aes_last_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesenclast xmm0, xmm7 + aesenclast xmm1, xmm7 + aesenclast xmm2, xmm7 + aesenclast xmm3, xmm7 + aesenclast xmm4, xmm7 + aesenclast xmm5, xmm7 + aesenclast xmm6, xmm7 + ENDM + +aes_dec_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesdec xmm0, xmm7 + aesdec xmm1, xmm7 + aesdec xmm2, xmm7 + aesdec xmm3, xmm7 + aesdec xmm4, xmm7 + aesdec xmm5, xmm7 + aesdec xmm6, xmm7 + ENDM + +aes_dec_last_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesdeclast xmm0, xmm7 + aesdeclast xmm1, xmm7 + aesdeclast xmm2, xmm7 + aesdeclast xmm3, xmm7 + aesdeclast xmm4, xmm7 + aesdeclast xmm5, xmm7 + aesdeclast xmm6, xmm7 + ENDM + + +gen_aes_ecb_func MACRO enc, rnds + +LOCAL loop7 +LOCAL loop1 +LOCAL bail + + push inputLen + + mov ctx, [esp + 2*4 + 0*4] + mov output, [esp + 2*4 + 1*4] + mov input, [esp + 2*4 + 4*4] + mov inputLen, [esp + 2*4 + 5*4] + +loop7: + cmp inputLen, 7*16 + jb loop1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + pxor xmm1, xmm7 + pxor xmm2, xmm7 + pxor xmm3, xmm7 + pxor xmm4, xmm7 + pxor xmm5, xmm7 + pxor 
xmm6, xmm7 + +IF enc eq 1 + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ELSE + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ENDIF + + i = 1 + WHILE i LT rnds + rnd i + i = i+1 + ENDM + lastrnd rnds + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + + lea input, [7*16 + input] + lea output, [7*16 + output] + sub inputLen, 7*16 + jmp loop7 + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesinst xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aeslastinst xmm0, xmm7 + + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + xor eax, eax + pop inputLen + ret + +ENDM + +ALIGN 16 +intel_aes_encrypt_ecb_128 PROC +gen_aes_ecb_func 1, 10 +intel_aes_encrypt_ecb_128 ENDP + +ALIGN 16 +intel_aes_encrypt_ecb_192 PROC +gen_aes_ecb_func 1, 12 +intel_aes_encrypt_ecb_192 ENDP + +ALIGN 16 +intel_aes_encrypt_ecb_256 PROC +gen_aes_ecb_func 1, 14 +intel_aes_encrypt_ecb_256 ENDP + +ALIGN 16 +intel_aes_decrypt_ecb_128 PROC +gen_aes_ecb_func 0, 10 +intel_aes_decrypt_ecb_128 ENDP + +ALIGN 16 +intel_aes_decrypt_ecb_192 PROC +gen_aes_ecb_func 0, 12 +intel_aes_decrypt_ecb_192 ENDP + +ALIGN 16 +intel_aes_decrypt_ecb_256 PROC +gen_aes_ecb_func 0, 14 +intel_aes_decrypt_ecb_256 ENDP + + +KEY textequ +KS textequ +ITR textequ + +ALIGN 16 +intel_aes_encrypt_init_128 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + + movdqu xmm1, [KEY] + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + lea ITR, Lcon1 + movdqa xmm0, [ITR] + lea ITR, Lmask + movdqa xmm4, [ITR] + + mov ITR, 8 + +Lenc_128_ks_loop: + lea KS, [16 + KS] + dec ITR + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + 
pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + jne Lenc_128_ks_loop + + lea ITR, Lcon2 + movdqa xmm0, [ITR] + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [16 + KS], xmm1 + movdqa xmm2, xmm1 + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [32 + KS], xmm1 + movdqa xmm2, xmm1 + + ret +intel_aes_encrypt_init_128 ENDP + + +ALIGN 16 +intel_aes_decrypt_init_128 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + push KS + push KEY + + call intel_aes_encrypt_init_128 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [10*16 + KS] + movdqu [10*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 5 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(10-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(10-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [5*16 + KS] + aesimc xmm0, xmm0 + movdqu [5*16 + KS], xmm0 + ret +intel_aes_decrypt_init_128 ENDP + + +ALIGN 16 +intel_aes_encrypt_init_192 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + pxor xmm3, xmm3 + movdqu xmm1, [KEY] + pinsrd xmm3, DWORD PTR [16 + KEY], 0 + pinsrd xmm3, DWORD PTR [20 + KEY], 1 + + movdqu [KS], xmm1 + movdqa xmm5, xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask192 + movdqu xmm4, [ITR] + + mov ITR, 4 + +Lenc_192_ks_loop: + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + pxor xmm1, xmm6 + pxor xmm3, xmm7 + pslldq xmm6, 4 
+ pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqa xmm6, xmm1 + shufpd xmm5, xmm1, 00h + shufpd xmm6, xmm3, 01h + + movdqu [16 + KS], xmm5 + movdqu [32 + KS], xmm6 + + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + pxor xmm1, xmm6 + pxor xmm3, xmm7 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqu [48 + KS], xmm1 + movdqa xmm5, xmm3 + + lea KS, [48 + KS] + + dec ITR + jnz Lenc_192_ks_loop + + movdqu [16 + KS], xmm5 +ret +intel_aes_encrypt_init_192 ENDP + +ALIGN 16 +intel_aes_decrypt_init_192 PROC + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + push KS + push KEY + + call intel_aes_encrypt_init_192 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [12*16 + KS] + movdqu [12*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 6 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(12-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(12-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [6*16 + KS] + aesimc xmm0, xmm0 + movdqu [6*16 + KS], xmm0 + ret +intel_aes_decrypt_init_192 ENDP + +ALIGN 16 +intel_aes_encrypt_init_256 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + movdqu xmm1, [16*0 + KEY] + movdqu xmm3, [16*1 + KEY] + + movdqu [16*0 + KS], xmm1 + movdqu [16*1 + KS], xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask256 + movdqu xmm5, [ITR] + + pxor xmm6, xmm6 + + mov ITR, 6 + +Lenc_256_ks_loop: + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu [16*2 + KS], xmm1 + + pshufd xmm2, xmm1, 0ffh + aesenclast 
xmm2, xmm6 + movdqa xmm4, xmm3 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + movdqu [16*3 + KS], xmm3 + + lea KS, [32 + KS] + dec ITR + jnz Lenc_256_ks_loop + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu [16*2 + KS], xmm1 + + ret +intel_aes_encrypt_init_256 ENDP + +ALIGN 16 +intel_aes_decrypt_init_256 PROC + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + push KS + push KEY + + call intel_aes_encrypt_init_256 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [14*16 + KS] + movdqu [14*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 7 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(14-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(14-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [7*16 + KS] + aesimc xmm0, xmm0 + movdqu [7*16 + KS], xmm0 + ret +intel_aes_decrypt_init_256 ENDP + + + +gen_aes_cbc_enc_func MACRO rnds + +LOCAL loop1 +LOCAL bail + + push inputLen + + mov ctx, [esp + 2*4 + 0*4] + mov output, [esp + 2*4 + 1*4] + mov input, [esp + 2*4 + 4*4] + mov inputLen, [esp + 2*4 + 5*4] + + movdqu xmm0, [252+ctx] + + movdqu xmm2, [0*16 + ctx] + movdqu xmm3, [1*16 + ctx] + movdqu xmm4, [2*16 + ctx] + movdqu xmm5, [3*16 + ctx] + movdqu xmm6, [4*16 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm1, [input] + pxor xmm1, xmm2 + pxor xmm0, xmm1 + + aesenc xmm0, xmm3 + aesenc xmm0, xmm4 + aesenc xmm0, xmm5 + aesenc xmm0, xmm6 + + i = 5 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesenc xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesenclast xmm0, xmm7 + + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + movdqu [252+ctx], xmm0 + + 
xor eax, eax + pop inputLen + ret + +ENDM + +gen_aes_cbc_dec_func MACRO rnds + +LOCAL loop7 +LOCAL loop1 +LOCAL dec1 +LOCAL bail + + push inputLen + + mov ctx, [esp + 2*4 + 0*4] + mov output, [esp + 2*4 + 1*4] + mov input, [esp + 2*4 + 4*4] + mov inputLen, [esp + 2*4 + 5*4] + +loop7: + cmp inputLen, 7*16 + jb dec1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + pxor xmm1, xmm7 + pxor xmm2, xmm7 + pxor xmm3, xmm7 + pxor xmm4, xmm7 + pxor xmm5, xmm7 + pxor xmm6, xmm7 + + i = 1 + WHILE i LT rnds + aes_dec_rnd i + i = i+1 + ENDM + aes_dec_last_rnd rnds + + movdqu xmm7, [252 + ctx] + pxor xmm0, xmm7 + movdqu xmm7, [0*16 + input] + pxor xmm1, xmm7 + movdqu xmm7, [1*16 + input] + pxor xmm2, xmm7 + movdqu xmm7, [2*16 + input] + pxor xmm3, xmm7 + movdqu xmm7, [3*16 + input] + pxor xmm4, xmm7 + movdqu xmm7, [4*16 + input] + pxor xmm5, xmm7 + movdqu xmm7, [5*16 + input] + pxor xmm6, xmm7 + movdqu xmm7, [6*16 + input] + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [252 + ctx], xmm7 + + lea input, [7*16 + input] + lea output, [7*16 + output] + sub inputLen, 7*16 + jmp loop7 +dec1: + + movdqu xmm3, [252 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + movdqa xmm4, xmm0 + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesdec xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesdeclast xmm0, xmm7 + pxor xmm3, xmm0 + + movdqu [output], xmm3 + movdqa xmm3, xmm4 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + movdqu [252 + ctx], xmm3 + xor eax, eax + 
pop inputLen + ret +ENDM + +ALIGN 16 +intel_aes_encrypt_cbc_128 PROC +gen_aes_cbc_enc_func 10 +intel_aes_encrypt_cbc_128 ENDP + +ALIGN 16 +intel_aes_encrypt_cbc_192 PROC +gen_aes_cbc_enc_func 12 +intel_aes_encrypt_cbc_192 ENDP + +ALIGN 16 +intel_aes_encrypt_cbc_256 PROC +gen_aes_cbc_enc_func 14 +intel_aes_encrypt_cbc_256 ENDP + +ALIGN 16 +intel_aes_decrypt_cbc_128 PROC +gen_aes_cbc_dec_func 10 +intel_aes_decrypt_cbc_128 ENDP + +ALIGN 16 +intel_aes_decrypt_cbc_192 PROC +gen_aes_cbc_dec_func 12 +intel_aes_decrypt_cbc_192 ENDP + +ALIGN 16 +intel_aes_decrypt_cbc_256 PROC +gen_aes_cbc_dec_func 14 +intel_aes_decrypt_cbc_256 ENDP + + + +ctrCtx textequ +CTR textequ + +gen_aes_ctr_func MACRO rnds + +LOCAL loop7 +LOCAL loop1 +LOCAL enc1 +LOCAL bail + + push inputLen + push ctrCtx + push CTR + push ebp + + mov ctrCtx, [esp + 4*5 + 0*4] + mov output, [esp + 4*5 + 1*4] + mov input, [esp + 4*5 + 4*4] + mov inputLen, [esp + 4*5 + 5*4] + + mov ctx, [4+ctrCtx] + + mov ebp, esp + sub esp, 7*16 + and esp, -16 + + movdqu xmm0, [8+ctrCtx] + mov ctrCtx, [ctrCtx + 8 + 3*4] + bswap ctrCtx + movdqu xmm1, [ctx + 0*16] + + pxor xmm0, xmm1 + + movdqa [esp + 0*16], xmm0 + movdqa [esp + 1*16], xmm0 + movdqa [esp + 2*16], xmm0 + movdqa [esp + 3*16], xmm0 + movdqa [esp + 4*16], xmm0 + movdqa [esp + 5*16], xmm0 + movdqa [esp + 6*16], xmm0 + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 1*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 2*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 3*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 4*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 5*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 6*16 + 3*4], CTR + + +loop7: + cmp inputLen, 7*16 + jb loop1 + + movdqu xmm0, [0*16 + esp] + movdqu 
xmm1, [1*16 + esp] + movdqu xmm2, [2*16 + esp] + movdqu xmm3, [3*16 + esp] + movdqu xmm4, [4*16 + esp] + movdqu xmm5, [5*16 + esp] + movdqu xmm6, [6*16 + esp] + + i = 1 + WHILE i LE 7 + aes_rnd i + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + (i-1)*16 + 3*4], CTR + + i = i+1 + ENDM + WHILE i LT rnds + aes_rnd i + i = i+1 + ENDM + aes_last_rnd rnds + + movdqu xmm7, [0*16 + input] + pxor xmm0, xmm7 + movdqu xmm7, [1*16 + input] + pxor xmm1, xmm7 + movdqu xmm7, [2*16 + input] + pxor xmm2, xmm7 + movdqu xmm7, [3*16 + input] + pxor xmm3, xmm7 + movdqu xmm7, [4*16 + input] + pxor xmm4, xmm7 + movdqu xmm7, [5*16 + input] + pxor xmm5, xmm7 + movdqu xmm7, [6*16 + input] + pxor xmm6, xmm7 + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + + lea input, [7*16 + input] + lea output, [7*16 + output] + sub inputLen, 7*16 + jmp loop7 + + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [esp] + add esp, 16 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesenc xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesenclast xmm0, xmm7 + + movdqu xmm7, [input] + pxor xmm0, xmm7 + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + + mov ctrCtx, [ebp + 4*5 + 0*4] + movdqu xmm0, [esp] + movdqu xmm1, [ctx + 0*16] + pxor xmm0, xmm1 + movdqu [8+ctrCtx], xmm0 + + + xor eax, eax + mov esp, ebp + pop ebp + pop CTR + pop ctrCtx + pop inputLen + ret +ENDM + + +ALIGN 16 +intel_aes_encrypt_ctr_128 PROC +gen_aes_ctr_func 10 +intel_aes_encrypt_ctr_128 ENDP + +ALIGN 16 +intel_aes_encrypt_ctr_192 PROC +gen_aes_ctr_func 12 +intel_aes_encrypt_ctr_192 ENDP + +ALIGN 16 +intel_aes_encrypt_ctr_256 PROC +gen_aes_ctr_func 14 +intel_aes_encrypt_ctr_256 ENDP + + +END diff --git 
a/security/nss/lib/freebl/intel-aes.h b/security/nss/lib/freebl/intel-aes.h new file mode 100644 index 0000000000..e7fe1a03ba --- /dev/null +++ b/security/nss/lib/freebl/intel-aes.h @@ -0,0 +1,143 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Prototypes of the functions defined in the assembler file. */ +void intel_aes_encrypt_init_128(const unsigned char *key, PRUint32 *expanded); +void intel_aes_encrypt_init_192(const unsigned char *key, PRUint32 *expanded); +void intel_aes_encrypt_init_256(const unsigned char *key, PRUint32 *expanded); +void intel_aes_decrypt_init_128(const unsigned char *key, PRUint32 *expanded); +void intel_aes_decrypt_init_192(const unsigned char *key, PRUint32 *expanded); +void intel_aes_decrypt_init_256(const unsigned char *key, PRUint32 *expanded); +SECStatus intel_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_ctr_128(CTRContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, 
+ unsigned int blocksize); +SECStatus intel_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_ctr_192(CTRContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus 
intel_aes_encrypt_ctr_256(CTRContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); + +#define native_aes_ecb_worker(encrypt, keysize) \ + ((encrypt) \ + ? ((keysize) == 16 ? intel_aes_encrypt_ecb_128 \ + : (keysize) == 24 ? intel_aes_encrypt_ecb_192 \ + : intel_aes_encrypt_ecb_256) \ + : ((keysize) == 16 ? intel_aes_decrypt_ecb_128 \ + : (keysize) == 24 ? intel_aes_decrypt_ecb_192 \ + : intel_aes_decrypt_ecb_256)) + +#define native_aes_cbc_worker(encrypt, keysize) \ + ((encrypt) \ + ? ((keysize) == 16 ? intel_aes_encrypt_cbc_128 \ + : (keysize) == 24 ? intel_aes_encrypt_cbc_192 \ + : intel_aes_encrypt_cbc_256) \ + : ((keysize) == 16 ? intel_aes_decrypt_cbc_128 \ + : (keysize) == 24 ? intel_aes_decrypt_cbc_192 \ + : intel_aes_decrypt_cbc_256)) + +#define intel_aes_ctr_worker(nr) \ + ((nr) == 10 ? intel_aes_encrypt_ctr_128 \ + : (nr) == 12 ? intel_aes_encrypt_ctr_192 \ + : intel_aes_encrypt_ctr_256) + +#define native_aes_init(encrypt, keysize) /* Statement macro: expands key into cx->k.expandedKey; key and cx are taken from the caller scope. keysize 16 selects the 128-bit routine, 24 the 192-bit one, any other value the 256-bit one. keysize is parenthesized so expression arguments compare as a whole, matching the worker macros above. */ \ + do { \ + if (encrypt) { \ + if ((keysize) == 16) \ + intel_aes_encrypt_init_128(key, cx->k.expandedKey); \ + else if ((keysize) == 24) \ + intel_aes_encrypt_init_192(key, cx->k.expandedKey); \ + else \ + intel_aes_encrypt_init_256(key, cx->k.expandedKey); \ + } else { \ + if ((keysize) == 16) \ + intel_aes_decrypt_init_128(key, cx->k.expandedKey); \ + else if ((keysize) == 24) \ + intel_aes_decrypt_init_192(key, cx->k.expandedKey); \ + else \ + intel_aes_decrypt_init_256(key, cx->k.expandedKey); \ + } \ + } while (0) diff --git a/security/nss/lib/freebl/intel-aes.s b/security/nss/lib/freebl/intel-aes.s new file mode 100644 index 0000000000..b242d233fe --- /dev/null +++ b/security/nss/lib/freebl/intel-aes.s @@ -0,0 +1,2485 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + + .text + +#define IV_OFFSET 256 + +/* + * Warning: the length values used in this module are "unsigned int" + * in C, which is 32-bit. When they're passed in registers, use only + * the low 32 bits, because the top half is unspecified. + * + * This is called from C code, so the contents of those bits can + * depend on the C compiler's optimization decisions. This means that + * mistakes might not be obvious in testing if those bits happen to be + * zero in your build. + * + * Exception: 32-bit lea instructions use a 64-bit address because the + * address size doesn't affect the result, and that form is more + * compactly encoded and preferred by compilers over a 32-bit address. + */ + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_encrypt_init_128,@function + .globl intel_aes_encrypt_init_128 + .align 16 +intel_aes_encrypt_init_128: + movups (%rdi), %xmm1 + movups %xmm1, (%rsi) + leaq 16(%rsi), %rsi + xorl %eax, %eax + + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */ + 
call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */ + call key_expansion128 + + ret + .size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128 + + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_decrypt_init_128,@function + .globl intel_aes_decrypt_init_128 + .align 16 +intel_aes_decrypt_init_128: + movups (%rdi), %xmm1 + movups %xmm1, (%rsi) + leaq 16(%rsi), %rsi + xorl %eax, %eax + + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 
0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */ + call key_expansion128 + + ret + .size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128 + + + .type key_expansion128,@function + .align 16 +key_expansion128: /* One step of the 128-bit key schedule. In: xmm1 = previous round key, xmm2 = aeskeygenassist output, rsi = store pointer, eax = 0 (both init routines clear it before the first call). Out: xmm1 = next round key, also stored at (rsi); rsi advanced by 16. Clobbers xmm2 and xmm3. */ + movd %eax, %xmm3 /* xmm3 = eax zero-extended to 128 bits, i.e. all zero when eax is 0 */ + pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of xmm2 into all four dwords */ + shufps $0x10, %xmm1, %xmm3 /* xmm3 = { xmm3[0], xmm3[0], xmm1[1], xmm1[0] } */ + pxor %xmm3, %xmm1 /* xmm1 ^= xmm3 */ + shufps $0x8c, %xmm1, %xmm3 /* xmm3 = { xmm3[0], xmm3[3], xmm1[0], xmm1[2] } */ + pxor %xmm2, %xmm1 /* fold in the broadcast keygenassist word */ + pxor %xmm3, %xmm1 /* with eax = 0, dword i of xmm1 is now the broadcast word XOR old dwords 0..i */ + movdqu %xmm1, (%rsi) /* store the new round key */ + addq $16, %rsi /* advance to the next round key slot */ + ret + .size key_expansion128, .-key_expansion128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_ecb_128,@function + .globl intel_aes_encrypt_ecb_128 + .align 16 +intel_aes_encrypt_ecb_128: + movdqu (%rdi), %xmm2 + movdqu 160(%rdi), %xmm12 + xor %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm2, %xmm3 + pxor %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm6 + pxor %xmm2, %xmm7 + pxor %xmm2, %xmm8 + pxor %xmm2, %xmm9 + pxor %xmm2, %xmm10 + +// complete loop unrolling + movdqu 16(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 
0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + 
.byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, 
%xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm2, %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, 
%xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_ecb_128,@function + .globl intel_aes_decrypt_ecb_128 + .align 16 +intel_aes_decrypt_ecb_128: + movdqu (%rdi), %xmm2 + movdqu 160(%rdi), %xmm12 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm12, %xmm3 + pxor %xmm12, %xmm4 + pxor %xmm12, %xmm5 + pxor %xmm12, %xmm6 + pxor %xmm12, %xmm7 + pxor %xmm12, %xmm8 + pxor %xmm12, %xmm9 + pxor %xmm12, %xmm10 + +// complete loop unrolling + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 
0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 
0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm12, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6,
%xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_cbc_128,@function + .globl intel_aes_encrypt_cbc_128 + .align 16 +intel_aes_encrypt_cbc_128: + testl %r9d, %r9d + je 2f + +// leaq IV_OFFSET(%rdi), %rdx + leaq 256(%rdi), %rdx + + movdqu (%rdx), %xmm0 + movdqu (%rdi), %xmm2 + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + + xorl %eax, %eax +1: movdqu (%r8, %rax), %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm2, %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte
0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm1, %xmm0 + addl $16, %eax + cmpl %eax, %r9d + jne 1b + + movdqu %xmm0, (%rdx) + +2: xor %eax, %eax + ret + .size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_cbc_128,@function + .globl intel_aes_decrypt_cbc_128 + .align 16 +intel_aes_decrypt_cbc_128: +// leaq IV_OFFSET(%rdi), %rdx + leaq 256(%rdi), %rdx + + movdqu (%rdx), %xmm0 /* iv */ + movdqu (%rdi), %xmm2 /* first key block */ + movdqu 160(%rdi), %xmm12 /* last key block */ + xorl %eax, %eax + cmpl $128, %r9d + jb 1f + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 /* 1st data block */ + movdqu 16(%r8, %rax), %xmm4 /* 2d data block */ + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm12, %xmm3 + pxor %xmm12, %xmm4 + pxor %xmm12, %xmm5 + pxor %xmm12, %xmm6 + pxor %xmm12, %xmm7 + pxor %xmm12, %xmm8 + pxor %xmm12, %xmm9 + pxor %xmm12, %xmm10 + +// complete loop unrolling + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 
*/ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 
0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + pxor %xmm0, %xmm3 + movdqu (%r8, %rax), %xmm0 + pxor %xmm0, %xmm4 + movdqu 16(%r8, %rax), %xmm0 + pxor %xmm0, %xmm5 + movdqu 32(%r8, %rax), %xmm0 + pxor %xmm0, %xmm6 + movdqu 48(%r8, %rax), %xmm0 + pxor %xmm0, %xmm7 + movdqu 64(%r8, %rax), %xmm0 + pxor %xmm0, %xmm8 + movdqu 80(%r8, %rax), %xmm0 + pxor %xmm0, %xmm9 + movdqu 96(%r8, %rax), %xmm0 + pxor %xmm0, %xmm10 + movdqu 112(%r8, %rax), %xmm0 + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + +4: movdqu (%r8, %rax), %xmm1 + movdqa %xmm1, %xmm13 + pxor %xmm12, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 
0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + pxor %xmm0, %xmm1 + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm13, %xmm0 + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: movdqu %xmm0, (%rdx) + + xor %eax, %eax + ret + .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128 + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_encrypt_init_192,@function + .globl intel_aes_encrypt_init_192 + .align 16 +intel_aes_encrypt_init_192: + movdqu (%rdi), %xmm1 + movq 16(%rdi), %xmm3 + movdqu %xmm1, (%rsi) + movq %xmm3, 16(%rsi) + leaq 24(%rsi), %rsi + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ + call key_expansion192 + + ret + .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192 + + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type 
intel_aes_decrypt_init_192,@function + .globl intel_aes_decrypt_init_192 + .align 16 +intel_aes_decrypt_init_192: + movdqu (%rdi), %xmm1 + movq 16(%rdi), %xmm3 + movdqu %xmm1, (%rsi) + movq %xmm3, 16(%rsi) + leaq 24(%rsi), %rsi + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -24(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -24(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -24(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 
0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ + call key_expansion192 + + ret + .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192 + + + .type key_expansion192,@function + .align 16 +key_expansion192: + pshufd $0x55, %xmm2, %xmm2 + xor %eax, %eax + movd %eax, %xmm4 + shufps $0x10, %xmm1, %xmm4 + pxor %xmm4, %xmm1 + shufps $0x8c, %xmm1, %xmm4 + pxor %xmm2, %xmm1 + pxor %xmm4, %xmm1 + movdqu %xmm1, (%rsi) + addq $16, %rsi + + pshufd $0xff, %xmm1, %xmm4 + movd %eax, %xmm5 + shufps $0x00, %xmm3, %xmm5 + shufps $0x08, %xmm3, %xmm5 + pxor %xmm4, %xmm3 + pxor %xmm5, %xmm3 + movq %xmm3, (%rsi) + addq $8, %rsi + ret + .size key_expansion192, .-key_expansion192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_ecb_192,@function + .globl intel_aes_encrypt_ecb_192 + .align 16 +intel_aes_encrypt_ecb_192: + movdqu (%rdi), %xmm2 + movdqu 192(%rdi), %xmm14 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm2, %xmm3 + pxor %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm6 + pxor %xmm2, %xmm7 + pxor %xmm2, %xmm8 + pxor %xmm2, %xmm9 + pxor %xmm2, %xmm10 + +// complete loop unrolling + movdqu 16(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 
0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + 
.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + 
.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */ + .byte 
0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm2, %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to
output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_ecb_192,@function + .globl intel_aes_decrypt_ecb_192 + .align 16 +intel_aes_decrypt_ecb_192: + movdqu (%rdi), %xmm2 + movdqu 192(%rdi), %xmm14 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm14, %xmm3 + pxor %xmm14, %xmm4 + pxor %xmm14, %xmm5 + pxor %xmm14, %xmm6 + pxor %xmm14, %xmm7 + pxor %xmm14, %xmm8 + pxor %xmm14, %xmm9 + pxor %xmm14, %xmm10 + +// complete loop unrolling + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + 
.byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 
0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + 
movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm14, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_cbc_192,@function + .globl intel_aes_encrypt_cbc_192 + .align 16 +intel_aes_encrypt_cbc_192: /* CBC encryption with the 13 round keys at cx+0 .. cx+192, one 16-byte block per loop iteration; the chaining value is read from and written back to cx+256; %eax is 0 on return */ + testl %r9d, %r9d + je 2f /* inputLen == 0: return without touching the chaining value */ + +// leaq IV_OFFSET(%rdi), %rdx + leaq 256(%rdi), %rdx /* %rdx = cx + 256 (IV_OFFSET in the commented-out form); the outputLen argument in %rdx is overwritten */ + + movdqu (%rdx), %xmm0 /* %xmm0 = current chaining value */ + movdqu (%rdi), %xmm2 /* %xmm2 .. %xmm14 = round keys at cx+0 .. cx+192 */ + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + movdqu 192(%rdi), %xmm14 + + xorl %eax,
%eax /* %rax = byte offset into input and output */ +1: movdqu (%r8, %rax), %xmm1 + pxor %xmm0, %xmm1 /* CBC: xor the block with the previous ciphertext (or the initial chaining value) */ + pxor %xmm2, %xmm1 /* xor in the round key at cx+0 */ + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm1, %xmm0 /* this ciphertext block is the next chaining value */ + addl $16, %eax + cmpl %eax, %r9d + jne 1b /* NOTE(review): exits only when the offset equals inputLen exactly, so inputLen is presumably always a multiple of 16 - confirm against callers */ + + movdqu %xmm0, (%rdx) /* write the final chaining value back to cx+256 */ + +2: xor %eax, %eax + ret + .size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_cbc_192,@function + .globl intel_aes_decrypt_cbc_192 + .align 16 +intel_aes_decrypt_cbc_192: +// leaq IV_OFFSET(%rdi), %rdx + leaq 256(%rdi), %rdx /* %rdx = cx + 256; the outputLen argument in %rdx is overwritten */ + + movdqu (%rdx), %xmm0 /* %xmm0 = current chaining value */ + movdqu (%rdi), %xmm2 /* key at cx+0, consumed by aesdeclast */ + movdqu 192(%rdi), %xmm14 /* key at cx+192, xored into the ciphertext first */ + xorl %eax, %eax + cmpl $128, %r9d + jb 1f /* fewer than 128 bytes: go straight to the one-block-at-a-time loop */ + leal -128(%r9), %r11d /* %r11d = inputLen - 128, the last offset at which a full 8-block batch fits */ +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax),
%xmm10 + pxor %xmm14, %xmm3 + pxor %xmm14, %xmm4 + pxor %xmm14, %xmm5 + pxor %xmm14, %xmm6 + pxor %xmm14, %xmm7 + pxor %xmm14, %xmm8 + pxor %xmm14, %xmm9 + pxor %xmm14, %xmm10 + +// complete loop unrolling + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, 
%xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, 
%xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 
0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + pxor %xmm0, %xmm3 /* CBC: first block of the batch is xored with the incoming chaining value */ + movdqu (%r8, %rax), %xmm0 /* re-read ciphertext block i; it un-chains decrypted block i+1 */ + pxor %xmm0, %xmm4 + movdqu 16(%r8, %rax), %xmm0 + pxor %xmm0, %xmm5 + movdqu 32(%r8, %rax), %xmm0 + pxor %xmm0, %xmm6 + movdqu 48(%r8, %rax), %xmm0 + pxor %xmm0, %xmm7 + movdqu 64(%r8, %rax), %xmm0 + pxor %xmm0, %xmm8 + movdqu 80(%r8, %rax), %xmm0 + pxor %xmm0, %xmm9 + movdqu 96(%r8, %rax), %xmm0 + pxor %xmm0, %xmm10 + movdqu 112(%r8, %rax), %xmm0 /* last ciphertext block of the batch stays in %xmm0 as the new chaining value */ + movdqu %xmm3, (%rsi, %rax) /* stores begin only after every ciphertext re-read above, so the batch does not need the input to survive the stores (e.g. output == input) */ + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) + addl $128, %eax + cmpl %r11d, %eax + jbe 2b /* repeat while another full 128-byte batch fits (%r11d = inputLen - 128) */ +1: cmpl %eax, %r9d + je 5f /* nothing left over */ + + movdqu 16(%rdi), %xmm3 /* tail loop: keep the keys at cx+16 .. cx+176 in %xmm3 .. %xmm13 */ + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + +4: movdqu (%r8, %rax), %xmm1 + movdqa %xmm1, %xmm15 /* keep a copy of the ciphertext; it becomes the chaining value after this block */ + pxor %xmm14, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte
0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + pxor %xmm0, %xmm1 /* CBC: xor with the chaining value held in %xmm0 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm15, %xmm0 /* the ciphertext saved in %xmm15 becomes the chaining value */ + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: movdqu %xmm0, (%rdx) /* write the chaining value back through %rdx (cx + 256) */ + + xor %eax, %eax + ret + .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192 + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_encrypt_init_256,@function + .globl intel_aes_encrypt_init_256 + .align 16 +intel_aes_encrypt_init_256: /* expands the 32-byte key at %rdi into 15 round keys (240 bytes) stored consecutively from %rsi; %eax is 0 on return */ + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm3 + movdqu %xmm1, (%rsi) /* the first two round keys are the raw key halves */ + movdqu %xmm3, 16(%rsi) + leaq 32(%rsi), %rsi /* %rsi = next slot to fill */ + xor %eax, %eax /* key_expansion256 seeds %xmm6 from %eax */ + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion256 /* each call stores two more round keys and advances %rsi by 32 */ + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ + pxor %xmm6, %xmm6 /* 15th key: the first half of key_expansion256 inlined, with %xmm6 zeroed directly */ + pshufd $0xff, %xmm2, %xmm2 + shufps $0x10, %xmm1, %xmm6 + pxor %xmm6, %xmm1 + shufps $0x8c, %xmm1, %xmm6 + pxor %xmm2, %xmm1 + pxor %xmm6, %xmm1 + movdqu %xmm1, (%rsi) + + ret + .size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256 + + +/* in %rdi : the key + in %rsi : buffer for expanded key
+*/ + .type intel_aes_decrypt_init_256,@function + .globl intel_aes_decrypt_init_256 + .align 16 +intel_aes_decrypt_init_256: /* same expansion as intel_aes_encrypt_init_256, but every stored key except the first and the last is passed through aesimc; presumably so the decrypt routines can feed them to aesdec - see the Intel AES-NI documentation */ + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm3 + movdqu %xmm1, (%rsi) /* first key is stored unmodified */ + .byte 0x66,0x0f,0x38,0xdb,0xe3 /* aesimc %xmm3, %xmm4 */ + movdqu %xmm4, 16(%rsi) /* second key is stored in aesimc form; %xmm3 keeps the untransformed value for the expansion */ + leaq 32(%rsi), %rsi + xor %eax, %eax /* key_expansion256 seeds %xmm6 from %eax */ + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion256 /* stores two new keys, now at -32(%rsi) and -16(%rsi), and leaves them in %xmm1 and %xmm3 */ + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) /* overwrite both freshly stored keys with their aesimc forms */ + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte
0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ + pxor %xmm6, %xmm6 /* last key: the first half of key_expansion256 inlined, with %xmm6 zeroed directly */ + pshufd $0xff, %xmm2, %xmm2 + shufps $0x10, %xmm1, %xmm6 + pxor %xmm6, %xmm1 + shufps $0x8c, %xmm1, %xmm6 + pxor %xmm2, %xmm1 + pxor %xmm6, %xmm1 + movdqu %xmm1, (%rsi) /* last key is stored without aesimc */ + + ret + .size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256 + + + .type key_expansion256,@function + .align 16 +key_expansion256: /* file-local helper (no .globl). In: %xmm1 and %xmm3 = previous two round keys, %xmm2 = aeskeygenassist result computed from %xmm3, %eax = 0 in both visible callers, %rsi = output slot. Out: two new 16-byte keys stored at (%rsi) and 16(%rsi) and left in %xmm1 and %xmm3, %rsi advanced by 32. Overwrites %xmm2, %xmm4 and %xmm6 */ + movd %eax, %xmm6 /* %xmm6 = 0 when %eax is 0 */ + pshufd $0xff, %xmm2, %xmm2 /* broadcast the top dword of the aeskeygenassist result */ + shufps $0x10, %xmm1, %xmm6 + pxor %xmm6, %xmm1 + shufps $0x8c, %xmm1, %xmm6 + pxor %xmm2, %xmm1 + pxor %xmm6, %xmm1 + movdqu %xmm1, (%rsi) /* first new key */ + + addq $16, %rsi + .byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00 /* aeskeygenassist $0, %xmm1, %xmm4 */ + pshufd $0xaa, %xmm4, %xmm4 /* broadcast dword 2 of the second aeskeygenassist result */ + shufps $0x10, %xmm3, %xmm6 /* %xmm6 carries over from the first half; it is not re-zeroed here */ + pxor %xmm6, %xmm3 + shufps $0x8c, %xmm3, %xmm6 + pxor %xmm4, %xmm3 + pxor %xmm6, %xmm3 + movdqu %xmm3, (%rsi) /* second new key */ + addq $16, %rsi + ret + .size key_expansion256, .-key_expansion256 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_ecb_256,@function + .globl intel_aes_encrypt_ecb_256 + .align 16 +intel_aes_encrypt_ecb_256: + movdqu (%rdi), %xmm2 /* key at cx+0, xored into each block first */ + movdqu 224(%rdi), %xmm15 /* key at cx+224, consumed by aesenclast */ + xorl %eax, %eax /* %rax = byte offset into input and output */ +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f /* fewer than 128 bytes: skip the 8-block unrolled loop */ +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d /* %r11d = inputLen - 128, the last offset at which a full 8-block batch fits */ +2: movdqu (%r8, %rax), %xmm3 /* load 8 input blocks into %xmm3 .. %xmm10 */ + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm2, %xmm3 + pxor %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm6 + pxor %xmm2, %xmm7 + pxor %xmm2, %xmm8 + pxor %xmm2,
%xmm9 + pxor %xmm2, %xmm10 + +// complete loop unrolling + movdqu 16(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 
0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 
0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + movdqu 192(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 
0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 208(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xdf /* aesenclast %xmm15, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xe7 /* aesenclast %xmm15, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xef /* aesenclast %xmm15, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xf7 /* aesenclast %xmm15, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xff /* aesenclast %xmm15, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xc7 /* aesenclast %xmm15, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xd7 /* aesenclast %xmm15, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu (%rdi), %xmm8 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 
80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 /* the key at cx+112 is not preloaded: it shares %xmm8 with the key at cx+0 inside the loop */ + movdqu 128(%rdi), %xmm9 + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm8, %xmm1 /* %xmm8 holds the key at cx+0 at this point */ + movdqu 112(%rdi), %xmm8 /* swap in the key at cx+112 for the seventh aesenc below */ + .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 /* restore the key at cx+0 for the next iteration */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b /* NOTE(review): exits only when the offset equals inputLen exactly, so inputLen is presumably always a multiple of 16 - confirm against callers */ + +5: xor %eax, %eax + ret + .size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_ecb_256,@function + .globl intel_aes_decrypt_ecb_256 + .align 16 +intel_aes_decrypt_ecb_256: + movdqu (%rdi), %xmm2 + movdqu 224(%rdi), %xmm15 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal
-8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm15, %xmm3 + pxor %xmm15, %xmm4 + pxor %xmm15, %xmm5 + pxor %xmm15, %xmm6 + pxor %xmm15, %xmm7 + pxor %xmm15, %xmm8 + pxor %xmm15, %xmm9 + pxor %xmm15, %xmm10 + +// complete loop unrolling + movdqu 208(%rdi), %xmm1 + movdqu 192(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* 
aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* 
aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 
0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 
32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 112(%rdi), %xmm8 + movdqu 128(%rdi), %xmm9 + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm15, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */ + movdqu 112(%rdi), %xmm8 + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by 
caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_cbc_256,@function + .globl intel_aes_encrypt_cbc_256 + .align 16 +intel_aes_encrypt_cbc_256: + testl %r9d, %r9d + je 2f + +// leaq IV_OFFSET(%rdi), %rdx + leaq 256(%rdi), %rdx + + movdqu (%rdx), %xmm0 + movdqu (%rdi), %xmm8 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 128(%rdi), %xmm9 + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + movdqu 224(%rdi), %xmm15 + + xorl %eax, %eax +1: movdqu (%r8, %rax), %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm8, %xmm1 + movdqu 112(%rdi), %xmm8 + .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm1, %xmm0 + addl $16, %eax + cmpl %eax, %r9d + jne 1b + + movdqu %xmm0, (%rdx) + +2: xor %eax, %eax + ret + .size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256 + + +/* in %rdi : cx - 
context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_cbc_256,@function + .globl intel_aes_decrypt_cbc_256 + .align 16 +intel_aes_decrypt_cbc_256: +// leaq IV_OFFSET(%rdi), %rdx + leaq 256(%rdi), %rdx + + movdqu (%rdx), %xmm0 + movdqu (%rdi), %xmm2 + movdqu 224(%rdi), %xmm15 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm15, %xmm3 + pxor %xmm15, %xmm4 + pxor %xmm15, %xmm5 + pxor %xmm15, %xmm6 + pxor %xmm15, %xmm7 + pxor %xmm15, %xmm8 + pxor %xmm15, %xmm9 + pxor %xmm15, %xmm10 + +// complete loop unrolling + movdqu 208(%rdi), %xmm1 + movdqu 192(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 
0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 
0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 
0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 
/* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + pxor %xmm0, %xmm3 + movdqu (%r8, %rax), %xmm0 + pxor %xmm0, %xmm4 + movdqu 16(%r8, %rax), %xmm0 + pxor %xmm0, %xmm5 + movdqu 32(%r8, %rax), %xmm0 + pxor %xmm0, %xmm6 + movdqu 48(%r8, %rax), %xmm0 + pxor %xmm0, %xmm7 + movdqu 64(%r8, %rax), %xmm0 + pxor %xmm0, %xmm8 + movdqu 80(%r8, %rax), %xmm0 + pxor %xmm0, %xmm9 + movdqu 96(%r8, %rax), %xmm0 + pxor %xmm0, %xmm10 + movdqu 112(%r8, %rax), %xmm0 + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 112(%rdi), %xmm8 + movdqu 128(%rdi), %xmm9 + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm15, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, 
%xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */ + movdqu 112(%rdi), %xmm8 + pxor %xmm0, %xmm1 + movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */ + movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */ + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: movdqu %xmm0, (%rdx) + + xor %eax, %eax + ret + .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256 diff --git a/security/nss/lib/freebl/intel-gcm-wrap.c b/security/nss/lib/freebl/intel-gcm-wrap.c new file mode 100644 index 0000000000..5adbd81f74 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm-wrap.c @@ -0,0 +1,475 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* Copyright(c) 2013, Intel Corp. 
*/ + +/* Wrapper functions for Intel optimized implementation of AES-GCM */ + +#ifdef USE_HW_AES + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapii.h" +#include "blapit.h" +#include "gcm.h" +#include "ctr.h" +#include "secerr.h" +#include "prtypes.h" +#include "pkcs11t.h" + +#include <limits.h> + +#include "intel-gcm.h" +#include "rijndael.h" + +#include <emmintrin.h> +#include <tmmintrin.h> + +struct intel_AES_GCMContextStr { + unsigned char Htbl[16 * AES_BLOCK_SIZE]; + unsigned char X0[AES_BLOCK_SIZE]; + unsigned char T[AES_BLOCK_SIZE]; + unsigned char CTR[AES_BLOCK_SIZE]; + AESContext *aes_context; + unsigned long tagBits; + unsigned long Alen; + unsigned long Mlen; + freeblCipherFunc cipher; + PRBool ctr_context_init; + gcmIVContext gcm_iv; +}; + +SECStatus intel_aes_gcmInitCounter(intel_AES_GCMContext *gcm, + const unsigned char *iv, + unsigned long ivLen, unsigned long tagBits, + const unsigned char *aad, unsigned long aadLen); + +intel_AES_GCMContext * +intel_AES_GCM_CreateContext(void *context, + freeblCipherFunc cipher, + const unsigned char *params) +{ + intel_AES_GCMContext *gcm = NULL; + AESContext *aes = (AESContext *)context; + const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params; + SECStatus rv; + + gcm = PORT_ZNew(intel_AES_GCMContext); + if (gcm == NULL) { + return NULL; + } + + /* initialize context fields */ + gcm->aes_context = aes; + gcm->cipher = cipher; + gcm->Alen = 0; + gcm->Mlen = 0; + gcm->ctr_context_init = PR_FALSE; + + /* first prepare H and its derivatives for ghash */ + intel_aes_gcmINIT(gcm->Htbl, (unsigned char *)aes->k.expandedKey, aes->Nr); + + gcm_InitIVContext(&gcm->gcm_iv); + + /* if gcmParams is NULL, then we are creating a PKCS #11 MESSAGE + * style context, in which we initialize the key once, then do separate + * iv/aad's for each message. If we are doing that kind of operation, + * we've finished with init here.
We'll init the Counter in each AEAD + * call */ + if (gcmParams == NULL) { + return gcm; + } + + rv = intel_aes_gcmInitCounter(gcm, gcmParams->pIv, + gcmParams->ulIvLen, gcmParams->ulTagBits, + gcmParams->pAAD, gcmParams->ulAADLen); + if (rv != SECSuccess) { + PORT_Free(gcm); + return NULL; + } + gcm->ctr_context_init = PR_TRUE; + + return gcm; +} + +SECStatus +intel_aes_gcmInitCounter(intel_AES_GCMContext *gcm, + const unsigned char *iv, unsigned long ivLen, + unsigned long tagBits, + const unsigned char *aad, unsigned long aadLen) +{ + unsigned char buff[AES_BLOCK_SIZE]; /* aux buffer */ + unsigned long IV_whole_len = ivLen & (~0xful); + unsigned int IV_remainder_len = ivLen & 0xful; + unsigned long AAD_whole_len = aadLen & (~0xful); + unsigned int AAD_remainder_len = aadLen & 0xful; + unsigned int j; + __m128i BSWAP_MASK = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m128i ONE = _mm_set_epi32(0, 0, 0, 1); + SECStatus rv; + + if (ivLen == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (tagBits != 128 && tagBits != 120 && tagBits != 112 && + tagBits != 104 && tagBits != 96 && tagBits != 64 && + tagBits != 32) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + gcm->tagBits = tagBits; + + /* reset the aad and message length counters */ + gcm->Alen = 0; + gcm->Mlen = 0; + + // Limit AADLen in accordance with SP800-38D + if (sizeof(AAD_whole_len) >= 8 && AAD_whole_len > (1ULL << 61) - 1) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + /* Initial TAG value is zero */ + _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128()); + _mm_storeu_si128((__m128i *)gcm->X0, _mm_setzero_si128()); + + /* Init the counter */ + if (ivLen == 12) { + _mm_storeu_si128((__m128i *)gcm->CTR, + _mm_setr_epi32(((unsigned int *)iv)[0], + ((unsigned int *)iv)[1], + ((unsigned int *)iv)[2], + 0x01000000)); + } else { + /* If IV size is not 96 bits, then the initial counter value is GHASH + * of the 
IV */ + intel_aes_gcmAAD(gcm->Htbl, (unsigned char *)iv, IV_whole_len, gcm->T); + + /* Partial block */ + if (IV_remainder_len) { + PORT_Memset(buff, 0, AES_BLOCK_SIZE); + PORT_Memcpy(buff, iv + IV_whole_len, IV_remainder_len); + intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T); + } + + intel_aes_gcmTAG( + gcm->Htbl, + gcm->T, + ivLen, + 0, + gcm->X0, + gcm->CTR); + + /* TAG should be zero again */ + _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128()); + } + + /* Encrypt the initial counter, will be used to encrypt the GHASH value, + * in the end */ + rv = (*gcm->cipher)(gcm->aes_context, gcm->X0, &j, AES_BLOCK_SIZE, gcm->CTR, + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + return SECFailure; + } + + /* Promote the counter by 1 */ + _mm_storeu_si128((__m128i *)gcm->CTR, _mm_shuffle_epi8(_mm_add_epi32(ONE, _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)gcm->CTR), BSWAP_MASK)), BSWAP_MASK)); + + /* Now hash AAD - it would actually make sense to separate the context + * creation from the AAD, because that would allow reusing H, which + * only changes when the AES key changes, and not with every packet, like the + * IV and AAD */ + intel_aes_gcmAAD(gcm->Htbl, (unsigned char *)aad, AAD_whole_len, gcm->T); + if (AAD_remainder_len) { + PORT_Memset(buff, 0, AES_BLOCK_SIZE); + PORT_Memcpy(buff, aad + AAD_whole_len, AAD_remainder_len); + intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T); + } + gcm->Alen += aadLen; + return SECSuccess; +} + +void +intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit) +{ + PORT_Memset(gcm, 0, sizeof(intel_AES_GCMContext)); + if (freeit) { + PORT_Free(gcm); + } +} + +SECStatus +intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + unsigned int j; + + // GCM has a 16 octet block, with a
32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4) { + unsigned long long inlen_ull = inlen; + if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + if (UINT_MAX - inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen + tagBytes) { + *outlen = inlen + tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + intel_aes_gcmENC( + inbuf, + outbuf, + gcm, + inlen); + + gcm->Mlen += inlen; + + intel_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + gcm->Alen, + gcm->X0, + T); + + *outlen = inlen + tagBytes; + + for (j = 0; j < tagBytes; j++) { + outbuf[inlen + j] = T[j]; + } + return SECSuccess; +} + +SECStatus +intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const unsigned char *intag; + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + /* get the authentication block */ + if (inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + inlen -= tagBytes; + intag = inbuf + inlen; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4) { + unsigned long long inlen_ull = inlen; + if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + 
return SECFailure; + } + + intel_aes_gcmDEC( + inbuf, + outbuf, + gcm, + inlen); + + gcm->Mlen += inlen; + intel_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + gcm->Alen, + gcm->X0, + T); + + if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) { + memset(outbuf, 0, inlen); + *outlen = 0; + /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */ + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + *outlen = inlen; + + return SECSuccess; +} + +SECStatus +intel_AES_GCM_EncryptAEAD(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + SECStatus rv; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4) { + unsigned long long inlen_ull = inlen; + if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* if we were initialized with the C_EncryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = gcm_GenerateIV(&gcm->gcm_iv, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulIvFixedBits, gcmParams->ivGenerator); + if (rv != SECSuccess) { + return SECFailure; + } + + rv = intel_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, +
gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + intel_aes_gcmENC(inbuf, outbuf, gcm, inlen); + + gcm->Mlen += inlen; + + intel_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T); + + *outlen = inlen; + PORT_Memcpy(gcmParams->pTag, T, tagBytes); + return SECSuccess; +} + +SECStatus +intel_AES_GCM_DecryptAEAD(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const unsigned char *intag; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + SECStatus rv; + + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* if we were initialized with the C_DecryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4) { + unsigned long long inlen_ull = inlen; + if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = intel_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + intag = gcmParams->pTag; + PORT_Assert(tagBytes != 0); + + 
intel_aes_gcmDEC(inbuf, outbuf, gcm, inlen); + + gcm->Mlen += inlen; + intel_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T); + + if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) { + memset(outbuf, 0, inlen); + *outlen = 0; + /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */ + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + *outlen = inlen; + + return SECSuccess; +} +#endif diff --git a/security/nss/lib/freebl/intel-gcm-x64-masm.asm b/security/nss/lib/freebl/intel-gcm-x64-masm.asm new file mode 100644 index 0000000000..07ddefbc1e --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm-x64-masm.asm @@ -0,0 +1,1294 @@ +; LICENSE: +; This submission to NSS is to be made available under the terms of the +; Mozilla Public License, v. 2.0. You can obtain one at http: +; //mozilla.org/MPL/2.0/. +;############################################################################### +; Copyright(c) 2014, Intel Corp. +; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.DATA +ALIGN 16 +Lone dq 1,0 +Ltwo dq 2,0 +Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh +Lpoly dq 01h, 0c200000000000000h + +.CODE + + +GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4 + vpclmulqdq TMP1, SRC2, SRC1, 0h + vpclmulqdq TMP4, SRC2, SRC1, 011h + + vpshufd TMP2, SRC2, 78 + vpshufd TMP3, SRC1, 78 + vpxor TMP2, TMP2, SRC2 + vpxor TMP3, TMP3, SRC1 + + vpclmulqdq TMP2, TMP2, TMP3, 0h + vpxor TMP2, TMP2, TMP1 + vpxor TMP2, TMP2, TMP4 + + vpslldq TMP3, TMP2, 8 + vpsrldq TMP2, TMP2, 8 + + vpxor TMP1, TMP1, TMP3 + vpxor TMP4, TMP4, TMP2 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpxor DST, TMP1, TMP4 + + ENDM +
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the final GCM tag +; void intel_aes_gcmTAG(unsigned char Htbl[16*16], +; unsigned char *Tp, +; unsigned int Mlen, +; unsigned int Alen, +; unsigned char *X0, +; unsigned char *TAG); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmTAG PROC + +Htbl textequ +Tp textequ +Mlen textequ +Alen textequ +X0 textequ +TAG textequ + +T textequ +TMP0 textequ + + mov X0, [rsp + 1*8 + 4*8] + mov TAG, [rsp + 1*8 + 5*8] + + vzeroupper + vmovdqu T, XMMWORD PTR[Tp] + vpxor TMP0, TMP0, TMP0 + + shl Mlen, 3 + shl Alen, 3 + + ;vpinsrq TMP0, TMP0, Mlen, 0 + ;vpinsrq TMP0, TMP0, Alen, 1 + ; workaround the ml64.exe vpinsrq issue + vpinsrd TMP0, TMP0, r8d, 0 + vpinsrd TMP0, TMP0, r9d, 2 + shr Mlen, 32 + shr Alen, 32 + vpinsrd TMP0, TMP0, r8d, 1 + vpinsrd TMP0, TMP0, r9d, 3 + + vpxor T, T, TMP0 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 + + vpshufb T, T, [Lbswap_mask] + vpxor T, T, [X0] + vmovdqu XMMWORD PTR[TAG], T + vzeroupper + + ret + +intel_aes_gcmTAG ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the H table +; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmINIT PROC + +Htbl textequ +KS textequ +NR textequ + +T textequ +TMP0 textequ + + vzeroupper + ; AES-ENC(0) + vmovdqu T, XMMWORD PTR[KS] + lea KS, [16 + KS] + dec NR +Lenc_loop: + vaesenc T, T, [KS] + lea KS, [16 + KS] + dec NR + jnz Lenc_loop + + vaesenclast T, T, [KS] + vpshufb T, T, [Lbswap_mask] + + ;Calculate H` = GFMUL(H, 2) + vpsrad xmm3, T, 31 + vpshufd xmm3, xmm3, 0ffh + vpand xmm5, xmm3, [Lpoly] + vpsrld xmm3, T, 31 + vpslld xmm4, T, 1 + vpslldq xmm3, xmm3, 4 + vpxor T, xmm4, xmm3 + vpxor T, T, xmm5 + + vmovdqu TMP0, T + vmovdqu XMMWORD 
PTR[Htbl + 0*16], T + + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2 + + i = 1 + WHILE i LT 8 + GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 + vmovdqu XMMWORD PTR[Htbl + i*16], T + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2 + i = i+1 + ENDM + vzeroupper + ret +intel_aes_gcmINIT ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Authenticate only +; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmAAD PROC + +Htbl textequ +inp textequ +len textequ +Tp textequ +hlp0 textequ + +DATA textequ +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +Xhi textequ + +KARATSUBA_AAD MACRO i + vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h + vpxor TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h + vpxor TMP2, TMP2, TMP3 +ENDM + + test len, len + jnz LbeginAAD + ret + +LbeginAAD: + vzeroupper + + sub rsp, 2*16 + vmovdqu XMMWORD PTR[rsp + 0*16], xmm6 + vmovdqu XMMWORD PTR[rsp + 1*16], xmm7 + + vpxor Xhi, Xhi, Xhi + + vmovdqu T, XMMWORD PTR[Tp] + ;we hash 8 block each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first + mov hlp0, len + and hlp0, 128-1 + jz Lmod_loop + + and len, -128 + sub hlp0, 16 + + ; Prefix block + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + + vpclmulqdq TMP0, DATA, [Htbl + hlp0], 0h + vpclmulqdq TMP1, DATA, [Htbl + hlp0], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + hlp0], 0h + + lea inp, [inp+16] + test hlp0, hlp0 + jnz Lpre_loop + jmp Lred1 + + ;hash remaining 
prefix bocks (up to 7 total prefix blocks) +Lpre_loop: + + sub hlp0, 16 + + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP3, DATA, [Htbl + hlp0], 0h + vpxor TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, [Htbl + hlp0], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + hlp0], 0h + vpxor TMP2, TMP2, TMP3 + + test hlp0, hlp0 + lea inp, [inp+16] + jnz Lpre_loop + +Lred1: + + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, TMP1, TMP3 + vpxor T, TMP0, TMP2 + + +Lmod_loop: + + sub len, 16*8 + jb Ldone + ; Block #0 + vmovdqu DATA, XMMWORD PTR[inp + 16*7] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP0, DATA, [Htbl + 0*16], 0h + vpclmulqdq TMP1, DATA, [Htbl + 0*16], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + 0*16], 0h + + ; Block #1 + vmovdqu DATA, XMMWORD PTR[inp + 16*6] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 1 + + ; Block #2 + vmovdqu DATA, XMMWORD PTR[inp + 16*5] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 2 + + vpxor T, T, TMP4 ;reduction stage 1b + + ; Block #3 + vmovdqu DATA, XMMWORD PTR[inp + 16*4] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 3 + ; Block #4 + vmovdqu DATA, XMMWORD PTR[inp + 16*3] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 4 + + vpxor T, T, TMP4 ;reduction stage 2b + ; Block #5 + vmovdqu DATA, XMMWORD PTR[inp + 16*2] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 5 + + vpxor T, T, Xhi ;reduction finalize + ; Block #6 + vmovdqu DATA, XMMWORD PTR[inp + 16*1] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 6 + ; Block #7 + vmovdqu DATA, XMMWORD PTR[inp + 16*0] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + 
KARATSUBA_AAD 7 + ; Aggregated 8 blocks, now karatsuba fixup + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, TMP1, TMP3 + vpxor T, TMP0, TMP2 + + lea inp, [inp + 16*8] + jmp Lmod_loop + +Ldone: + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpxor T, T, Xhi + vmovdqu XMMWORD PTR[Tp], T + vzeroupper + + vmovdqu xmm6, XMMWORD PTR[rsp + 0*16] + vmovdqu xmm7, XMMWORD PTR[rsp + 1*16] + add rsp, 16*2 + + ret + +intel_aes_gcmAAD ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Encrypt and Authenticate +; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmENC PROC + +PT textequ +CT textequ +Htbl textequ +Gctx textequ +len textequ +KS textequ +NR textequ + +aluCTR textequ +aluKSl textequ +aluTMP textequ + +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +TMP5 textequ +CTR0 textequ +CTR1 textequ +CTR2 textequ +CTR3 textequ +CTR4 textequ +CTR5 textequ +CTR6 textequ +CTR7 textequ +BSWAPMASK textequ + +ROUND MACRO i + vmovdqu TMP3, XMMWORD PTR[i*16 + KS] + vaesenc CTR0, CTR0, TMP3 + vaesenc CTR1, CTR1, TMP3 + vaesenc CTR2, CTR2, TMP3 + vaesenc CTR3, CTR3, TMP3 + vaesenc CTR4, CTR4, TMP3 + vaesenc CTR5, CTR5, TMP3 + vaesenc CTR6, CTR6, TMP3 + vaesenc CTR7, CTR7, TMP3 +ENDM +ROUNDMUL MACRO i + vmovdqu TMP3, XMMWORD PTR[i*16 + KS] + + vaesenc CTR0, CTR0, TMP3 + vaesenc CTR1, CTR1, TMP3 + vaesenc CTR2, CTR2, TMP3 + vaesenc CTR3, CTR3, TMP3 + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + + vaesenc CTR4, CTR4, TMP3 + vaesenc CTR5, CTR5, TMP3 + vaesenc CTR6, CTR6, TMP3 + vaesenc CTR7, CTR7, TMP3 + + vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h + vpxor TMP0, TMP0, 
TMP3 + vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl] + vpclmulqdq TMP3, TMP5, TMP4, 011h + vpxor TMP1, TMP1, TMP3 + vpclmulqdq TMP3, TMP5, TMP4, 000h + vpxor TMP2, TMP2, TMP3 +ENDM +KARATSUBA MACRO i + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h + vpxor TMP0, TMP0, TMP3 + vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl] + vpclmulqdq TMP3, TMP5, TMP4, 011h + vpxor TMP1, TMP1, TMP3 + vpclmulqdq TMP3, TMP5, TMP4, 000h + vpxor TMP2, TMP2, TMP3 +ENDM +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + xor aluTMP, aluKSl + bswap aluTMP + mov [3*4 + 8*16 + i*16 + rsp], aluTMP +ENDM + + + test len, len + jnz LbeginENC + ret + +LbeginENC: + + vzeroupper + push r11 + push r12 + push r13 + push rbp + sub rsp, 10*16 + vmovdqu XMMWORD PTR[rsp + 0*16], xmm6 + vmovdqu XMMWORD PTR[rsp + 1*16], xmm7 + vmovdqu XMMWORD PTR[rsp + 2*16], xmm8 + vmovdqu XMMWORD PTR[rsp + 3*16], xmm9 + vmovdqu XMMWORD PTR[rsp + 4*16], xmm10 + vmovdqu XMMWORD PTR[rsp + 5*16], xmm11 + vmovdqu XMMWORD PTR[rsp + 6*16], xmm12 + vmovdqu XMMWORD PTR[rsp + 7*16], xmm13 + vmovdqu XMMWORD PTR[rsp + 8*16], xmm14 + vmovdqu XMMWORD PTR[rsp + 9*16], xmm15 + + mov rbp, rsp + sub rsp, 16*16 + and rsp, -16 + + vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx] + vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask] + mov KS, [16*16 + 3*16 + Gctx] + mov NR, [244 + KS] + lea KS, [KS] + + vpshufb CTR0, CTR0, BSWAPMASK + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + mov aluKSl, [3*4 + KS] + bswap aluCTR + bswap aluKSl + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[8*16 + 0*16 + rsp], TMP0 + + cmp len, 128 + jb LEncDataSingles +; Prepare the "top" counters + vmovdqu XMMWORD PTR[8*16 + 1*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 2*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 3*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 4*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 5*16 + 
rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 6*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 7*16 + rsp], TMP0 + +; Encrypt the initial 8 blocks + sub len, 128 + vpaddd CTR1, CTR0, XMMWORD PTR[Lone] + vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo] + vpaddd CTR3, CTR2, XMMWORD PTR[Lone] + vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo] + vpaddd CTR5, CTR4, XMMWORD PTR[Lone] + vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo] + vpaddd CTR7, CTR6, XMMWORD PTR[Lone] + + vpshufb CTR0, CTR0, BSWAPMASK + vpshufb CTR1, CTR1, BSWAPMASK + vpshufb CTR2, CTR2, BSWAPMASK + vpshufb CTR3, CTR3, BSWAPMASK + vpshufb CTR4, CTR4, BSWAPMASK + vpshufb CTR5, CTR5, BSWAPMASK + vpshufb CTR6, CTR6, BSWAPMASK + vpshufb CTR7, CTR7, BSWAPMASK + + vmovdqu TMP3, XMMWORD PTR[0*16 + KS] + vpxor CTR0, CTR0, TMP3 + vpxor CTR1, CTR1, TMP3 + vpxor CTR2, CTR2, TMP3 + vpxor CTR3, CTR3, TMP3 + vpxor CTR4, CTR4, TMP3 + vpxor CTR5, CTR5, TMP3 + vpxor CTR6, CTR6, TMP3 + vpxor CTR7, CTR7, TMP3 + + ROUND 1 + + add aluCTR, 8 + mov aluTMP, aluCTR + xor aluTMP, aluKSl + bswap aluTMP + mov [8*16 + 0*16 + 3*4 + rsp], aluTMP + + ROUND 2 + NEXTCTR 1 + ROUND 3 + NEXTCTR 2 + ROUND 4 + NEXTCTR 3 + ROUND 5 + NEXTCTR 4 + ROUND 6 + NEXTCTR 5 + ROUND 7 + NEXTCTR 6 + ROUND 8 + NEXTCTR 7 + ROUND 9 + vmovdqu TMP5, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu TMP5, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu TMP5, XMMWORD PTR[14*16 + KS] +@@: + vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT] + vaesenclast CTR0, CTR0, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT] + vaesenclast CTR1, CTR1, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT] + vaesenclast CTR2, CTR2, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT] + vaesenclast CTR3, CTR3, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT] + vaesenclast CTR4, CTR4, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT] + vaesenclast CTR5, CTR5, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT] + vaesenclast CTR6, CTR6, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT] + 
vaesenclast CTR7, CTR7, TMP3 + + vmovdqu XMMWORD PTR[0*16 + CT], CTR0 + vpshufb CTR0, CTR0, BSWAPMASK + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vpshufb CTR1, CTR1, BSWAPMASK + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vpshufb CTR2, CTR2, BSWAPMASK + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vpshufb CTR3, CTR3, BSWAPMASK + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vpshufb CTR4, CTR4, BSWAPMASK + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vpshufb CTR5, CTR5, BSWAPMASK + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + vpshufb CTR6, CTR6, BSWAPMASK + vmovdqu XMMWORD PTR[7*16 + CT], CTR7 + vpshufb TMP5, CTR7, BSWAPMASK + + vmovdqa XMMWORD PTR[1*16 + rsp], CTR6 + vmovdqa XMMWORD PTR[2*16 + rsp], CTR5 + vmovdqa XMMWORD PTR[3*16 + rsp], CTR4 + vmovdqa XMMWORD PTR[4*16 + rsp], CTR3 + vmovdqa XMMWORD PTR[5*16 + rsp], CTR2 + vmovdqa XMMWORD PTR[6*16 + rsp], CTR1 + vmovdqa XMMWORD PTR[7*16 + rsp], CTR0 + + lea CT, [8*16 + CT] + lea PT, [8*16 + PT] + jmp LEncDataOctets + +LEncDataOctets: + cmp len, 128 + jb LEndEncOctets + sub len, 128 + + vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + rsp] + vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + rsp] + vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + rsp] + vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + rsp] + vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + rsp] + vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + rsp] + vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + rsp] + vmovdqa CTR7, XMMWORD PTR[8*16 + 7*16 + rsp] + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + rsp] + ROUNDMUL 1 + NEXTCTR 0 + vmovdqu TMP5, XMMWORD PTR[2*16 + rsp] + ROUNDMUL 2 + NEXTCTR 1 + vmovdqu TMP5, XMMWORD PTR[3*16 + rsp] + ROUNDMUL 3 + NEXTCTR 2 + vmovdqu TMP5, XMMWORD PTR[4*16 + rsp] + ROUNDMUL 4 + NEXTCTR 3 + vmovdqu TMP5, XMMWORD PTR[5*16 + rsp] + ROUNDMUL 5 + NEXTCTR 4 + vmovdqu TMP5, XMMWORD PTR[6*16 + rsp] + ROUNDMUL 
6 + NEXTCTR 5 + vpxor TMP5, T, XMMWORD PTR[7*16 + rsp] + ROUNDMUL 7 + NEXTCTR 6 + + ROUND 8 + NEXTCTR 7 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor T, TMP2, TMP3 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + ROUND 9 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vmovdqu TMP5, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu TMP5, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu TMP5, XMMWORD PTR[14*16 + KS] +@@: + vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT] + vaesenclast CTR0, CTR0, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT] + vaesenclast CTR1, CTR1, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT] + vaesenclast CTR2, CTR2, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT] + vaesenclast CTR3, CTR3, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT] + vaesenclast CTR4, CTR4, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT] + vaesenclast CTR5, CTR5, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT] + vaesenclast CTR6, CTR6, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT] + vaesenclast CTR7, CTR7, TMP3 + + vmovdqu XMMWORD PTR[0*16 + CT], CTR0 + vpshufb CTR0, CTR0, BSWAPMASK + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vpshufb CTR1, CTR1, BSWAPMASK + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vpshufb CTR2, CTR2, BSWAPMASK + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vpshufb CTR3, CTR3, BSWAPMASK + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vpshufb CTR4, CTR4, BSWAPMASK + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vpshufb CTR5, CTR5, BSWAPMASK + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + vpshufb CTR6, CTR6, BSWAPMASK + vmovdqu XMMWORD PTR[7*16 + CT], CTR7 + vpshufb TMP5, CTR7, BSWAPMASK + + vmovdqa XMMWORD PTR[1*16 + rsp], CTR6 + vmovdqa XMMWORD PTR[2*16 + rsp], CTR5 + vmovdqa XMMWORD PTR[3*16 + rsp], CTR4 + vmovdqa XMMWORD PTR[4*16 + rsp], CTR3 + vmovdqa XMMWORD 
PTR[5*16 + rsp], CTR2 + vmovdqa XMMWORD PTR[6*16 + rsp], CTR1 + vmovdqa XMMWORD PTR[7*16 + rsp], CTR0 + + vpxor T, T, TMP4 + + lea CT, [8*16 + CT] + lea PT, [8*16 + PT] + jmp LEncDataOctets + +LEndEncOctets: + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + rsp] + KARATSUBA 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + rsp] + KARATSUBA 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + rsp] + KARATSUBA 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + rsp] + KARATSUBA 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + rsp] + KARATSUBA 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + rsp] + KARATSUBA 6 + vpxor TMP5, T, XMMWORD PTR[7*16 + rsp] + KARATSUBA 7 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor T, TMP2, TMP3 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vpxor T, T, TMP4 + + sub aluCTR, 7 + +LEncDataSingles: + + cmp len, 16 + jb LEncDataTail + sub len, 16 + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, 
XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 + vpxor TMP1, TMP1, XMMWORD PTR[PT] + vmovdqu XMMWORD PTR[CT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + + vpshufb TMP1, TMP1, BSWAPMASK + vpxor T, T, TMP1 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4 + + jmp LEncDataSingles + +LEncDataTail: + + test len, len + jz LEncDataEnd + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp] + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 +; zero a temp location + vpxor TMP2, TMP2, TMP2 + vmovdqa XMMWORD PTR[rsp], TMP2 +; copy as many bytes as needed + xor KS, KS + +@@: + cmp len, KS + je @f + mov al, [PT + KS] + mov [rsp + KS], al + inc KS + jmp @b +@@: + vpxor TMP1, TMP1, XMMWORD PTR[rsp] + vmovdqa XMMWORD PTR[rsp], TMP1 + xor KS, KS +@@: + cmp len, KS + je @f + mov al, [rsp + KS] + mov [CT + KS], al + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[rsp + KS], 0 + inc KS + jmp @b +@@: +BAIL: + vmovdqa TMP1, XMMWORD PTR[rsp] + vpshufb TMP1, TMP1, BSWAPMASK + vpxor T, T, TMP1 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4 + +LEncDataEnd: + + vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T + bswap aluCTR + mov [16*16 + 2*16 + 3*4 + Gctx], 
aluCTR + + mov rsp, rbp + + vmovdqu xmm6, XMMWORD PTR[rsp + 0*16] + vmovdqu xmm7, XMMWORD PTR[rsp + 1*16] + vmovdqu xmm8, XMMWORD PTR[rsp + 2*16] + vmovdqu xmm9, XMMWORD PTR[rsp + 3*16] + vmovdqu xmm10, XMMWORD PTR[rsp + 4*16] + vmovdqu xmm11, XMMWORD PTR[rsp + 5*16] + vmovdqu xmm12, XMMWORD PTR[rsp + 6*16] + vmovdqu xmm13, XMMWORD PTR[rsp + 7*16] + vmovdqu xmm14, XMMWORD PTR[rsp + 8*16] + vmovdqu xmm15, XMMWORD PTR[rsp + 9*16] + + add rsp, 10*16 + pop rbp + pop r13 + pop r12 + pop r11 + + vzeroupper + + ret +intel_aes_gcmENC ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Decrypt and Authenticate +; void intel_aes_gcmDEC(uint8_t* PT, uint8_t* CT, void *Gctx, unsigned int len); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmDEC PROC + +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + xor aluTMP, aluKSl + bswap aluTMP + mov [3*4 + i*16 + rsp], aluTMP +ENDM + +PT textequ +CT textequ + + test len, len + jnz LbeginDEC + ret + +LbeginDEC: + + vzeroupper + push r11 + push r12 + push r13 + push rbp + sub rsp, 10*16 + vmovdqu XMMWORD PTR[rsp + 0*16], xmm6 + vmovdqu XMMWORD PTR[rsp + 1*16], xmm7 + vmovdqu XMMWORD PTR[rsp + 2*16], xmm8 + vmovdqu XMMWORD PTR[rsp + 3*16], xmm9 + vmovdqu XMMWORD PTR[rsp + 4*16], xmm10 + vmovdqu XMMWORD PTR[rsp + 5*16], xmm11 + vmovdqu XMMWORD PTR[rsp + 6*16], xmm12 + vmovdqu XMMWORD PTR[rsp + 7*16], xmm13 + vmovdqu XMMWORD PTR[rsp + 8*16], xmm14 + vmovdqu XMMWORD PTR[rsp + 9*16], xmm15 + + mov rbp, rsp + sub rsp, 8*16 + and rsp, -16 + + vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx] + vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask] + mov KS, [16*16 + 3*16 + Gctx] + mov NR, [244 + KS] + + vpshufb CTR0, CTR0, BSWAPMASK + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + mov aluKSl, [3*4 + KS] + bswap aluCTR + bswap aluKSl + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD 
PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[0*16 + rsp], TMP0 + + cmp len, 128 + jb LDecDataSingles +; Prepare the "top" counters + vmovdqu XMMWORD PTR[1*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[2*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[3*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[4*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[5*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[6*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[7*16 + rsp], TMP0 + + NEXTCTR 1 + NEXTCTR 2 + NEXTCTR 3 + NEXTCTR 4 + NEXTCTR 5 + NEXTCTR 6 + NEXTCTR 7 + +LDecDataOctets: + cmp len, 128 + jb LEndDecOctets + sub len, 128 + + vmovdqa CTR0, XMMWORD PTR[0*16 + rsp] + vmovdqa CTR1, XMMWORD PTR[1*16 + rsp] + vmovdqa CTR2, XMMWORD PTR[2*16 + rsp] + vmovdqa CTR3, XMMWORD PTR[3*16 + rsp] + vmovdqa CTR4, XMMWORD PTR[4*16 + rsp] + vmovdqa CTR5, XMMWORD PTR[5*16 + rsp] + vmovdqa CTR6, XMMWORD PTR[6*16 + rsp] + vmovdqa CTR7, XMMWORD PTR[7*16 + rsp] + + vmovdqu TMP5, XMMWORD PTR[7*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[6*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 1 + NEXTCTR 0 + vmovdqu TMP5, XMMWORD PTR[5*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 2 + NEXTCTR 1 + vmovdqu TMP5, XMMWORD PTR[4*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 3 + NEXTCTR 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 4 + NEXTCTR 3 + vmovdqu TMP5, XMMWORD PTR[2*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 5 + NEXTCTR 4 + vmovdqu TMP5, XMMWORD PTR[1*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 6 + NEXTCTR 5 + vmovdqu TMP5, XMMWORD PTR[0*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + vpxor TMP5, TMP5, T + ROUNDMUL 7 + NEXTCTR 6 + + ROUND 8 + NEXTCTR 7 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, 
TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor T, TMP2, TMP3 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + ROUND 9 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vmovdqu TMP5, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu TMP5, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu TMP5, XMMWORD PTR[14*16 + KS] +@@: + vpxor TMP3, TMP5, XMMWORD PTR[0*16 + CT] + vaesenclast CTR0, CTR0, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[1*16 + CT] + vaesenclast CTR1, CTR1, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[2*16 + CT] + vaesenclast CTR2, CTR2, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[3*16 + CT] + vaesenclast CTR3, CTR3, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[4*16 + CT] + vaesenclast CTR4, CTR4, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[5*16 + CT] + vaesenclast CTR5, CTR5, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[6*16 + CT] + vaesenclast CTR6, CTR6, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[7*16 + CT] + vaesenclast CTR7, CTR7, TMP3 + + vmovdqu XMMWORD PTR[0*16 + PT], CTR0 + vmovdqu XMMWORD PTR[1*16 + PT], CTR1 + vmovdqu XMMWORD PTR[2*16 + PT], CTR2 + vmovdqu XMMWORD PTR[3*16 + PT], CTR3 + vmovdqu XMMWORD PTR[4*16 + PT], CTR4 + vmovdqu XMMWORD PTR[5*16 + PT], CTR5 + vmovdqu XMMWORD PTR[6*16 + PT], CTR6 + vmovdqu XMMWORD PTR[7*16 + PT], CTR7 + + vpxor T, T, TMP4 + + lea CT, [8*16 + CT] + lea PT, [8*16 + PT] + jmp LDecDataOctets + +LEndDecOctets: + + sub aluCTR, 7 + +LDecDataSingles: + + cmp len, 16 + jb LDecDataTail + sub len, 16 + + vmovdqa TMP1, XMMWORD PTR[0*16 + rsp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, 
XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 + + vmovdqu TMP2, XMMWORD PTR[CT] + vpxor TMP1, TMP1, TMP2 + vmovdqu XMMWORD PTR[PT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + + vpshufb TMP2, TMP2, BSWAPMASK + vpxor T, T, TMP2 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4 + + jmp LDecDataSingles + +LDecDataTail: + + test len, len + jz LDecDataEnd + + vmovdqa TMP1, XMMWORD PTR[0*16 + rsp] + inc aluCTR + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 +; copy as many bytes as needed + xor KS, KS +@@: + cmp len, KS + je @f + mov al, [CT + KS] + mov [rsp + KS], al + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[rsp + KS], 0 + inc KS + jmp @b +@@: + vmovdqa TMP2, XMMWORD PTR[rsp] + vpshufb TMP2, TMP2, BSWAPMASK + vpxor T, T, TMP2 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP5, TMP2, TMP3, TMP4 + + + vpxor TMP1, TMP1, XMMWORD PTR[rsp] + 
vmovdqa XMMWORD PTR[rsp], TMP1 + xor KS, KS +@@: + cmp len, KS + je @f + mov al, [rsp + KS] + mov [PT + KS], al + inc KS + jmp @b +@@: + +LDecDataEnd: + + vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T + bswap aluCTR + mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR + + mov rsp, rbp + + vmovdqu xmm6, XMMWORD PTR[rsp + 0*16] + vmovdqu xmm7, XMMWORD PTR[rsp + 1*16] + vmovdqu xmm8, XMMWORD PTR[rsp + 2*16] + vmovdqu xmm9, XMMWORD PTR[rsp + 3*16] + vmovdqu xmm10, XMMWORD PTR[rsp + 4*16] + vmovdqu xmm11, XMMWORD PTR[rsp + 5*16] + vmovdqu xmm12, XMMWORD PTR[rsp + 6*16] + vmovdqu xmm13, XMMWORD PTR[rsp + 7*16] + vmovdqu xmm14, XMMWORD PTR[rsp + 8*16] + vmovdqu xmm15, XMMWORD PTR[rsp + 9*16] + + add rsp, 10*16 + pop rbp + pop r13 + pop r12 + pop r11 + + vzeroupper + + ret +ret +intel_aes_gcmDEC ENDP + + +END diff --git a/security/nss/lib/freebl/intel-gcm-x86-masm.asm b/security/nss/lib/freebl/intel-gcm-x86-masm.asm new file mode 100644 index 0000000000..32f4257884 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm-x86-masm.asm @@ -0,0 +1,1207 @@ +; LICENSE: +; This submission to NSS is to be made available under the terms of the +; Mozilla Public License, v. 2.0. You can obtain one at http: +; //mozilla.org/MPL/2.0/. +;############################################################################### +; Copyright(c) 2014, Intel Corp. 
+; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.MODEL FLAT, C +.XMM + +.DATA +ALIGN 16 +Lone dq 1,0 +Ltwo dq 2,0 +Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh +Lpoly dq 01h, 0c200000000000000h + +.CODE + + +GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4 + vpclmulqdq TMP1, SRC2, SRC1, 0h + vpclmulqdq TMP4, SRC2, SRC1, 011h + + vpshufd TMP2, SRC2, 78 + vpshufd TMP3, SRC1, 78 + vpxor TMP2, TMP2, SRC2 + vpxor TMP3, TMP3, SRC1 + + vpclmulqdq TMP2, TMP2, TMP3, 0h + vpxor TMP2, TMP2, TMP1 + vpxor TMP2, TMP2, TMP4 + + vpslldq TMP3, TMP2, 8 + vpsrldq TMP2, TMP2, 8 + + vpxor TMP1, TMP1, TMP3 + vpxor TMP4, TMP4, TMP2 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpxor DST, TMP1, TMP4 + + ENDM + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the final GCM tag +; void intel_aes_gcmTAG(unsigned char Htbl[16*16], +; unsigned char *Tp, +; unsigned int Mlen, +; unsigned int Alen, +; unsigned char* X0, +; unsigned char* TAG); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmTAG PROC + +Htbl textequ +Tp textequ +X0 textequ +TAG textequ + +T textequ +TMP0 textequ + + push ebx + + mov Htbl, [esp + 2*4 + 0*4] + mov Tp, [esp + 2*4 + 1*4] + mov X0, [esp + 2*4 + 4*4] + mov TAG, [esp + 2*4 + 5*4] + + vzeroupper + vmovdqu T, XMMWORD PTR[Tp] + + vpxor TMP0, TMP0, TMP0 + vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 2*4], 0 + vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 3*4], 2 + vpsllq TMP0, TMP0, 3 + + vpxor T, T, TMP0 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 + + vpshufb T, T, [Lbswap_mask] + vpxor T, T, 
[X0] + vmovdqu XMMWORD PTR[TAG], T + vzeroupper + + pop ebx + + ret + +intel_aes_gcmTAG ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the H table +; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmINIT PROC + +Htbl textequ +KS textequ +NR textequ + +T textequ +TMP0 textequ + + mov Htbl, [esp + 4*1 + 0*4] + mov KS, [esp + 4*1 + 1*4] + mov NR, [esp + 4*1 + 2*4] + + vzeroupper + ; AES-ENC(0) + vmovdqu T, XMMWORD PTR[KS] + lea KS, [16 + KS] + dec NR +Lenc_loop: + vaesenc T, T, [KS] + lea KS, [16 + KS] + dec NR + jnz Lenc_loop + + vaesenclast T, T, [KS] + vpshufb T, T, [Lbswap_mask] + + ;Calculate H` = GFMUL(H, 2) + vpsrad xmm3, T, 31 + vpshufd xmm3, xmm3, 0ffh + vpand xmm5, xmm3, [Lpoly] + vpsrld xmm3, T, 31 + vpslld xmm4, T, 1 + vpslldq xmm3, xmm3, 4 + vpxor T, xmm4, xmm3 + vpxor T, T, xmm5 + + vmovdqu TMP0, T + vmovdqu XMMWORD PTR[Htbl + 0*16], T + + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2 + + i = 1 + WHILE i LT 8 + GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 + vmovdqu XMMWORD PTR[Htbl + i*16], T + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2 + i = i+1 + ENDM + vzeroupper + ret +intel_aes_gcmINIT ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Authenticate only +; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmAAD PROC + +Htbl textequ +inp textequ +len textequ +Tp textequ +hlp0 textequ + +DATA textequ +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +Xhi textequ + +KARATSUBA_AAD MACRO i + vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h + vpxor 
TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h + vpxor TMP2, TMP2, TMP3 +ENDM + ; Return immediately when Alen is zero. Alen is the third stack argument, located above the 4-byte return address. + cmp DWORD PTR[esp + 1*4 + 2*4], 0 + jnz LbeginAAD + ret + +LbeginAAD: + push ebx + push esi + ; Two registers were pushed, so the arguments now start at esp plus 4*3. + mov Htbl, [esp + 4*3 + 0*4] + mov inp, [esp + 4*3 + 1*4] + mov len, [esp + 4*3 + 2*4] + mov Tp, [esp + 4*3 + 3*4] + + vzeroupper + + vpxor Xhi, Xhi, Xhi + + vmovdqu T, XMMWORD PTR[Tp] + ;we hash 8 blocks each iteration; if the total number of blocks is not a multiple of 8, we hash the first n%8 blocks first + mov hlp0, len + and hlp0, 128-1 ; hlp0 = byte count of the n%8 prefix blocks + jz Lmod_loop + + and len, -128 ; len = byte count left for the 8-block loop + sub hlp0, 16 ; hlp0 = Htbl byte offset used for the first prefix block + + ; Prefix block + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + + vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + hlp0], 0h + vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + hlp0], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h + + lea inp, [inp+16] + test hlp0, hlp0 + jnz Lpre_loop + jmp Lred1 + + ;hash remaining prefix blocks (up to 7 total prefix blocks) +Lpre_loop: + + sub hlp0, 16 + + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 0h + vpxor TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h + vpxor TMP2, TMP2, TMP3 + + test hlp0, hlp0 + lea inp, [inp+16] ; lea leaves the flags from test intact for the jnz below + jnz Lpre_loop + +Lred1: + ; Karatsuba fixup for the prefix blocks: fold the middle term TMP2 into T (low half) and Xhi (high half). + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, TMP1, TMP3 + vpxor T, TMP0, TMP2 + +Lmod_loop: + + sub len, 16*8 + jb Ldone + ; Block #0 + vmovdqu DATA, XMMWORD PTR[inp + 16*7] + vpshufb DATA, DATA, XMMWORD PTR[Lbswap_mask] + + vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + 0*16], 0h + vpclmulqdq
TMP1, DATA, XMMWORD PTR[Htbl + 0*16], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + 0*16], 0h + + ; Block #1 + vmovdqu DATA, XMMWORD PTR[inp + 16*6] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 1 + + ; Block #2 + vmovdqu DATA, XMMWORD PTR[inp + 16*5] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 2 + + vpxor T, T, TMP4 ;reduction stage 1b + + ; Block #3 + vmovdqu DATA, XMMWORD PTR[inp + 16*4] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 3 + ; Block #4 + vmovdqu DATA, XMMWORD PTR[inp + 16*3] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 4 + + vpxor T, T, TMP4 ;reduction stage 2b + ; Block #5 + vmovdqu DATA, XMMWORD PTR[inp + 16*2] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 5 + + vpxor T, T, Xhi ;reduction finalize + ; Block #6 + vmovdqu DATA, XMMWORD PTR[inp + 16*1] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 6 + ; Block #7 + vmovdqu DATA, XMMWORD PTR[inp + 16*0] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + KARATSUBA_AAD 7 + ; Aggregated 8 blocks, now karatsuba fixup + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, TMP1, TMP3 + vpxor T, TMP0, TMP2 + + lea inp, [inp + 16*8] + jmp Lmod_loop + +Ldone: + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpxor T, T, Xhi + vmovdqu XMMWORD PTR[Tp], T + vzeroupper + + pop esi + pop ebx + ret + +intel_aes_gcmAAD ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Encrypt and Authenticate +; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len); +; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmENC PROC + +PT textequ +CT textequ +Htbl textequ +Gctx textequ +len textequ +KS textequ +NR textequ + +aluCTR textequ +aluTMP textequ + +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +TMP5 textequ + +CTR0 textequ +CTR1 textequ +CTR2 textequ +CTR3 textequ +CTR4 textequ +CTR5 textequ +CTR6 textequ + +ROUND MACRO i + vmovdqu xmm7, XMMWORD PTR[i*16 + KS] + vaesenc CTR0, CTR0, xmm7 + vaesenc CTR1, CTR1, xmm7 + vaesenc CTR2, CTR2, xmm7 + vaesenc CTR3, CTR3, xmm7 + vaesenc CTR4, CTR4, xmm7 + vaesenc CTR5, CTR5, xmm7 + vaesenc CTR6, CTR6, xmm7 +ENDM + +KARATSUBA MACRO i + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h + vpxor TMP0, TMP0, TMP3 + vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl] + vpclmulqdq TMP3, TMP5, TMP4, 011h + vpxor TMP1, TMP1, TMP3 + vpclmulqdq TMP3, TMP5, TMP4, 000h + vpxor TMP2, TMP2, TMP3 +ENDM + +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + bswap aluTMP + xor aluTMP, [3*4 + KS] + mov [3*4 + 8*16 + i*16 + esp], aluTMP +ENDM + + cmp DWORD PTR[1*4 + 3*4 + esp], 0 + jne LbeginENC + ret + +LbeginENC: + + vzeroupper + push ebp + push ebx + push esi + push edi + + mov ebp, esp + sub esp, 16*16 + and esp, -16 + + mov PT, [ebp + 5*4 + 0*4] + mov CT, [ebp + 5*4 + 1*4] + mov Gctx, [ebp + 5*4 + 2*4] + + mov KS, [16*16 + 3*16 + Gctx] + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + bswap aluCTR + + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[8*16 + 0*16 + esp], TMP0 + + cmp len, 16*7 + jb LEncDataSingles +; Prepare the "top" counters + vmovdqu XMMWORD PTR[8*16 + 1*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 2*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 3*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 4*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 5*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 
6*16 + esp], TMP0 + + vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] +; Encrypt the initial 7 blocks + sub len, 16*7 + vpaddd CTR1, CTR0, XMMWORD PTR[Lone] + vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo] + vpaddd CTR3, CTR2, XMMWORD PTR[Lone] + vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo] + vpaddd CTR5, CTR4, XMMWORD PTR[Lone] + vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo] + + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] + vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] + vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] + vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] + vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] + vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] + vpshufb CTR6, CTR6, XMMWORD PTR[Lbswap_mask] + + vmovdqu xmm7, XMMWORD PTR[0*16 + KS] + vpxor CTR0, CTR0, xmm7 + vpxor CTR1, CTR1, xmm7 + vpxor CTR2, CTR2, xmm7 + vpxor CTR3, CTR3, xmm7 + vpxor CTR4, CTR4, xmm7 + vpxor CTR5, CTR5, xmm7 + vpxor CTR6, CTR6, xmm7 + + ROUND 1 + + add aluCTR, 7 + mov aluTMP, aluCTR + bswap aluTMP + xor aluTMP, [KS + 3*4] + mov [8*16 + 0*16 + 3*4 + esp], aluTMP + + ROUND 2 + NEXTCTR 1 + ROUND 3 + NEXTCTR 2 + ROUND 4 + NEXTCTR 3 + ROUND 5 + NEXTCTR 4 + ROUND 6 + NEXTCTR 5 + ROUND 7 + NEXTCTR 6 + ROUND 8 + ROUND 9 + vmovdqu xmm7, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu xmm7, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu xmm7, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast CTR0, CTR0, xmm7 + vaesenclast CTR1, CTR1, xmm7 + vaesenclast CTR2, CTR2, xmm7 + vaesenclast CTR3, CTR3, xmm7 + vaesenclast CTR4, CTR4, xmm7 + vaesenclast CTR5, CTR5, xmm7 + vaesenclast CTR6, CTR6, xmm7 + + vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT] + vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT] + vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT] + vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT] + vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT] + vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT] + vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT] + + vmovdqu XMMWORD PTR[0*16 + 
CT], CTR0 + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] + vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] + vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] + vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] + vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] + vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] + vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask] + + vmovdqa XMMWORD PTR[1*16 + esp], CTR5 + vmovdqa XMMWORD PTR[2*16 + esp], CTR4 + vmovdqa XMMWORD PTR[3*16 + esp], CTR3 + vmovdqa XMMWORD PTR[4*16 + esp], CTR2 + vmovdqa XMMWORD PTR[5*16 + esp], CTR1 + vmovdqa XMMWORD PTR[6*16 + esp], CTR0 + + lea CT, [7*16 + CT] + lea PT, [7*16 + PT] + jmp LEncData7 + +LEncData7: + cmp len, 16*7 + jb LEndEnc7 + sub len, 16*7 + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + esp] + KARATSUBA 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + esp] + KARATSUBA 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + esp] + KARATSUBA 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + esp] + KARATSUBA 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + esp] + KARATSUBA 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + esp] + vpxor TMP5, TMP5, T + KARATSUBA 6 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor TMP5, TMP2, TMP3 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpxor TMP5, TMP5, TMP4 + vmovdqu T, TMP5 + + vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + esp] + vmovdqa CTR1, XMMWORD 
PTR[8*16 + 1*16 + esp] + vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + esp] + vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + esp] + vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + esp] + vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + esp] + vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + esp] + + ROUND 1 + NEXTCTR 0 + ROUND 2 + NEXTCTR 1 + ROUND 3 + NEXTCTR 2 + ROUND 4 + NEXTCTR 3 + ROUND 5 + NEXTCTR 4 + ROUND 6 + NEXTCTR 5 + ROUND 7 + NEXTCTR 6 + + ROUND 8 + ROUND 9 + + vmovdqu xmm7, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu xmm7, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu xmm7, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast CTR0, CTR0, xmm7 + vaesenclast CTR1, CTR1, xmm7 + vaesenclast CTR2, CTR2, xmm7 + vaesenclast CTR3, CTR3, xmm7 + vaesenclast CTR4, CTR4, xmm7 + vaesenclast CTR5, CTR5, xmm7 + vaesenclast CTR6, CTR6, xmm7 + + vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT] + vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT] + vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT] + vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT] + vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT] + vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT] + vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT] + + vmovdqu XMMWORD PTR[0*16 + CT], CTR0 + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] + vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] + vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] + vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] + vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] + vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] + vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask] + + vmovdqa XMMWORD PTR[1*16 + esp], CTR5 + vmovdqa XMMWORD PTR[2*16 + esp], CTR4 + vmovdqa XMMWORD PTR[3*16 + esp], CTR3 + vmovdqa XMMWORD PTR[4*16 + esp], CTR2 + vmovdqa XMMWORD PTR[5*16 + esp], CTR1 + vmovdqa XMMWORD 
PTR[6*16 + esp], CTR0 + + lea CT, [7*16 + CT] + lea PT, [7*16 + PT] + jmp LEncData7 + +LEndEnc7: + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + esp] + KARATSUBA 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + esp] + KARATSUBA 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + esp] + KARATSUBA 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + esp] + KARATSUBA 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + esp] + KARATSUBA 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + esp] + vpxor TMP5, TMP5, T + KARATSUBA 6 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor TMP5, TMP2, TMP3 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpxor TMP5, TMP5, TMP4 + vmovdqu T, TMP5 + + sub aluCTR, 6 + +LEncDataSingles: + + cmp len, 16 + jb LEncDataTail + sub len, 16 + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast 
TMP1, TMP1, TMP2 + vpxor TMP1, TMP1, XMMWORD PTR[PT] + vmovdqu XMMWORD PTR[CT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + ; Fold the new ciphertext block into the running hash T. + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + + jmp LEncDataSingles + +LEncDataTail: + ; Encrypt and hash the final partial block, if there is one. + cmp len, 0 + je LEncDataEnd ; no partial block: no further counter value is consumed + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp] + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 +; zero a temp location + vpxor TMP2, TMP2, TMP2 + vmovdqa XMMWORD PTR[esp], TMP2 +; copy as many bytes as needed + xor KS, KS ; the key schedule pointer is no longer needed; KS becomes the byte index + mov aluTMP, edx ; dl is the byte-copy scratch below, so edx is saved here +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[PT + KS] + mov BYTE PTR[esp + KS], dl + inc KS + jmp @b +@@: + vpxor TMP1, TMP1, XMMWORD PTR[esp] + vmovdqa XMMWORD PTR[esp], TMP1 + xor KS, KS +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[esp + KS] + mov BYTE PTR[CT + KS], dl + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[esp + KS], 0 ; zero-pad the partial ciphertext block before it is hashed + inc KS + jmp @b +@@: + mov edx, aluTMP ; restore edx + vmovdqa TMP1, XMMWORD PTR[esp] + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + inc aluCTR ; the partial block consumed one counter value (same placement as in intel_aes_gcmDEC) + +LEncDataEnd: ; aluCTR holds the next unused counter value here + bswap aluCTR + mov
[16*16 + 2*16 + 3*4 + Gctx], aluCTR + + mov esp, ebp + pop edi + pop esi + pop ebx + pop ebp + + + vzeroupper + + ret +intel_aes_gcmENC ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Decrypt and Authenticate +; void intel_aes_gcmDEC(uint8_t* PT, uint8_t* CT, void *Gctx, unsigned int len); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + bswap aluTMP + xor aluTMP, [3*4 + KS] + mov [3*4 + i*16 + esp], aluTMP +ENDM + +intel_aes_gcmDEC PROC + + cmp DWORD PTR[1*4 + 3*4 + esp], 0 + jne LbeginDEC + ret + +LbeginDEC: + + vzeroupper + push ebp + push ebx + push esi + push edi + + mov ebp, esp + sub esp, 8*16 + and esp, -16 + + mov CT, [ebp + 5*4 + 0*4] + mov PT, [ebp + 5*4 + 1*4] + mov Gctx, [ebp + 5*4 + 2*4] + + mov KS, [16*16 + 3*16 + Gctx] + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + bswap aluCTR + + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[0*16 + esp], TMP0 + + cmp len, 16*7 + jb LDecDataSingles + vmovdqu XMMWORD PTR[1*16 + esp], TMP0 + vmovdqu XMMWORD PTR[2*16 + esp], TMP0 + vmovdqu XMMWORD PTR[3*16 + esp], TMP0 + vmovdqu XMMWORD PTR[4*16 + esp], TMP0 + vmovdqu XMMWORD PTR[5*16 + esp], TMP0 + vmovdqu XMMWORD PTR[6*16 + esp], TMP0 + dec aluCTR + +LDecData7: + cmp len, 16*7 + jb LDecData7End + sub len, 16*7 + + vmovdqu TMP5, XMMWORD PTR[0*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + vpxor TMP5, TMP5, T + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[6*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[6*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + NEXTCTR 0 + vmovdqu TMP5, XMMWORD PTR[1*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 5 + NEXTCTR 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 4 
+ NEXTCTR 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 3 + NEXTCTR 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 2 + NEXTCTR 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 1 + NEXTCTR 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 0 + NEXTCTR 6 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor TMP5, TMP2, TMP3 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpxor TMP5, TMP5, TMP4 + vmovdqu T, TMP5 + + vmovdqa CTR0, XMMWORD PTR[0*16 + esp] + vmovdqa CTR1, XMMWORD PTR[1*16 + esp] + vmovdqa CTR2, XMMWORD PTR[2*16 + esp] + vmovdqa CTR3, XMMWORD PTR[3*16 + esp] + vmovdqa CTR4, XMMWORD PTR[4*16 + esp] + vmovdqa CTR5, XMMWORD PTR[5*16 + esp] + vmovdqa CTR6, XMMWORD PTR[6*16 + esp] + + ROUND 1 + ROUND 2 + ROUND 3 + ROUND 4 + ROUND 5 + ROUND 6 + ROUND 7 + ROUND 8 + ROUND 9 + vmovdqu xmm7, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu xmm7, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu xmm7, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast CTR0, CTR0, xmm7 + vaesenclast CTR1, CTR1, xmm7 + vaesenclast CTR2, CTR2, xmm7 + vaesenclast CTR3, CTR3, xmm7 + vaesenclast CTR4, CTR4, xmm7 + vaesenclast CTR5, CTR5, xmm7 + vaesenclast CTR6, CTR6, xmm7 + + vpxor CTR0, CTR0, XMMWORD PTR[0*16 + CT] + vpxor CTR1, CTR1, XMMWORD PTR[1*16 + CT] + vpxor CTR2, CTR2, XMMWORD PTR[2*16 + CT] + vpxor CTR3, CTR3, XMMWORD PTR[3*16 + CT] + vpxor CTR4, CTR4, XMMWORD PTR[4*16 + CT] + vpxor CTR5, CTR5, XMMWORD PTR[5*16 + CT] + vpxor CTR6, CTR6, XMMWORD PTR[6*16 + CT] + + vmovdqu XMMWORD 
PTR[0*16 + PT], CTR0 + vmovdqu XMMWORD PTR[1*16 + PT], CTR1 + vmovdqu XMMWORD PTR[2*16 + PT], CTR2 + vmovdqu XMMWORD PTR[3*16 + PT], CTR3 + vmovdqu XMMWORD PTR[4*16 + PT], CTR4 + vmovdqu XMMWORD PTR[5*16 + PT], CTR5 + vmovdqu XMMWORD PTR[6*16 + PT], CTR6 + + lea CT, [7*16 + CT] + lea PT, [7*16 + PT] + jmp LDecData7 + +LDecData7End: + + NEXTCTR 0 + +LDecDataSingles: + + cmp len, 16 + jb LDecDataTail + sub len, 16 + + vmovdqu TMP1, XMMWORD PTR[CT] + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + + vmovdqa TMP1, XMMWORD PTR[0*16 + esp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 + vpxor TMP1, TMP1, XMMWORD PTR[CT] + vmovdqu XMMWORD PTR[PT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + jmp LDecDataSingles + +LDecDataTail: + + cmp len, 0 + je LDecDataEnd + + vmovdqa TMP1, XMMWORD PTR[0*16 + esp] + inc aluCTR + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD 
PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast xmm7, TMP1, TMP2 + +; copy as many bytes as needed + xor KS, KS + mov aluTMP, edx +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[CT + KS] + mov BYTE PTR[esp + KS], dl + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[esp + KS], 0 + inc KS + jmp @b +@@: + mov edx, aluTMP + vmovdqa TMP1, XMMWORD PTR[esp] + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + + vpxor xmm7, xmm7, XMMWORD PTR[esp] + vmovdqa XMMWORD PTR[esp], xmm7 + xor KS, KS + mov aluTMP, edx +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[esp + KS] + mov BYTE PTR[PT + KS], dl + inc KS + jmp @b +@@: + mov edx, aluTMP + +LDecDataEnd: + + bswap aluCTR + mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR + + mov esp, ebp + pop edi + pop esi + pop ebx + pop ebp + + vzeroupper + + ret +intel_aes_gcmDEC ENDP + + +END diff --git a/security/nss/lib/freebl/intel-gcm.h b/security/nss/lib/freebl/intel-gcm.h new file mode 100644 index 0000000000..e0221159d7 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm.h @@ -0,0 +1,97 @@ +/******************************************************************************/ +/* LICENSE: */ +/* This submission to NSS is to be made available under the terms of the */ +/* Mozilla Public License, v. 2.0. You can obtain one at http: */ +/* //mozilla.org/MPL/2.0/. */ +/******************************************************************************/ +/* Copyright(c) 2013, Intel Corp. 
*/ +/******************************************************************************/ +/* Reference: */ +/* [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication */ +/* Instruction and its Usage for Computing the GCM Mode (Rev. 2.01) */ +/* http://software.intel.com/sites/default/files/article/165685/clmul-wp-r*/ +/*ev-2.01-2012-09-21.pdf */ +/* [2] S. Gueron, M. E. Kounavis: Efficient Implementation of the Galois */ +/* Counter Mode Using a Carry-less Multiplier and a Fast Reduction */ +/* Algorithm. Information Processing Letters 110: 549-553 (2010). */ +/* [3] S. Gueron: AES Performance on the 2nd Generation Intel(R) Core(TM) */ +/* Processor Family (to be posted) (2012). */ +/* [4] S. Gueron: Fast GHASH computations for speeding up AES-GCM (to be */ +/* published) (2012). */ + +#ifndef INTEL_GCM_H +#define INTEL_GCM_H 1 + +#include "blapii.h" + +typedef struct intel_AES_GCMContextStr intel_AES_GCMContext; + +intel_AES_GCMContext *intel_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *params); + +void intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit); + +SECStatus intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +SECStatus intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +SECStatus intel_AES_GCM_EncryptAEAD(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); +SECStatus intel_AES_GCM_DecryptAEAD(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int 
maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); + +/* Prototypes of functions in the assembler file for fast AES-GCM, using + Intel AES-NI and CLMUL-NI, as described in [1] + [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication + Instruction and its Usage for Computing the GCM Mode */ + +/* Prepares the constants used in the aggregated reduction method */ +void intel_aes_gcmINIT(unsigned char Htbl[16 * 16], + unsigned char *KS, + int NR); + +/* Produces the final GHASH value */ +void intel_aes_gcmTAG(unsigned char Htbl[16 * 16], + unsigned char *Tp, + unsigned long Mlen, + unsigned long Alen, + unsigned char *X0, + unsigned char *TAG); + +/* Hashes the Additional Authenticated Data, should be used before enc/dec. + Operates on whole blocks only. Partial blocks should be padded externally. */ +void intel_aes_gcmAAD(unsigned char Htbl[16 * 16], + unsigned char *AAD, + unsigned long Alen, + unsigned char *Tp); + +/* Encrypts and hashes the Plaintext. + Operates on any length of data, however partial block should only be encrypted + at the last call, otherwise the result will be incorrect. */ +void intel_aes_gcmENC(const unsigned char *PT, + unsigned char *CT, + void *Gctx, + unsigned long len); + +/* Similar to ENC, but decrypts the Ciphertext. */ +void intel_aes_gcmDEC(const unsigned char *CT, + unsigned char *PT, + void *Gctx, + unsigned long len); + +#endif diff --git a/security/nss/lib/freebl/intel-gcm.s b/security/nss/lib/freebl/intel-gcm.s new file mode 100644 index 0000000000..5b5cf5d4bb --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm.s @@ -0,0 +1,1340 @@ +# LICENSE: +# This submission to NSS is to be made available under the terms of the +# Mozilla Public License, v. 2.0. You can obtain one at http: +# //mozilla.org/MPL/2.0/. 
+################################################################################ +# Copyright(c) 2012, Intel Corp. + +.align 16 +.Lone: +.quad 1,0 +.Ltwo: +.quad 2,0 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lshuff_mask: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.Lpoly: +.quad 0x1, 0xc200000000000000 + + +################################################################################ +# Generates the final GCM tag +# void intel_aes_gcmTAG(uint8_t Htbl[16*16], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG); +.type intel_aes_gcmTAG,@function +.globl intel_aes_gcmTAG +.align 16 +intel_aes_gcmTAG: + +.set Htbl, %rdi +.set Tp, %rsi +.set Mlen, %rdx +.set Alen, %rcx +.set X0, %r8 +.set TAG, %r9 + +.set T,%xmm0 +.set TMP0,%xmm1 + + vmovdqu (Tp), T + vpshufb .Lbswap_mask(%rip), T, T + vpxor TMP0, TMP0, TMP0 + shl $3, Mlen + shl $3, Alen + vpinsrq $0, Mlen, TMP0, TMP0 + vpinsrq $1, Alen, TMP0, TMP0 + vpxor TMP0, T, T + vmovdqu (Htbl), TMP0 + call GFMUL + vpshufb .Lbswap_mask(%rip), T, T + vpxor (X0), T, T + vmovdqu T, (TAG) + +ret +.size intel_aes_gcmTAG, .-intel_aes_gcmTAG +################################################################################ +# Generates the H table +# void intel_aes_gcmINIT(uint8_t Htbl[16*16], uint8_t *KS, int NR); +.type intel_aes_gcmINIT,@function +.globl intel_aes_gcmINIT +.align 16 +intel_aes_gcmINIT: + +.set Htbl, %rdi +.set KS, %rsi +.set NR, %edx + +.set T,%xmm0 +.set TMP0,%xmm1 + +CALCULATE_POWERS_OF_H: + vmovdqu 16*0(KS), T + vaesenc 16*1(KS), T, T + vaesenc 16*2(KS), T, T + vaesenc 16*3(KS), T, T + vaesenc 16*4(KS), T, T + vaesenc 16*5(KS), T, T + vaesenc 16*6(KS), T, T + vaesenc 16*7(KS), T, T + vaesenc 16*8(KS), T, T + vaesenc 16*9(KS), T, T + vmovdqu 16*10(KS), TMP0 + cmp $10, NR + je .LH0done + vaesenc 16*10(KS), T, T + vaesenc 16*11(KS), T, T + vmovdqu 16*12(KS), TMP0 + cmp $12, NR + je .LH0done + vaesenc 16*12(KS), T, T + vaesenc 16*13(KS), T, T + vmovdqu 16*14(KS), TMP0 + 
+.LH0done: + vaesenclast TMP0, T, T + + vpshufb .Lbswap_mask(%rip), T, T + + vmovdqu T, TMP0 + # Calculate H` = GFMUL(H, 2) + vpsrld $7 , T , %xmm3 + vmovdqu .Lshuff_mask(%rip), %xmm4 + vpshufb %xmm4, %xmm3 , %xmm3 + movq $0xff00 , %rax + vmovq %rax, %xmm4 + vpshufb %xmm3, %xmm4 , %xmm4 + vmovdqu .Lpoly(%rip), %xmm5 + vpand %xmm4, %xmm5, %xmm5 + vpsrld $31, T, %xmm3 + vpslld $1, T, %xmm4 + vpslldq $4, %xmm3, %xmm3 + vpxor %xmm3, %xmm4, T # T now holds p(x)<<1 + + # xor the masked polynomial held in xmm5 into p(x)<<1 + vpxor %xmm5, T , T + vmovdqu T, TMP0 + vmovdqu T, (Htbl) # H * 2 + call GFMUL + vmovdqu T, 16(Htbl) # H^2 * 2 + call GFMUL + vmovdqu T, 32(Htbl) # H^3 * 2 + call GFMUL + vmovdqu T, 48(Htbl) # H^4 * 2 + call GFMUL + vmovdqu T, 64(Htbl) # H^5 * 2 + call GFMUL + vmovdqu T, 80(Htbl) # H^6 * 2 + call GFMUL + vmovdqu T, 96(Htbl) # H^7 * 2 + call GFMUL + vmovdqu T, 112(Htbl) # H^8 * 2 + + # For each of the 8 table entries, xor its two 64-bit halves together (Karatsuba middle-term operand); results go to Htbl offsets 128..240 + vpshufd $78, (Htbl), %xmm8 + vpshufd $78, 16(Htbl), %xmm9 + vpshufd $78, 32(Htbl), %xmm10 + vpshufd $78, 48(Htbl), %xmm11 + vpshufd $78, 64(Htbl), %xmm12 + vpshufd $78, 80(Htbl), %xmm13 + vpshufd $78, 96(Htbl), %xmm14 + vpshufd $78, 112(Htbl), %xmm15 + + vpxor (Htbl), %xmm8, %xmm8 + vpxor 16(Htbl), %xmm9, %xmm9 + vpxor 32(Htbl), %xmm10, %xmm10 + vpxor 48(Htbl), %xmm11, %xmm11 + vpxor 64(Htbl), %xmm12, %xmm12 + vpxor 80(Htbl), %xmm13, %xmm13 + vpxor 96(Htbl), %xmm14, %xmm14 + vpxor 112(Htbl), %xmm15, %xmm15 + + vmovdqu %xmm8, 128(Htbl) + vmovdqu %xmm9, 144(Htbl) + vmovdqu %xmm10, 160(Htbl) + vmovdqu %xmm11, 176(Htbl) + vmovdqu %xmm12, 192(Htbl) + vmovdqu %xmm13, 208(Htbl) + vmovdqu %xmm14, 224(Htbl) + vmovdqu %xmm15, 240(Htbl) + + ret +.size intel_aes_gcmINIT, .-intel_aes_gcmINIT +################################################################################ +# Authenticate only +# void intel_aes_gcmAAD(uint8_t Htbl[16*16], uint8_t *AAD, uint64_t Alen, uint8_t *Tp); + +.globl intel_aes_gcmAAD +.type intel_aes_gcmAAD,@function +.align 16 +intel_aes_gcmAAD: +
+.set DATA, %xmm0 +.set T, %xmm1 +.set BSWAP_MASK, %xmm2 +.set TMP0, %xmm3 +.set TMP1, %xmm4 +.set TMP2, %xmm5 +.set TMP3, %xmm6 +.set TMP4, %xmm7 +.set Xhi, %xmm9 + +.set Htbl, %rdi +.set inp, %rsi +.set len, %rdx +.set Tp, %rcx + +.set hlp0, %r11 + # Accumulate one block: TMP0 collects the low-half products, TMP1 the high-half products, TMP2 the Karatsuba middle products. +.macro KARATSUBA_AAD i + vpclmulqdq $0x00, 16*\i(Htbl), DATA, TMP3 + vpxor TMP3, TMP0, TMP0 + vpclmulqdq $0x11, 16*\i(Htbl), DATA, TMP3 + vpxor TMP3, TMP1, TMP1 + vpshufd $78, DATA, TMP3 + vpxor DATA, TMP3, TMP3 + vpclmulqdq $0x00, 16*(\i+8)(Htbl), TMP3, TMP3 + vpxor TMP3, TMP2, TMP2 +.endm + # Nothing to hash when len is zero. + test len, len + jnz .LbeginAAD + ret + +.LbeginAAD: + + push hlp0 + vzeroupper + + vmovdqa .Lbswap_mask(%rip), BSWAP_MASK + + vpxor Xhi, Xhi, Xhi + + vmovdqu (Tp),T + vpshufb BSWAP_MASK,T,T + + # we hash 8 blocks each iteration; if the total number of blocks is not a multiple of 8, we hash the first n%8 blocks first + mov len, hlp0 + and $~-128, hlp0 # hlp0 = len mod 128, the byte count of the prefix blocks + + jz .Lmod_loop + + sub hlp0, len # len = byte count left for the 8-block loop + sub $16, hlp0 # hlp0 = Htbl byte offset used for the first prefix block + + #hash first prefix block + vmovdqu (inp), DATA + vpshufb BSWAP_MASK, DATA, DATA + vpxor T, DATA, DATA + + vpclmulqdq $0x00, (Htbl, hlp0), DATA, TMP0 + vpclmulqdq $0x11, (Htbl, hlp0), DATA, TMP1 + vpshufd $78, DATA, TMP2 + vpxor DATA, TMP2, TMP2 + vpclmulqdq $0x00, 16*8(Htbl, hlp0), TMP2, TMP2 + + lea 16(inp), inp + test hlp0, hlp0 + jnz .Lpre_loop + jmp .Lred1 + + #hash remaining prefix blocks (up to 7 total prefix blocks) +.align 64 +.Lpre_loop: + + sub $16, hlp0 + + vmovdqu (inp),DATA # next data block + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x00, (Htbl,hlp0), DATA, TMP3 + vpxor TMP3, TMP0, TMP0 + vpclmulqdq $0x11, (Htbl,hlp0), DATA, TMP3 + vpxor TMP3, TMP1, TMP1 + vpshufd $78, DATA, TMP3 + vpxor DATA, TMP3, TMP3 + vpclmulqdq $0x00, 16*8(Htbl,hlp0), TMP3, TMP3 + vpxor TMP3, TMP2, TMP2 + + test hlp0, hlp0 + # lea does not modify the flags set by the test above + lea 16(inp), inp + + jnz .Lpre_loop + +.Lred1: + vpxor TMP0, TMP2, TMP2 # karatsuba fixup for the prefix blocks + vpxor TMP1, TMP2, TMP2 + vpsrldq $8, TMP2, TMP3 + vpslldq $8, TMP2, TMP2 + + vpxor TMP3, TMP1, Xhi + vpxor TMP2, TMP0, T + +.align 64
+.Lmod_loop: + sub $0x80, len + jb .Ldone + + vmovdqu 16*7(inp),DATA # Ii + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x00, (Htbl), DATA, TMP0 + vpclmulqdq $0x11, (Htbl), DATA, TMP1 + vpshufd $78, DATA, TMP2 + vpxor DATA, TMP2, TMP2 + vpclmulqdq $0x00, 16*8(Htbl), TMP2, TMP2 + ######################################################### + vmovdqu 16*6(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + KARATSUBA_AAD 1 + ######################################################### + vmovdqu 16*5(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 1a + vpalignr $8, T, T, T + + KARATSUBA_AAD 2 + + vpxor TMP4, T, T #reduction stage 1b + ######################################################### + vmovdqu 16*4(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + KARATSUBA_AAD 3 + ######################################################### + vmovdqu 16*3(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 2a + vpalignr $8, T, T, T + + KARATSUBA_AAD 4 + + vpxor TMP4, T, T #reduction stage 2b + ######################################################### + vmovdqu 16*2(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + KARATSUBA_AAD 5 + + vpxor Xhi, T, T #reduction finalize + ######################################################### + vmovdqu 16*1(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + KARATSUBA_AAD 6 + ######################################################### + vmovdqu 16*0(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + vpxor T,DATA,DATA + + KARATSUBA_AAD 7 + ######################################################### + vpxor TMP0, TMP2, TMP2 # karatsuba fixup + vpxor TMP1, TMP2, TMP2 + vpsrldq $8, TMP2, TMP3 + vpslldq $8, TMP2, TMP2 + + vpxor TMP3, TMP1, Xhi + vpxor TMP2, TMP0, T + + lea 16*8(inp), inp + jmp .Lmod_loop + ######################################################### + +.Ldone: + vpclmulqdq $0x10, .Lpoly(%rip), T, TMP3 + vpalignr $8, T, T, T + vpxor TMP3, T, T + + vpclmulqdq $0x10, 
.Lpoly(%rip), T, TMP3 + vpalignr $8, T, T, T + vpxor TMP3, T, T + + vpxor Xhi, T, T + +.Lsave: + vpshufb BSWAP_MASK,T, T + vmovdqu T,(Tp) + vzeroupper + + pop hlp0 + ret +.size intel_aes_gcmAAD,.-intel_aes_gcmAAD + +################################################################################ +# Encrypt and Authenticate +# void intel_aes_gcmENC(uint8_t* PT, uint8_t* CT, void *Gctx,uint64_t len); +.type intel_aes_gcmENC,@function +.globl intel_aes_gcmENC +.align 16 +intel_aes_gcmENC: + +.set PT,%rdi +.set CT,%rsi +.set Htbl, %rdx +.set len, %rcx +.set KS,%r9 +.set NR,%r10d + +.set Gctx, %rdx + +.set T,%xmm0 +.set TMP0,%xmm1 +.set TMP1,%xmm2 +.set TMP2,%xmm3 +.set TMP3,%xmm4 +.set TMP4,%xmm5 +.set TMP5,%xmm6 +.set CTR0,%xmm7 +.set CTR1,%xmm8 +.set CTR2,%xmm9 +.set CTR3,%xmm10 +.set CTR4,%xmm11 +.set CTR5,%xmm12 +.set CTR6,%xmm13 +.set CTR7,%xmm14 +.set CTR,%xmm15 + +.macro ROUND i + vmovdqu \i*16(KS), TMP3 + vaesenc TMP3, CTR0, CTR0 + vaesenc TMP3, CTR1, CTR1 + vaesenc TMP3, CTR2, CTR2 + vaesenc TMP3, CTR3, CTR3 + vaesenc TMP3, CTR4, CTR4 + vaesenc TMP3, CTR5, CTR5 + vaesenc TMP3, CTR6, CTR6 + vaesenc TMP3, CTR7, CTR7 +.endm + +.macro ROUNDMUL i + + vmovdqu \i*16(%rsp), TMP5 + vmovdqu \i*16(KS), TMP3 + + vaesenc TMP3, CTR0, CTR0 + vaesenc TMP3, CTR1, CTR1 + vaesenc TMP3, CTR2, CTR2 + vaesenc TMP3, CTR3, CTR3 + + vpshufd $78, TMP5, TMP4 + vpxor TMP5, TMP4, TMP4 + + vaesenc TMP3, CTR4, CTR4 + vaesenc TMP3, CTR5, CTR5 + vaesenc TMP3, CTR6, CTR6 + vaesenc TMP3, CTR7, CTR7 + + vpclmulqdq $0x00, 128+\i*16(Htbl), TMP4, TMP3 + vpxor TMP3, TMP0, TMP0 + vmovdqa \i*16(Htbl), TMP4 + vpclmulqdq $0x11, TMP4, TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + +.endm + +.macro KARATSUBA i + vmovdqu \i*16(%rsp), TMP5 + + vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vpclmulqdq $0x00, 
128+\i*16(Htbl), TMP5, TMP3 + vpxor TMP3, TMP0, TMP0 +.endm + + test len, len + jnz .Lbegin + ret + +.Lbegin: + + vzeroupper + push %rbp + push %rbx + + movq %rsp, %rbp + sub $128, %rsp + andq $-16, %rsp + + vmovdqu 288(Gctx), CTR + vmovdqu 272(Gctx), T + mov 304(Gctx), KS +# AESContext->Nr + mov 244(KS), NR + + vpshufb .Lbswap_mask(%rip), CTR, CTR + vpshufb .Lbswap_mask(%rip), T, T + + cmp $128, len + jb .LDataSingles + +# Encrypt the first eight blocks + sub $128, len + vmovdqa CTR, CTR0 + vpaddd .Lone(%rip), CTR0, CTR1 + vpaddd .Ltwo(%rip), CTR0, CTR2 + vpaddd .Lone(%rip), CTR2, CTR3 + vpaddd .Ltwo(%rip), CTR2, CTR4 + vpaddd .Lone(%rip), CTR4, CTR5 + vpaddd .Ltwo(%rip), CTR4, CTR6 + vpaddd .Lone(%rip), CTR6, CTR7 + vpaddd .Ltwo(%rip), CTR6, CTR + + vpshufb .Lbswap_mask(%rip), CTR0, CTR0 + vpshufb .Lbswap_mask(%rip), CTR1, CTR1 + vpshufb .Lbswap_mask(%rip), CTR2, CTR2 + vpshufb .Lbswap_mask(%rip), CTR3, CTR3 + vpshufb .Lbswap_mask(%rip), CTR4, CTR4 + vpshufb .Lbswap_mask(%rip), CTR5, CTR5 + vpshufb .Lbswap_mask(%rip), CTR6, CTR6 + vpshufb .Lbswap_mask(%rip), CTR7, CTR7 + + vpxor (KS), CTR0, CTR0 + vpxor (KS), CTR1, CTR1 + vpxor (KS), CTR2, CTR2 + vpxor (KS), CTR3, CTR3 + vpxor (KS), CTR4, CTR4 + vpxor (KS), CTR5, CTR5 + vpxor (KS), CTR6, CTR6 + vpxor (KS), CTR7, CTR7 + + ROUND 1 + ROUND 2 + ROUND 3 + ROUND 4 + ROUND 5 + ROUND 6 + ROUND 7 + ROUND 8 + ROUND 9 + + vmovdqu 160(KS), TMP5 + cmp $12, NR + jb .LLast1 + + ROUND 10 + ROUND 11 + + vmovdqu 192(KS), TMP5 + cmp $14, NR + jb .LLast1 + + ROUND 12 + ROUND 13 + + vmovdqu 224(KS), TMP5 + +.LLast1: + + vpxor (PT), TMP5, TMP3 + vaesenclast TMP3, CTR0, CTR0 + vpxor 16(PT), TMP5, TMP3 + vaesenclast TMP3, CTR1, CTR1 + vpxor 32(PT), TMP5, TMP3 + vaesenclast TMP3, CTR2, CTR2 + vpxor 48(PT), TMP5, TMP3 + vaesenclast TMP3, CTR3, CTR3 + vpxor 64(PT), TMP5, TMP3 + vaesenclast TMP3, CTR4, CTR4 + vpxor 80(PT), TMP5, TMP3 + vaesenclast TMP3, CTR5, CTR5 + vpxor 96(PT), TMP5, TMP3 + vaesenclast TMP3, CTR6, CTR6 + vpxor 112(PT), 
TMP5, TMP3 + vaesenclast TMP3, CTR7, CTR7 + + vmovdqu .Lbswap_mask(%rip), TMP3 + + vmovdqu CTR0, (CT) + vpshufb TMP3, CTR0, CTR0 + vmovdqu CTR1, 16(CT) + vpshufb TMP3, CTR1, CTR1 + vmovdqu CTR2, 32(CT) + vpshufb TMP3, CTR2, CTR2 + vmovdqu CTR3, 48(CT) + vpshufb TMP3, CTR3, CTR3 + vmovdqu CTR4, 64(CT) + vpshufb TMP3, CTR4, CTR4 + vmovdqu CTR5, 80(CT) + vpshufb TMP3, CTR5, CTR5 + vmovdqu CTR6, 96(CT) + vpshufb TMP3, CTR6, CTR6 + vmovdqu CTR7, 112(CT) + vpshufb TMP3, CTR7, CTR7 + + lea 128(CT), CT + lea 128(PT), PT + jmp .LDataOctets + +# Encrypt 8 blocks each time while hashing previous 8 blocks +.align 64 +.LDataOctets: + cmp $128, len + jb .LEndOctets + sub $128, len + + vmovdqa CTR7, TMP5 + vmovdqa CTR6, 1*16(%rsp) + vmovdqa CTR5, 2*16(%rsp) + vmovdqa CTR4, 3*16(%rsp) + vmovdqa CTR3, 4*16(%rsp) + vmovdqa CTR2, 5*16(%rsp) + vmovdqa CTR1, 6*16(%rsp) + vmovdqa CTR0, 7*16(%rsp) + + vmovdqa CTR, CTR0 + vpaddd .Lone(%rip), CTR0, CTR1 + vpaddd .Ltwo(%rip), CTR0, CTR2 + vpaddd .Lone(%rip), CTR2, CTR3 + vpaddd .Ltwo(%rip), CTR2, CTR4 + vpaddd .Lone(%rip), CTR4, CTR5 + vpaddd .Ltwo(%rip), CTR4, CTR6 + vpaddd .Lone(%rip), CTR6, CTR7 + vpaddd .Ltwo(%rip), CTR6, CTR + + vmovdqu (KS), TMP4 + vpshufb TMP3, CTR0, CTR0 + vpxor TMP4, CTR0, CTR0 + vpshufb TMP3, CTR1, CTR1 + vpxor TMP4, CTR1, CTR1 + vpshufb TMP3, CTR2, CTR2 + vpxor TMP4, CTR2, CTR2 + vpshufb TMP3, CTR3, CTR3 + vpxor TMP4, CTR3, CTR3 + vpshufb TMP3, CTR4, CTR4 + vpxor TMP4, CTR4, CTR4 + vpshufb TMP3, CTR5, CTR5 + vpxor TMP4, CTR5, CTR5 + vpshufb TMP3, CTR6, CTR6 + vpxor TMP4, CTR6, CTR6 + vpshufb TMP3, CTR7, CTR7 + vpxor TMP4, CTR7, CTR7 + + vmovdqu 16*0(Htbl), TMP3 + vpclmulqdq $0x11, TMP3, TMP5, TMP1 + vpclmulqdq $0x00, TMP3, TMP5, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+0*16(Htbl), TMP3 + vpclmulqdq $0x00, TMP3, TMP5, TMP0 + + ROUNDMUL 1 + + ROUNDMUL 2 + + ROUNDMUL 3 + + ROUNDMUL 4 + + ROUNDMUL 5 + + ROUNDMUL 6 + + vpxor 7*16(%rsp), T, TMP5 + vmovdqu 7*16(KS), TMP3 + + vaesenc TMP3, 
CTR0, CTR0 + vaesenc TMP3, CTR1, CTR1 + vaesenc TMP3, CTR2, CTR2 + vaesenc TMP3, CTR3, CTR3 + + vpshufd $78, TMP5, TMP4 + vpxor TMP5, TMP4, TMP4 + + vaesenc TMP3, CTR4, CTR4 + vaesenc TMP3, CTR5, CTR5 + vaesenc TMP3, CTR6, CTR6 + vaesenc TMP3, CTR7, CTR7 + + vpclmulqdq $0x11, 7*16(Htbl), TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, 7*16(Htbl), TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + vpclmulqdq $0x00, 128+7*16(Htbl), TMP4, TMP3 + vpxor TMP3, TMP0, TMP0 + + ROUND 8 + vmovdqa .Lpoly(%rip), TMP5 + + vpxor TMP1, TMP0, TMP0 + vpxor TMP2, TMP0, TMP0 + vpsrldq $8, TMP0, TMP3 + vpxor TMP3, TMP1, TMP4 + vpslldq $8, TMP0, TMP3 + vpxor TMP3, TMP2, T + + vpclmulqdq $0x10, TMP5, T, TMP1 + vpalignr $8, T, T, T + vpxor T, TMP1, T + + ROUND 9 + + vpclmulqdq $0x10, TMP5, T, TMP1 + vpalignr $8, T, T, T + vpxor T, TMP1, T + + vmovdqu 160(KS), TMP5 + cmp $10, NR + jbe .LLast2 + + ROUND 10 + ROUND 11 + + vmovdqu 192(KS), TMP5 + cmp $12, NR + jbe .LLast2 + + ROUND 12 + ROUND 13 + + vmovdqu 224(KS), TMP5 + +.LLast2: + + vpxor (PT), TMP5, TMP3 + vaesenclast TMP3, CTR0, CTR0 + vpxor 16(PT), TMP5, TMP3 + vaesenclast TMP3, CTR1, CTR1 + vpxor 32(PT), TMP5, TMP3 + vaesenclast TMP3, CTR2, CTR2 + vpxor 48(PT), TMP5, TMP3 + vaesenclast TMP3, CTR3, CTR3 + vpxor 64(PT), TMP5, TMP3 + vaesenclast TMP3, CTR4, CTR4 + vpxor 80(PT), TMP5, TMP3 + vaesenclast TMP3, CTR5, CTR5 + vpxor 96(PT), TMP5, TMP3 + vaesenclast TMP3, CTR6, CTR6 + vpxor 112(PT), TMP5, TMP3 + vaesenclast TMP3, CTR7, CTR7 + + vmovdqu .Lbswap_mask(%rip), TMP3 + + vmovdqu CTR0, (CT) + vpshufb TMP3, CTR0, CTR0 + vmovdqu CTR1, 16(CT) + vpshufb TMP3, CTR1, CTR1 + vmovdqu CTR2, 32(CT) + vpshufb TMP3, CTR2, CTR2 + vmovdqu CTR3, 48(CT) + vpshufb TMP3, CTR3, CTR3 + vmovdqu CTR4, 64(CT) + vpshufb TMP3, CTR4, CTR4 + vmovdqu CTR5, 80(CT) + vpshufb TMP3, CTR5, CTR5 + vmovdqu CTR6, 96(CT) + vpshufb TMP3, CTR6, CTR6 + vmovdqu CTR7,112(CT) + vpshufb TMP3, CTR7, CTR7 + + vpxor TMP4, T, T + + lea 128(CT), CT + lea 128(PT), PT + jmp .LDataOctets + 
+.LEndOctets: + + vmovdqa CTR7, TMP5 + vmovdqa CTR6, 1*16(%rsp) + vmovdqa CTR5, 2*16(%rsp) + vmovdqa CTR4, 3*16(%rsp) + vmovdqa CTR3, 4*16(%rsp) + vmovdqa CTR2, 5*16(%rsp) + vmovdqa CTR1, 6*16(%rsp) + vmovdqa CTR0, 7*16(%rsp) + + vmovdqu 16*0(Htbl), TMP3 + vpclmulqdq $0x11, TMP3, TMP5, TMP1 + vpclmulqdq $0x00, TMP3, TMP5, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+0*16(Htbl), TMP3 + vpclmulqdq $0x00, TMP3, TMP5, TMP0 + + KARATSUBA 1 + KARATSUBA 2 + KARATSUBA 3 + KARATSUBA 4 + KARATSUBA 5 + KARATSUBA 6 + + vmovdqu 7*16(%rsp), TMP5 + vpxor T, TMP5, TMP5 + vmovdqu 16*7(Htbl), TMP4 + vpclmulqdq $0x11, TMP4, TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+7*16(Htbl), TMP4 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP0, TMP0 + + vpxor TMP1, TMP0, TMP0 + vpxor TMP2, TMP0, TMP0 + + vpsrldq $8, TMP0, TMP3 + vpxor TMP3, TMP1, TMP4 + vpslldq $8, TMP0, TMP3 + vpxor TMP3, TMP2, T + + vmovdqa .Lpoly(%rip), TMP2 + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + vpxor TMP4, T, T + +#Here we encrypt any remaining whole block +.LDataSingles: + + cmp $16, len + jb .LDataTail + sub $16, len + + vpshufb .Lbswap_mask(%rip), CTR, TMP1 + vpaddd .Lone(%rip), CTR, CTR + + vpxor (KS), TMP1, TMP1 + vaesenc 16*1(KS), TMP1, TMP1 + vaesenc 16*2(KS), TMP1, TMP1 + vaesenc 16*3(KS), TMP1, TMP1 + vaesenc 16*4(KS), TMP1, TMP1 + vaesenc 16*5(KS), TMP1, TMP1 + vaesenc 16*6(KS), TMP1, TMP1 + vaesenc 16*7(KS), TMP1, TMP1 + vaesenc 16*8(KS), TMP1, TMP1 + vaesenc 16*9(KS), TMP1, TMP1 + vmovdqu 16*10(KS), TMP2 + cmp $10, NR + je .LLast3 + vaesenc 16*10(KS), TMP1, TMP1 + vaesenc 16*11(KS), TMP1, TMP1 + vmovdqu 16*12(KS), TMP2 + cmp $12, NR + je .LLast3 + vaesenc 16*12(KS), TMP1, TMP1 + vaesenc 16*13(KS), TMP1, TMP1 + vmovdqu 16*14(KS), TMP2 + +.LLast3: 
+    vaesenclast TMP2, TMP1, TMP1
+
+    vpxor (PT), TMP1, TMP1
+    vmovdqu TMP1, (CT)
+    addq $16, CT
+    addq $16, PT
+
+    vpshufb .Lbswap_mask(%rip), TMP1, TMP1
+    vpxor TMP1, T, T
+    vmovdqu (Htbl), TMP0
+    call GFMUL # T = GFMUL(T, TMP0), TMP0 being the first table entry
+
+    jmp .LDataSingles
+
+#Here we encrypt the final partial block, if there is one
+.LDataTail:
+
+    test len, len
+    jz DATA_END
+# First prepare the counter block
+    vpshufb .Lbswap_mask(%rip), CTR, TMP1
+    vpaddd .Lone(%rip), CTR, CTR
+
+    vpxor (KS), TMP1, TMP1
+    vaesenc 16*1(KS), TMP1, TMP1
+    vaesenc 16*2(KS), TMP1, TMP1
+    vaesenc 16*3(KS), TMP1, TMP1
+    vaesenc 16*4(KS), TMP1, TMP1
+    vaesenc 16*5(KS), TMP1, TMP1
+    vaesenc 16*6(KS), TMP1, TMP1
+    vaesenc 16*7(KS), TMP1, TMP1
+    vaesenc 16*8(KS), TMP1, TMP1
+    vaesenc 16*9(KS), TMP1, TMP1
+    vmovdqu 16*10(KS), TMP2
+    cmp $10, NR
+    je .LLast4
+    vaesenc 16*10(KS), TMP1, TMP1
+    vaesenc 16*11(KS), TMP1, TMP1
+    vmovdqu 16*12(KS), TMP2
+    cmp $12, NR
+    je .LLast4
+    vaesenc 16*12(KS), TMP1, TMP1
+    vaesenc 16*13(KS), TMP1, TMP1
+    vmovdqu 16*14(KS), TMP2
+
+.LLast4:
+    vaesenclast TMP2, TMP1, TMP1
+#Zero a temp location
+    vpxor TMP2, TMP2, TMP2
+    vmovdqa TMP2, (%rsp)
+
+# Copy the required bytes only (could probably use rep movsb)
+    xor KS, KS # the key schedule is not read again in this function; KS becomes the byte index
+.LEncCpy:
+    cmp KS, len
+    je .LEncCpyEnd
+    movb (PT, KS, 1), %r8b
+    movb %r8b, (%rsp, KS, 1)
+    inc KS
+    jmp .LEncCpy
+.LEncCpyEnd:
+# Xor with the counter block
+    vpxor (%rsp), TMP1, TMP0
+# Again, store at temp location
+    vmovdqa TMP0, (%rsp)
+# Copy only the required bytes to CT, and zero the rest for the hash
+    xor KS, KS
+.LEncCpy2:
+    cmp KS, len
+    je .LEncCpy3
+    movb (%rsp, KS, 1), %r8b
+    movb %r8b, (CT, KS, 1)
+    inc KS
+    jmp .LEncCpy2
+.LEncCpy3:
+    cmp $16, KS
+    je .LEndCpy3
+    movb $0, (%rsp, KS, 1)
+    inc KS
+    jmp .LEncCpy3
+.LEndCpy3:
+    vmovdqa (%rsp), TMP0
+
+    vpshufb .Lbswap_mask(%rip), TMP0, TMP0
+    vpxor TMP0, T, T
+    vmovdqu (Htbl), TMP0
+    call GFMUL
+
+DATA_END:
+# swap T and CTR back and store them at offsets 272 and 288 of the context
+    vpshufb .Lbswap_mask(%rip), T, T
+    vpshufb .Lbswap_mask(%rip), CTR, CTR
+    vmovdqu T, 272(Gctx)
+    vmovdqu CTR, 288(Gctx)
+
+    movq %rbp, %rsp
+
+    popq %rbx
+    popq %rbp
+    ret
+    .size intel_aes_gcmENC, .-intel_aes_gcmENC
+
+#########################
+# Decrypt and Authenticate
+# void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx,uint64_t len);
+.type intel_aes_gcmDEC,@function
+.globl intel_aes_gcmDEC
+.align 16
+intel_aes_gcmDEC:
+# parameter 1: CT # input
+# parameter 2: PT # output
+# parameter 3: %rdx # Gctx
+# parameter 4: %rcx # len
+
+.macro DEC_KARATSUBA i
+    vmovdqu (7-\i)*16(CT), TMP5
+    vpshufb .Lbswap_mask(%rip), TMP5, TMP5
+
+    vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3
+    vpxor TMP3, TMP1, TMP1
+    vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3
+    vpxor TMP3, TMP2, TMP2
+    vpshufd $78, TMP5, TMP3
+    vpxor TMP5, TMP3, TMP5
+    vpclmulqdq $0x00, 128+\i*16(Htbl), TMP5, TMP3
+    vpxor TMP3, TMP0, TMP0
+.endm
+# here the input (CT) is in %rdi and the output (PT) in %rsi
+.set PT,%rsi
+.set CT,%rdi
+.set Htbl, %rdx
+.set len, %rcx
+.set KS,%r9
+.set NR,%r10d
+
+.set Gctx, %rdx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+.set TMP1,%xmm2
+.set TMP2,%xmm3
+.set TMP3,%xmm4
+.set TMP4,%xmm5
+.set TMP5,%xmm6
+.set CTR0,%xmm7
+.set CTR1,%xmm8
+.set CTR2,%xmm9
+.set CTR3,%xmm10
+.set CTR4,%xmm11
+.set CTR5,%xmm12
+.set CTR6,%xmm13
+.set CTR7,%xmm14
+.set CTR,%xmm15
+# nothing to do when len == 0
+    test len, len
+    jnz .LbeginDec
+    ret
+
+.LbeginDec:
+# reserve a 16-byte aligned scratch area of at least 128 bytes; %rbp keeps the original %rsp
+    pushq %rbp
+    pushq %rbx
+    movq %rsp, %rbp
+    sub $128, %rsp
+    andq $-16, %rsp
+    vmovdqu 288(Gctx), CTR
+    vmovdqu 272(Gctx), T
+    mov 304(Gctx), KS
+# AESContext->Nr
+    mov 244(KS), NR
+
+    vpshufb .Lbswap_mask(%rip), CTR, CTR
+    vpshufb .Lbswap_mask(%rip), T, T
+
+    vmovdqu .Lbswap_mask(%rip), TMP3 # TMP3 carries the byte-swap mask into .LDECOctets
+    jmp .LDECOctets
+
+# Decrypt 8 blocks each time while hashing them at the same time
+.align 64
+.LDECOctets:
+
+    cmp $128, len
+    jb .LDECSingles
+    sub $128, len
+
+    vmovdqa CTR, CTR0
+    vpaddd .Lone(%rip), CTR0, CTR1
+    vpaddd .Ltwo(%rip), CTR0, CTR2
+    vpaddd .Lone(%rip), CTR2, CTR3
+    vpaddd .Ltwo(%rip), CTR2, CTR4
+    vpaddd .Lone(%rip), CTR4, CTR5
+    vpaddd .Ltwo(%rip), CTR4, CTR6
+    vpaddd .Lone(%rip), CTR6, CTR7
+    vpaddd .Ltwo(%rip), CTR6, CTR
+
+
vpshufb TMP3, CTR0, CTR0 + vpshufb TMP3, CTR1, CTR1 + vpshufb TMP3, CTR2, CTR2 + vpshufb TMP3, CTR3, CTR3 + vpshufb TMP3, CTR4, CTR4 + vpshufb TMP3, CTR5, CTR5 + vpshufb TMP3, CTR6, CTR6 + vpshufb TMP3, CTR7, CTR7 + + vmovdqu (KS), TMP3 + vpxor TMP3, CTR0, CTR0 + vpxor TMP3, CTR1, CTR1 + vpxor TMP3, CTR2, CTR2 + vpxor TMP3, CTR3, CTR3 + vpxor TMP3, CTR4, CTR4 + vpxor TMP3, CTR5, CTR5 + vpxor TMP3, CTR6, CTR6 + vpxor TMP3, CTR7, CTR7 + + vmovdqu 7*16(CT), TMP5 + vpshufb .Lbswap_mask(%rip), TMP5, TMP5 + vmovdqu 16*0(Htbl), TMP3 + vpclmulqdq $0x11, TMP3, TMP5, TMP1 + vpclmulqdq $0x00, TMP3, TMP5, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+0*16(Htbl), TMP3 + vpclmulqdq $0x00, TMP3, TMP5, TMP0 + + ROUND 1 + DEC_KARATSUBA 1 + + ROUND 2 + DEC_KARATSUBA 2 + + ROUND 3 + DEC_KARATSUBA 3 + + ROUND 4 + DEC_KARATSUBA 4 + + ROUND 5 + DEC_KARATSUBA 5 + + ROUND 6 + DEC_KARATSUBA 6 + + ROUND 7 + + vmovdqu 0*16(CT), TMP5 + vpshufb .Lbswap_mask(%rip), TMP5, TMP5 + vpxor T, TMP5, TMP5 + vmovdqu 16*7(Htbl), TMP4 + + vpclmulqdq $0x11, TMP4, TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+7*16(Htbl), TMP4 + + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP0, TMP0 + + ROUND 8 + + vpxor TMP1, TMP0, TMP0 + vpxor TMP2, TMP0, TMP0 + + vpsrldq $8, TMP0, TMP3 + vpxor TMP3, TMP1, TMP4 + vpslldq $8, TMP0, TMP3 + vpxor TMP3, TMP2, T + vmovdqa .Lpoly(%rip), TMP2 + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + ROUND 9 + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + vmovdqu 160(KS), TMP5 + cmp $10, NR + + jbe .LDECLast1 + + ROUND 10 + ROUND 11 + + vmovdqu 192(KS), TMP5 + cmp $12, NR + + jbe .LDECLast1 + + ROUND 12 + ROUND 13 + + vmovdqu 224(KS), TMP5 + +.LDECLast1: + + vpxor (CT), TMP5, TMP3 + vaesenclast TMP3, CTR0, CTR0 + vpxor 16(CT), TMP5, TMP3 + vaesenclast TMP3, CTR1, CTR1 + vpxor 
32(CT), TMP5, TMP3 + vaesenclast TMP3, CTR2, CTR2 + vpxor 48(CT), TMP5, TMP3 + vaesenclast TMP3, CTR3, CTR3 + vpxor 64(CT), TMP5, TMP3 + vaesenclast TMP3, CTR4, CTR4 + vpxor 80(CT), TMP5, TMP3 + vaesenclast TMP3, CTR5, CTR5 + vpxor 96(CT), TMP5, TMP3 + vaesenclast TMP3, CTR6, CTR6 + vpxor 112(CT), TMP5, TMP3 + vaesenclast TMP3, CTR7, CTR7 + + vmovdqu .Lbswap_mask(%rip), TMP3 + + vmovdqu CTR0, (PT) + vmovdqu CTR1, 16(PT) + vmovdqu CTR2, 32(PT) + vmovdqu CTR3, 48(PT) + vmovdqu CTR4, 64(PT) + vmovdqu CTR5, 80(PT) + vmovdqu CTR6, 96(PT) + vmovdqu CTR7,112(PT) + + vpxor TMP4, T, T + + lea 128(CT), CT + lea 128(PT), PT + jmp .LDECOctets + +#Here we decrypt and hash any remaining whole block +.LDECSingles: + + cmp $16, len + jb .LDECTail + sub $16, len + + vmovdqu (CT), TMP1 + vpshufb .Lbswap_mask(%rip), TMP1, TMP1 + vpxor TMP1, T, T + vmovdqu (Htbl), TMP0 + call GFMUL + + + vpshufb .Lbswap_mask(%rip), CTR, TMP1 + vpaddd .Lone(%rip), CTR, CTR + + vpxor (KS), TMP1, TMP1 + vaesenc 16*1(KS), TMP1, TMP1 + vaesenc 16*2(KS), TMP1, TMP1 + vaesenc 16*3(KS), TMP1, TMP1 + vaesenc 16*4(KS), TMP1, TMP1 + vaesenc 16*5(KS), TMP1, TMP1 + vaesenc 16*6(KS), TMP1, TMP1 + vaesenc 16*7(KS), TMP1, TMP1 + vaesenc 16*8(KS), TMP1, TMP1 + vaesenc 16*9(KS), TMP1, TMP1 + vmovdqu 16*10(KS), TMP2 + cmp $10, NR + je .LDECLast2 + vaesenc 16*10(KS), TMP1, TMP1 + vaesenc 16*11(KS), TMP1, TMP1 + vmovdqu 16*12(KS), TMP2 + cmp $12, NR + je .LDECLast2 + vaesenc 16*12(KS), TMP1, TMP1 + vaesenc 16*13(KS), TMP1, TMP1 + vmovdqu 16*14(KS), TMP2 +.LDECLast2: + vaesenclast TMP2, TMP1, TMP1 + + vpxor (CT), TMP1, TMP1 + vmovdqu TMP1, (PT) + addq $16, CT + addq $16, PT + jmp .LDECSingles + +#Here we decrypt the final partial block, if there is one +.LDECTail: + test len, len + jz .LDEC_END + + vpshufb .Lbswap_mask(%rip), CTR, TMP1 + vpaddd .Lone(%rip), CTR, CTR + + vpxor (KS), TMP1, TMP1 + vaesenc 16*1(KS), TMP1, TMP1 + vaesenc 16*2(KS), TMP1, TMP1 + vaesenc 16*3(KS), TMP1, TMP1 + vaesenc 16*4(KS), TMP1, TMP1 + 
vaesenc 16*5(KS), TMP1, TMP1 + vaesenc 16*6(KS), TMP1, TMP1 + vaesenc 16*7(KS), TMP1, TMP1 + vaesenc 16*8(KS), TMP1, TMP1 + vaesenc 16*9(KS), TMP1, TMP1 + vmovdqu 16*10(KS), TMP2 + cmp $10, NR + je .LDECLast3 + vaesenc 16*10(KS), TMP1, TMP1 + vaesenc 16*11(KS), TMP1, TMP1 + vmovdqu 16*12(KS), TMP2 + cmp $12, NR + je .LDECLast3 + vaesenc 16*12(KS), TMP1, TMP1 + vaesenc 16*13(KS), TMP1, TMP1 + vmovdqu 16*14(KS), TMP2 + +.LDECLast3: + vaesenclast TMP2, TMP1, TMP1 + + vpxor TMP2, TMP2, TMP2 + vmovdqa TMP2, (%rsp) +# Copy the required bytes only (could probably use rep movsb) + xor KS, KS +.LDecCpy: + cmp KS, len + je .LDecCpy2 + movb (CT, KS, 1), %r8b + movb %r8b, (%rsp, KS, 1) + inc KS + jmp .LDecCpy +.LDecCpy2: + cmp $16, KS + je .LDecCpyEnd + movb $0, (%rsp, KS, 1) + inc KS + jmp .LDecCpy2 +.LDecCpyEnd: +# Xor with the counter block + vmovdqa (%rsp), TMP0 + vpxor TMP0, TMP1, TMP1 +# Again, store at temp location + vmovdqa TMP1, (%rsp) +# Copy only the required bytes to PT, and zero the rest for the hash + xor KS, KS +.LDecCpy3: + cmp KS, len + je .LDecCpyEnd3 + movb (%rsp, KS, 1), %r8b + movb %r8b, (PT, KS, 1) + inc KS + jmp .LDecCpy3 +.LDecCpyEnd3: + vpshufb .Lbswap_mask(%rip), TMP0, TMP0 + vpxor TMP0, T, T + vmovdqu (Htbl), TMP0 + call GFMUL +.LDEC_END: + + vpshufb .Lbswap_mask(%rip), T, T + vpshufb .Lbswap_mask(%rip), CTR, CTR + vmovdqu T, 272(Gctx) + vmovdqu CTR, 288(Gctx) + + movq %rbp, %rsp + + popq %rbx + popq %rbp + ret + .size intel_aes_gcmDEC, .-intel_aes_gcmDEC +######################### +# a = T +# b = TMP0 - remains unchanged +# res = T +# uses also TMP1,TMP2,TMP3,TMP4 +# __m128i GFMUL(__m128i A, __m128i B); +.type GFMUL,@function +.globl GFMUL +GFMUL: + vpclmulqdq $0x00, TMP0, T, TMP1 + vpclmulqdq $0x11, TMP0, T, TMP4 + + vpshufd $78, T, TMP2 + vpshufd $78, TMP0, TMP3 + vpxor T, TMP2, TMP2 + vpxor TMP0, TMP3, TMP3 + + vpclmulqdq $0x00, TMP3, TMP2, TMP2 + vpxor TMP1, TMP2, TMP2 + vpxor TMP4, TMP2, TMP2 + + vpslldq $8, TMP2, TMP3 + vpsrldq $8, TMP2, TMP2 
+ + vpxor TMP3, TMP1, TMP1 + vpxor TMP2, TMP4, TMP4 + + vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2 + vpshufd $78, TMP1, TMP3 + vpxor TMP3, TMP2, TMP1 + + vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2 + vpshufd $78, TMP1, TMP3 + vpxor TMP3, TMP2, TMP1 + + vpxor TMP4, TMP1, T + ret +.size GFMUL, .-GFMUL + diff --git a/security/nss/lib/freebl/jpake.c b/security/nss/lib/freebl/jpake.c new file mode 100644 index 0000000000..741c7a8760 --- /dev/null +++ b/security/nss/lib/freebl/jpake.c @@ -0,0 +1,495 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "secerr.h" +#include "secitem.h" +#include "secmpi.h" + +/* Hash an item's length and then its value. Only items smaller than 2^16 bytes + * are allowed. Lengths are hashed in network byte order. This is designed + * to match the OpenSSL J-PAKE implementation. + */ +static mp_err +hashSECItem(HASHContext *hash, const SECItem *it) +{ + unsigned char length[2]; + + if (it->len > 0xffff) + return MP_BADARG; + + length[0] = (unsigned char)(it->len >> 8); + length[1] = (unsigned char)(it->len); + hash->hashobj->update(hash->hash_context, length, 2); + hash->hashobj->update(hash->hash_context, it->data, it->len); + return MP_OKAY; +} + +/* Hash all public components of the signature, each prefixed with its + length, and then convert the hash to an mp_int. 
The items are hashed in the order g, gv, gx, signerID.
+   Returns MP_BADARG if no hash object is available for hashType, if that
+   object's length exceeds HASH_LENGTH_MAX, or if an item is too long for
+   hashSECItem; returns MP_MEM if the hash context cannot be created. */
+static mp_err
+hashPublicParams(HASH_HashType hashType, const SECItem *g,
+                 const SECItem *gv, const SECItem *gx,
+                 const SECItem *signerID, mp_int *h)
+{
+    mp_err err;
+    unsigned char hBuf[HASH_LENGTH_MAX];
+    SECItem hItem;
+    HASHContext hash;
+
+    hash.hashobj = HASH_GetRawHashObject(hashType);
+    if (hash.hashobj == NULL || hash.hashobj->length > sizeof hBuf) {
+        return MP_BADARG;
+    }
+    hash.hash_context = hash.hashobj->create();
+    if (hash.hash_context == NULL) {
+        return MP_MEM;
+    }
+
+    hItem.data = hBuf;
+    hItem.len = hash.hashobj->length;
+
+    hash.hashobj->begin(hash.hash_context);
+    CHECK_MPI_OK(hashSECItem(&hash, g));
+    CHECK_MPI_OK(hashSECItem(&hash, gv));
+    CHECK_MPI_OK(hashSECItem(&hash, gx));
+    CHECK_MPI_OK(hashSECItem(&hash, signerID));
+    hash.hashobj->end(hash.hash_context, hItem.data, &hItem.len,
+                      sizeof hBuf);
+    SECITEM_TO_MPINT(hItem, h);
+
+cleanup:
+    if (hash.hash_context != NULL) {
+        hash.hashobj->destroy(hash.hash_context, PR_TRUE);
+    }
+
+    return err;
+}
+
+/* Generate a Schnorr signature for round 1 or round 2.
+ *
+ * Produces gv = g^v (mod p) and r = v - x*h (mod q), where
+ * h = H(g, gv, gx, signerID).
+ *
+ * gx: when gxIn is NULL, gx = g^x (mod p) is computed and returned through
+ *     gxOut (which must be non-NULL with data == NULL); when gxIn is given it
+ *     is used as gx and gxOut must be NULL.
+ * v:  taken from testRandom when that is non-NULL, otherwise obtained from
+ *     DSA_NewRandom() with pqg->subPrime.
+ * gv and r must be non-NULL with data == NULL on entry; they are filled in
+ * through MPINT_TO_SECITEM with the given arena.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS when the argument checks
+ * fail, and SECFailure after MP_TO_SEC_ERROR when an MPI step fails.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+           const SECItem *signerID, const SECItem *x,
+           const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+           SECItem *gv, SECItem *r)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err;
+    mp_int p;
+    mp_int q;
+    mp_int g;
+    mp_int X;
+    mp_int GX;
+    mp_int V;
+    mp_int GV;
+    mp_int h;
+    mp_int tmp;
+    mp_int R;
+    SECItem v;
+
+    if (!arena ||
+        !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+        !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+        !pqg->base.data || pqg->base.len == 0 ||
+        !signerID || !signerID->data || signerID->len == 0 ||
+        !x || !x->data || x->len == 0 ||
+        (testRandom && (!testRandom->data || testRandom->len == 0)) ||
+        (gxIn == NULL && (!gxOut || gxOut->data != NULL)) ||
+        (gxIn != NULL && (!gxIn->data || gxIn->len == 0 || gxOut != NULL)) ||
+        !gv || gv->data != NULL ||
+        !r || r->data != NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Zero every digit pointer before the first mp_init so that cleanup can
+       call mp_clear on all of them even if an early step fails (this relies
+       on mp_clear tolerating a zeroed mp_int -- confirm against mpi). */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&X) = 0;
+    MP_DIGITS(&GX) = 0;
+    MP_DIGITS(&V) = 0;
+    MP_DIGITS(&GV) = 0;
+    MP_DIGITS(&h) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&R) = 0;
+
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&X));
+    CHECK_MPI_OK(mp_init(&GX));
+    CHECK_MPI_OK(mp_init(&V));
+    CHECK_MPI_OK(mp_init(&GV));
+    CHECK_MPI_OK(mp_init(&h));
+    CHECK_MPI_OK(mp_init(&tmp));
+    CHECK_MPI_OK(mp_init(&R));
+
+    SECITEM_TO_MPINT(pqg->prime, &p);
+    SECITEM_TO_MPINT(pqg->subPrime, &q);
+    SECITEM_TO_MPINT(pqg->base, &g);
+    SECITEM_TO_MPINT(*x, &X);
+
+    /* gx = g^x (mod p), computed here unless the caller supplied it */
+    if (gxIn == NULL) {
+        CHECK_MPI_OK(mp_exptmod(&g, &X, &p, &GX));
+        MPINT_TO_SECITEM(&GX, gxOut, arena);
+        gxIn = gxOut;
+    } else {
+        SECITEM_TO_MPINT(*gxIn, &GX);
+    }
+
+    /* v is the random exponent: generated by DSA_NewRandom for the subprime
+       q, or taken verbatim from testRandom when the caller provides one */
+    if (testRandom == NULL) {
+        v.data = NULL;
+        rv = DSA_NewRandom(arena, &pqg->subPrime, &v);
+        if (rv != SECSuccess) {
+            goto cleanup;
+        }
+    } else {
+        v.data = testRandom->data;
+        v.len = testRandom->len;
+    }
+    SECITEM_TO_MPINT(v, &V);
+
+    /* gv = g^v (mod p), random v, 1 <= v < q */
+    CHECK_MPI_OK(mp_exptmod(&g, &V, &p, &GV));
+    MPINT_TO_SECITEM(&GV, gv, arena);
+
+    /* h = H(g, gv, gx, signerID) */
+    CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gxIn, signerID,
+                                  &h));
+
+    /* r = v - x*h (mod q) */
+    CHECK_MPI_OK(mp_mulmod(&X, &h, &q, &tmp));
+    CHECK_MPI_OK(mp_submod(&V, &tmp, &q, &R));
+    MPINT_TO_SECITEM(&R, r, arena);
+
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&X);
+    mp_clear(&GX);
+    mp_clear(&V);
+    mp_clear(&GV);
+    mp_clear(&h);
+    mp_clear(&tmp);
+    mp_clear(&R);
+    /* err is only translated when rv is still SECSuccess, i.e. not after a DSA_NewRandom failure */
+    if (rv == SECSuccess && err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* Verify a Schnorr signature generated by the peer in round 1 or round 2.
The signature (gv, r) over gx is accepted when g^r * gx^h (mod p) equals gv,
+   with h = H(g, gv, gx, peerID). signerID is not hashed; it only has to
+   differ from peerID. gx must lie in [1, p-2] and satisfy gx^q == 1 (mod p),
+   and r must be below q. Fails with SEC_ERROR_INVALID_ARGS when an argument
+   is missing or empty (or signerID equals peerID) and with
+   SEC_ERROR_BAD_SIGNATURE when a check does not hold. */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+             const SECItem *signerID, const SECItem *peerID,
+             const SECItem *gx, const SECItem *gv, const SECItem *r)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err;
+    mp_int p;
+    mp_int q;
+    mp_int g;
+    mp_int p_minus_1;
+    mp_int GX;
+    mp_int h;
+    mp_int one;
+    mp_int R;
+    mp_int gr;
+    mp_int gxh;
+    mp_int gr_gxh;
+    SECItem calculated;
+
+    if (!arena ||
+        !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+        !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+        !pqg->base.data || pqg->base.len == 0 ||
+        !signerID || !signerID->data || signerID->len == 0 ||
+        !peerID || !peerID->data || peerID->len == 0 ||
+        !gx || !gx->data || gx->len == 0 ||
+        !gv || !gv->data || gv->len == 0 ||
+        !r || !r->data || r->len == 0 ||
+        SECITEM_CompareItem(signerID, peerID) == SECEqual) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Zero every digit pointer before the first mp_init so the cleanup code
+       can be reached from any failure point below. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&p_minus_1) = 0;
+    MP_DIGITS(&GX) = 0;
+    MP_DIGITS(&h) = 0;
+    MP_DIGITS(&one) = 0;
+    MP_DIGITS(&R) = 0;
+    MP_DIGITS(&gr) = 0;
+    MP_DIGITS(&gxh) = 0;
+    MP_DIGITS(&gr_gxh) = 0;
+    calculated.data = NULL;
+
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&p_minus_1));
+    CHECK_MPI_OK(mp_init(&GX));
+    CHECK_MPI_OK(mp_init(&h));
+    CHECK_MPI_OK(mp_init(&one));
+    CHECK_MPI_OK(mp_init(&R));
+    CHECK_MPI_OK(mp_init(&gr));
+    CHECK_MPI_OK(mp_init(&gxh));
+    CHECK_MPI_OK(mp_init(&gr_gxh));
+
+    SECITEM_TO_MPINT(pqg->prime, &p);
+    SECITEM_TO_MPINT(pqg->subPrime, &q);
+    SECITEM_TO_MPINT(pqg->base, &g);
+    SECITEM_TO_MPINT(*gx, &GX);
+    SECITEM_TO_MPINT(*r, &R);
+    /* one = gx^q (mod p); the name reflects the value a valid gx must give */
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &p_minus_1));
+    CHECK_MPI_OK(mp_exptmod(&GX, &q, &p, &one));
+    /* Reject unless g^x is in [1, p-2], R is in [0, q-1] (only R < q is
+       tested here), and (g^x)^q mod p == 1 */
+    if (!(mp_cmp_z(&GX) > 0 &&
+          mp_cmp(&GX, &p_minus_1) < 0 &&
+          mp_cmp(&R, &q) < 0 &&
+          mp_cmp_d(&one, 1) == 0)) {
+
goto badSig;
+    }
+
+    CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gx, peerID,
+                                  &h));
+
+    /* Calculate g^v = g^r * g^x^h */
+    CHECK_MPI_OK(mp_exptmod(&g, &R, &p, &gr));
+    CHECK_MPI_OK(mp_exptmod(&GX, &h, &p, &gxh));
+    CHECK_MPI_OK(mp_mulmod(&gr, &gxh, &p, &gr_gxh));
+
+    /* Compare calculated g^v to given g^v */
+    MPINT_TO_SECITEM(&gr_gxh, &calculated, arena);
+    if (calculated.len == gv->len &&
+        NSS_SecureMemcmp(calculated.data, gv->data, calculated.len) == 0) {
+        rv = SECSuccess;
+    } else {
+    badSig:
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure;
+    }
+
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&p_minus_1);
+    mp_clear(&GX);
+    mp_clear(&h);
+    mp_clear(&one);
+    mp_clear(&R);
+    mp_clear(&gr);
+    mp_clear(&gxh);
+    mp_clear(&gr_gxh);
+
+    if (rv == SECSuccess && err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* Calculate base = gx1*gx3*gx4 (mod p), i.e. g^(x1+x3+x4) (mod p)
+ *
+ * gx1, gx3, gx4: the three public values to multiply together.
+ * p:             the modulus.
+ * base:          receives the product reduced mod p.
+ *
+ * Returns MP_OKAY on success, MP_BADARG when gx3 and gx4 are equal, or the
+ * error reported by the failing MPI call. Every exit taken after the
+ * temporaries are set up goes through the cleanup label so they are always
+ * released with mp_clear().
+ */
+static mp_err
+jpake_Round2Base(const SECItem *gx1, const SECItem *gx3,
+                 const SECItem *gx4, const mp_int *p, mp_int *base)
+{
+    mp_err err;
+    mp_int GX1;
+    mp_int GX3;
+    mp_int GX4;
+    mp_int tmp;
+
+    /* Zero the digit pointers first so mp_clear() in cleanup is safe even
+     * when one of the mp_init() calls below fails. */
+    MP_DIGITS(&GX1) = 0;
+    MP_DIGITS(&GX3) = 0;
+    MP_DIGITS(&GX4) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    CHECK_MPI_OK(mp_init(&GX1));
+    CHECK_MPI_OK(mp_init(&GX3));
+    CHECK_MPI_OK(mp_init(&GX4));
+    CHECK_MPI_OK(mp_init(&tmp));
+
+    SECITEM_TO_MPINT(*gx1, &GX1);
+    SECITEM_TO_MPINT(*gx3, &GX3);
+    SECITEM_TO_MPINT(*gx4, &GX4);
+
+    /* In round 2, the peer/attacker sends us g^x3 and g^x4 and the protocol
+       requires that these values are distinct. */
+    if (mp_cmp(&GX3, &GX4) == 0) {
+        /* Do not return directly: GX1, GX3, GX4 and tmp are already
+         * initialized and must be released by the cleanup code. */
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_mul(&GX1, &GX3, &tmp));
+    CHECK_MPI_OK(mp_mul(&tmp, &GX4, &tmp));
+    CHECK_MPI_OK(mp_mod(&tmp, p, base));
+
+cleanup:
+    mp_clear(&GX1);
+    mp_clear(&GX3);
+    mp_clear(&GX4);
+    mp_clear(&tmp);
+    return err;
+}
+
+/* J-PAKE round 2 computation.
+ *
+ * Always computes base = gx1*gx3*gx4 (mod p). When x2s is non-NULL it also
+ * computes x2s = x2*s (mod q); in that case s must lie in [1, q-1].
+ *
+ * NULL or empty inputs, and output items whose data pointer is already set,
+ * are rejected with SEC_ERROR_INVALID_ARGS. x2 and s are only required when
+ * x2s is requested. Results are written with MPINT_TO_SECITEM using arena.
+ *
+ * Returns SECSuccess, or SECFailure with the error set from the MPI error
+ * code via MP_TO_SEC_ERROR.
+ */
+SECStatus
+JPAKE_Round2(PLArenaPool *arena,
+             const SECItem *p, const SECItem *q, const SECItem *gx1,
+             const SECItem *gx3, const SECItem *gx4, SECItem *base,
+             const SECItem *x2, const SECItem *s, SECItem *x2s)
+{
+    mp_err err;
+    mp_int P;
+    mp_int Q;
+    mp_int X2;
+    mp_int S;
+    mp_int result;
+
+    if (!arena ||
+        !p || !p->data || p->len == 0 ||
+        !q || !q->data || q->len == 0 ||
+        !gx1 || !gx1->data || gx1->len == 0 ||
+        !gx3 || !gx3->data || gx3->len == 0 ||
+        !gx4 || !gx4->data || gx4->len == 0 ||
+        !base || base->data != NULL ||
+        (x2s != NULL && (x2s->data != NULL ||
+                         !x2 || !x2->data || x2->len == 0 ||
+                         !s || !s->data || s->len == 0))) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&X2) = 0;
+    MP_DIGITS(&S) = 0;
+    MP_DIGITS(&result) = 0;
+
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&result));
+
+    if (x2s != NULL) {
+        CHECK_MPI_OK(mp_init(&X2));
+        CHECK_MPI_OK(mp_init(&S));
+
+        SECITEM_TO_MPINT(*q, &Q);
+        SECITEM_TO_MPINT(*x2, &X2);
+
+        SECITEM_TO_MPINT(*s, &S);
+        /* S must be in [1, Q-1] */
+        if (mp_cmp_z(&S) <= 0 || mp_cmp(&S, &Q) >= 0) {
+            err = MP_BADARG;
+            goto cleanup;
+        }
+
+        CHECK_MPI_OK(mp_mulmod(&X2, &S, &Q, &result));
+        MPINT_TO_SECITEM(&result, x2s, arena);
+    }
+
+    SECITEM_TO_MPINT(*p, &P);
+    CHECK_MPI_OK(jpake_Round2Base(gx1, gx3, gx4, &P, &result));
+    MPINT_TO_SECITEM(&result, base, arena);
+
+cleanup:
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&X2);
+    mp_clear(&S);
+    mp_clear(&result);
+
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* J-PAKE final computation: K = (B / gx4^x2s)^x2 (mod p).
+ *
+ * The division is carried out by raising gx4 to the exponent q - x2s and
+ * multiplying B by the result. All inputs must be non-NULL and non-empty,
+ * and K->data must be NULL on entry; otherwise SEC_ERROR_INVALID_ARGS is
+ * set. K is written with MPINT_TO_SECITEM using arena.
+ *
+ * Returns SECSuccess, or SECFailure with the error set from the MPI error
+ * code via MP_TO_SEC_ERROR.
+ */
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+            const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+            const SECItem *B, SECItem *K)
+{
+    mp_err err;
+    mp_int P;
+    mp_int Q;
+    mp_int tmp;
+    mp_int exponent;
+    mp_int divisor;
+    mp_int base;
+
+    if (!arena ||
+        !p || !p->data || p->len == 0 ||
+        !q || !q->data || q->len == 0 ||
+        !x2 || !x2->data || x2->len == 0 ||
+        !gx4 || !gx4->data || gx4->len == 0 ||
+        !x2s || !x2s->data || x2s->len == 0 ||
+        !B || !B->data || B->len == 0 ||
+        !K || K->data != NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&exponent) = 0;
+    MP_DIGITS(&divisor) = 0;
+    MP_DIGITS(&base) = 0;
+
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&tmp));
+    CHECK_MPI_OK(mp_init(&exponent));
+    CHECK_MPI_OK(mp_init(&divisor));
+    CHECK_MPI_OK(mp_init(&base));
+
+    /* exponent = -x2s (mod q) */
+    SECITEM_TO_MPINT(*q, &Q);
+    SECITEM_TO_MPINT(*x2s, &tmp);
+    /* q == 0 (mod q), so q - x2s == -x2s (mod q) */
+    CHECK_MPI_OK(mp_sub(&Q, &tmp, &exponent));
+
+    /* divisor = gx4^-x2s = 1/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*p, &P);
+    SECITEM_TO_MPINT(*gx4, &tmp);
+    CHECK_MPI_OK(mp_exptmod(&tmp, &exponent, &P, &divisor));
+
+    /* base = B*divisor = B/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*B, &tmp);
+    CHECK_MPI_OK(mp_mulmod(&divisor, &tmp, &P, &base));
+
+    /* tmp = base^x2 (mod p) */
+    SECITEM_TO_MPINT(*x2, &exponent);
+    CHECK_MPI_OK(mp_exptmod(&base, &exponent, &P, &tmp));
+
+    MPINT_TO_SECITEM(&tmp, K, arena);
+
+cleanup:
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&tmp);
+    mp_clear(&exponent);
+    mp_clear(&divisor);
+    mp_clear(&base);
+
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ldvector.c b/security/nss/lib/freebl/ldvector.c
new file mode 100644
index 0000000000..6f4bd6ad4c
--- /dev/null
+++ b/security/nss/lib/freebl/ldvector.c
@@ -0,0 +1,433 @@
+/*
+ *
ldvector.c - platform dependent DSO containing freebl implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +extern int FREEBL_InitStubs(void); +#endif + +#include "loader.h" +#include "cmac.h" +#include "alghmac.h" +#include "hmacct.h" +#include "blapii.h" +#include "secerr.h" + +SECStatus +FREEBL_Deprecated(void) +{ + + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); + return SECFailure; +} + +static const struct FREEBLVectorStr vector = { + + sizeof vector, + FREEBL_VERSION, + + RSA_NewKey, + RSA_PublicKeyOp, + RSA_PrivateKeyOp, + DSA_NewKey, + DSA_SignDigest, + DSA_VerifyDigest, + DSA_NewKeyFromSeed, + DSA_SignDigestWithSeed, + DH_GenParam, + DH_NewKey, + DH_Derive, + KEA_Derive, + KEA_Verify, + RC4_CreateContext, + RC4_DestroyContext, + RC4_Encrypt, + RC4_Decrypt, +#ifndef NSS_DISABLE_DEPRECATED_RC2 + RC2_CreateContext, + RC2_DestroyContext, + RC2_Encrypt, + RC2_Decrypt, +#else + (F_RC2_CreateContext)FREEBL_Deprecated, + (F_RC2_DestroyContext)FREEBL_Deprecated, + (F_RC2_Encrypt)FREEBL_Deprecated, + (F_RC2_Decrypt)FREEBL_Deprecated, +#endif + RC5_CreateContext, + RC5_DestroyContext, + RC5_Encrypt, + RC5_Decrypt, + DES_CreateContext, + DES_DestroyContext, + DES_Encrypt, + DES_Decrypt, + AES_CreateContext, + AES_DestroyContext, + AES_Encrypt, + AES_Decrypt, + MD5_Hash, + MD5_HashBuf, + MD5_NewContext, + MD5_DestroyContext, + MD5_Begin, + MD5_Update, + MD5_End, + MD5_FlattenSize, + MD5_Flatten, + MD5_Resurrect, + MD5_TraceState, + MD2_Hash, + MD2_NewContext, + MD2_DestroyContext, + MD2_Begin, + MD2_Update, + MD2_End, + MD2_FlattenSize, + MD2_Flatten, + MD2_Resurrect, + SHA1_Hash, + SHA1_HashBuf, + SHA1_NewContext, + SHA1_DestroyContext, + SHA1_Begin, + SHA1_Update, + SHA1_End, + SHA1_TraceState, + SHA1_FlattenSize, + SHA1_Flatten, + SHA1_Resurrect, + 
RNG_RNGInit, + RNG_RandomUpdate, + RNG_GenerateGlobalRandomBytes, + RNG_RNGShutdown, + PQG_ParamGen, + PQG_ParamGenSeedLen, + PQG_VerifyParams, + + /* End of Version 3.001. */ + + RSA_PrivateKeyOpDoubleChecked, + RSA_PrivateKeyCheck, + BL_Cleanup, + + /* End of Version 3.002. */ + + SHA256_NewContext, + SHA256_DestroyContext, + SHA256_Begin, + SHA256_Update, + SHA256_End, + SHA256_HashBuf, + SHA256_Hash, + SHA256_TraceState, + SHA256_FlattenSize, + SHA256_Flatten, + SHA256_Resurrect, + + SHA512_NewContext, + SHA512_DestroyContext, + SHA512_Begin, + SHA512_Update, + SHA512_End, + SHA512_HashBuf, + SHA512_Hash, + SHA512_TraceState, + SHA512_FlattenSize, + SHA512_Flatten, + SHA512_Resurrect, + + SHA384_NewContext, + SHA384_DestroyContext, + SHA384_Begin, + SHA384_Update, + SHA384_End, + SHA384_HashBuf, + SHA384_Hash, + SHA384_TraceState, + SHA384_FlattenSize, + SHA384_Flatten, + SHA384_Resurrect, + + /* End of Version 3.003. */ + + AESKeyWrap_CreateContext, + AESKeyWrap_DestroyContext, + AESKeyWrap_Encrypt, + AESKeyWrap_Decrypt, + + /* End of Version 3.004. */ + + BLAPI_SHVerify, + BLAPI_VerifySelf, + + /* End of Version 3.005. */ + + EC_NewKey, + EC_NewKeyFromSeed, + EC_ValidatePublicKey, + ECDH_Derive, + ECDSA_SignDigest, + ECDSA_VerifyDigest, + ECDSA_SignDigestWithSeed, + + /* End of Version 3.006. */ + /* End of Version 3.007. 
*/ + + AES_InitContext, + AESKeyWrap_InitContext, + DES_InitContext, +#ifndef NSS_DISABLE_DEPRECATED_RC2 + RC2_InitContext, +#else + (F_RC2_InitContext)FREEBL_Deprecated, +#endif + RC4_InitContext, + + AES_AllocateContext, + AESKeyWrap_AllocateContext, + DES_AllocateContext, +#ifndef NSS_DISABLE_DEPRECATED_RC2 + RC2_AllocateContext, +#else + (F_RC2_AllocateContext)FREEBL_Deprecated, +#endif + RC4_AllocateContext, + + MD2_Clone, + MD5_Clone, + SHA1_Clone, + SHA256_Clone, + SHA384_Clone, + SHA512_Clone, + + TLS_PRF, + HASH_GetRawHashObject, + + HMAC_Create, + HMAC_Init, + HMAC_Begin, + HMAC_Update, + HMAC_Clone, + HMAC_Finish, + HMAC_Destroy, + + RNG_SystemInfoForRNG, + + /* End of Version 3.008. */ + + FIPS186Change_GenerateX, + FIPS186Change_ReduceModQForDSA, + + /* End of Version 3.009. */ + Camellia_InitContext, + Camellia_AllocateContext, + Camellia_CreateContext, + Camellia_DestroyContext, + Camellia_Encrypt, + Camellia_Decrypt, + + PQG_DestroyParams, + PQG_DestroyVerify, + +/* End of Version 3.010. */ + +#ifndef NSS_DISABLE_DEPRECATED_SEED + SEED_InitContext, + SEED_AllocateContext, + SEED_CreateContext, + SEED_DestroyContext, + SEED_Encrypt, + SEED_Decrypt, +#else + (F_SEED_InitContext)FREEBL_Deprecated, + (F_SEED_AllocateContext)FREEBL_Deprecated, + (F_SEED_CreateContext)FREEBL_Deprecated, + (F_SEED_DestroyContext)FREEBL_Deprecated, + (F_SEED_Encrypt)FREEBL_Deprecated, + (F_SEED_Decrypt)FREEBL_Deprecated, +#endif /* NSS_DISABLE_DEPRECATED_SEED */ + + BL_Init, + BL_SetForkState, + + PRNGTEST_Instantiate, + PRNGTEST_Reseed, + PRNGTEST_Generate, + + PRNGTEST_Uninstantiate, + + /* End of Version 3.011. 
*/ + + RSA_PopulatePrivateKey, + + DSA_NewRandom, + + JPAKE_Sign, + JPAKE_Verify, + JPAKE_Round2, + JPAKE_Final, + + /* End of Version 3.012 */ + + TLS_P_hash, + SHA224_NewContext, + SHA224_DestroyContext, + SHA224_Begin, + SHA224_Update, + SHA224_End, + SHA224_HashBuf, + SHA224_Hash, + SHA224_TraceState, + SHA224_FlattenSize, + SHA224_Flatten, + SHA224_Resurrect, + SHA224_Clone, + BLAPI_SHVerifyFile, + + /* End of Version 3.013 */ + + PQG_ParamGenV2, + PRNGTEST_RunHealthTests, + + /* End of Version 3.014 */ + + HMAC_ConstantTime, + SSLv3_MAC_ConstantTime, + + /* End of Version 3.015 */ + + RSA_SignRaw, + RSA_CheckSignRaw, + RSA_CheckSignRecoverRaw, + RSA_EncryptRaw, + RSA_DecryptRaw, + RSA_EncryptOAEP, + RSA_DecryptOAEP, + RSA_EncryptBlock, + RSA_DecryptBlock, + RSA_SignPSS, + RSA_CheckSignPSS, + RSA_Sign, + RSA_CheckSign, + RSA_CheckSignRecover, + + /* End of Version 3.016 */ + + EC_FillParams, + EC_DecodeParams, + EC_CopyParams, + + /* End of Version 3.017 */ + + ChaCha20Poly1305_InitContext, + ChaCha20Poly1305_CreateContext, + ChaCha20Poly1305_DestroyContext, + ChaCha20Poly1305_Seal, + ChaCha20Poly1305_Open, + + /* End of Version 3.018 */ + + EC_GetPointSize, + + /* End of Version 3.019 */ + + BLAKE2B_Hash, + BLAKE2B_HashBuf, + BLAKE2B_MAC_HashBuf, + BLAKE2B_NewContext, + BLAKE2B_DestroyContext, + BLAKE2B_Begin, + BLAKE2B_MAC_Begin, + BLAKE2B_Update, + BLAKE2B_End, + BLAKE2B_FlattenSize, + BLAKE2B_Flatten, + BLAKE2B_Resurrect, + + /* End of Version 3.020 */ + + ChaCha20_Xor, + + /* End of version 3.021 */ + + CMAC_Init, + CMAC_Create, + CMAC_Begin, + CMAC_Update, + CMAC_Finish, + CMAC_Destroy, + + /* End of version 3.022 */ + ChaCha20Poly1305_Encrypt, + ChaCha20Poly1305_Decrypt, + AES_AEAD, + AESKeyWrap_EncryptKWP, + AESKeyWrap_DecryptKWP, + + /* End of version 3.023 */ + KEA_PrimeCheck, + + /* End of version 3.024 */ + ChaCha20_InitContext, + ChaCha20_CreateContext, + ChaCha20_DestroyContext + + /* End of version 3.025 */ +}; + +const FREEBLVector* 
+FREEBL_GetVector(void) +{ +#ifdef FREEBL_NO_DEPEND + SECStatus rv; +#endif + +#define NSS_VERSION_VARIABLE __nss_freebl_version +#include "verref.h" + +#ifdef FREEBL_NO_DEPEND + /* this entry point is only valid if nspr and nss-util has been loaded */ + rv = FREEBL_InitStubs(); + if (rv != SECSuccess) { + return NULL; + } +#endif + +#ifndef NSS_FIPS_DISABLED + /* In FIPS mode make sure the Full self tests have been run before + * continuing. */ + BL_POSTRan(PR_FALSE); +#endif + + return &vector; +} + +#ifdef FREEBL_LOWHASH +static const struct NSSLOWVectorStr nssvector = { + sizeof nssvector, + NSSLOW_VERSION, + FREEBL_GetVector, + NSSLOW_Init, + NSSLOW_Shutdown, + NSSLOW_Reset, + NSSLOWHASH_NewContext, + NSSLOWHASH_Begin, + NSSLOWHASH_Update, + NSSLOWHASH_End, + NSSLOWHASH_Destroy, + NSSLOWHASH_Length +}; + +const NSSLOWVector* +NSSLOW_GetVector(void) +{ + /* POST check and stub init happens in FREEBL_GetVector() and + * NSSLOW_Init() respectively */ + return &nssvector; +} +#endif diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c new file mode 100644 index 0000000000..692a8831bf --- /dev/null +++ b/security/nss/lib/freebl/loader.c @@ -0,0 +1,2448 @@ +/* + * loader.c - load platform dependent DSO containing freebl implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "loader.h" +#include "prmem.h" +#include "prerror.h" +#include "prinit.h" +#include "prenv.h" +#include "blname.c" + +#include "prio.h" +#include "prprf.h" +#include +#include "prsystem.h" + +static const char *NameOfThisSharedLib = + SHLIB_PREFIX "softokn" SOFTOKEN_SHLIB_VERSION "." 
SHLIB_SUFFIX;
+
+static PRLibrary *blLib = NULL;
+
+#define LSB(x) ((x)&0xff)
+#define MSB(x) ((x) >> 8)
+
+static const FREEBLVector *vector;
+static const char *libraryName = NULL;
+
+#include "genload.c"
+
+/* This function must be run only once. */
+/* determine if hybrid platform, then actually load the DSO. */
+/*
+ * Loads the library named by getLibName(), looks up its FREEBL_GetVector
+ * entry point and accepts the returned vector only when the major version
+ * matches FREEBL_VERSION, the minor version is at least ours, and the
+ * vector is at least sizeof(FREEBLVector) long.
+ *
+ * On success the file-scope variables vector, libraryName and blLib are
+ * set and PR_SUCCESS is returned. On any failure PR_FAILURE is returned
+ * and, if the library had been opened, it is unloaded again.
+ */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    PRLibrary *handle;
+    const char *name = getLibName();
+
+    if (!name) {
+        PR_SetError(PR_LOAD_LIBRARY_ERROR, 0);
+        return PR_FAILURE;
+    }
+
+    handle = loader_LoadLibrary(name);
+    if (handle) {
+        PRFuncPtr address = PR_FindFunctionSymbol(handle, "FREEBL_GetVector");
+        if (address) {
+            FREEBLGetVectorFn *getVector = (FREEBLGetVectorFn *)address;
+            const FREEBLVector *dsoVector = getVector();
+            if (dsoVector) {
+                unsigned short dsoVersion = dsoVector->version;
+                unsigned short myVersion = FREEBL_VERSION;
+                if (MSB(dsoVersion) == MSB(myVersion) &&
+                    LSB(dsoVersion) >= LSB(myVersion) &&
+                    dsoVector->length >= sizeof(FREEBLVector)) {
+                    vector = dsoVector;
+                    libraryName = name;
+                    blLib = handle;
+                    return PR_SUCCESS;
+                }
+            }
+        }
+        /* The library was opened but rejected (missing symbol, NULL vector
+         * or incompatible version). blLib is only assigned on the success
+         * path above, so the local handle is what must be released here. */
+#ifdef DEBUG
+        {
+            PRStatus status = PR_UnloadLibrary(handle);
+            PORT_Assert(PR_SUCCESS == status);
+        }
+#else
+        PR_UnloadLibrary(handle);
+#endif
+    }
+    return PR_FAILURE;
+}
+
+static const PRCallOnceType pristineCallOnce;
+static PRCallOnceType loadFreeBLOnce;
+
+/* Runs freebl_LoadDSO through PR_CallOnce and returns its status. */
+static PRStatus
+freebl_RunLoaderOnce(void)
+{
+    PRStatus status;
+
+    status = PR_CallOnce(&loadFreeBLOnce, &freebl_LoadDSO);
+    return status;
+}
+
+/* Forwards to the loaded vector's BL_Init, loading the DSO first if needed. */
+SECStatus
+BL_Init(void)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return SECFailure;
+    return (vector->p_BL_Init)();
+}
+
+/* Forwards to the loaded vector's RSA_NewKey; returns NULL if loading fails. */
+RSAPrivateKey *
+RSA_NewKey(int keySizeInBits, SECItem *publicExponent)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return NULL;
+    return (vector->p_RSA_NewKey)(keySizeInBits, publicExponent);
+}
+
+SECStatus
+RSA_PublicKeyOp(RSAPublicKey *key,
+                unsigned char *output,
+                const unsigned char *input)
+{
+    if
(!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PublicKeyOp)(key, output, input); +} + +SECStatus +RSA_PrivateKeyOp(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PrivateKeyOp)(key, output, input); +} + +SECStatus +RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PrivateKeyOpDoubleChecked)(key, output, input); +} + +SECStatus +RSA_PrivateKeyCheck(const RSAPrivateKey *key) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PrivateKeyCheck)(key); +} + +SECStatus +DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_NewKey)(params, privKey); +} + +SECStatus +DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_SignDigest)(key, signature, digest); +} + +SECStatus +DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature, + const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_VerifyDigest)(key, signature, digest); +} + +SECStatus +DSA_NewKeyFromSeed(const PQGParams *params, const unsigned char *seed, + DSAPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_NewKeyFromSeed)(params, seed, privKey); +} + +SECStatus +DSA_SignDigestWithSeed(DSAPrivateKey *key, SECItem *signature, + const SECItem *digest, const unsigned char *seed) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + 
return (vector->p_DSA_SignDigestWithSeed)(key, signature, digest, seed); +} + +SECStatus +DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_NewRandom)(arena, q, seed); +} + +SECStatus +DH_GenParam(int primeLen, DHParams **params) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DH_GenParam)(primeLen, params); +} + +SECStatus +DH_NewKey(DHParams *params, DHPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DH_NewKey)(params, privKey); +} + +SECStatus +DH_Derive(SECItem *publicValue, SECItem *prime, SECItem *privateValue, + SECItem *derivedSecret, unsigned int maxOutBytes) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DH_Derive)(publicValue, prime, privateValue, + derivedSecret, maxOutBytes); +} + +SECStatus +KEA_Derive(SECItem *prime, SECItem *public1, SECItem *public2, + SECItem *private1, SECItem *private2, SECItem *derivedSecret) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_KEA_Derive)(prime, public1, public2, + private1, private2, derivedSecret); +} + +PRBool +KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return (vector->p_KEA_Verify)(Y, prime, subPrime); +} + +PRBool +KEA_PrimeCheck(SECItem *prime) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return (vector->p_KEA_PrimeCheck)(prime); +} + +RC4Context * +RC4_CreateContext(const unsigned char *key, int len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC4_CreateContext)(key, len); +} + +void +RC4_DestroyContext(RC4Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + 
return; + (vector->p_RC4_DestroyContext)(cx, freeit); +} + +SECStatus +RC4_Encrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC4_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RC4_Decrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC4_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +RC2Context * +RC2_CreateContext(const unsigned char *key, unsigned int len, + const unsigned char *iv, int mode, unsigned effectiveKeyLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; +#ifndef NSS_DISABLE_DEPRECATED_RC2 + return (vector->p_RC2_CreateContext)(key, len, iv, mode, effectiveKeyLen); +#else + return NULL; +#endif +} + +void +RC2_DestroyContext(RC2Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; +#ifndef NSS_DISABLE_DEPRECATED_RC2 + (vector->p_RC2_DestroyContext)(cx, freeit); +#else + return; +#endif +} + +SECStatus +RC2_Encrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; +#ifndef NSS_DISABLE_DEPRECATED_RC2 + return (vector->p_RC2_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +#else + return SECFailure; +#endif +} + +SECStatus +RC2_Decrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 
SECFailure; +#ifndef NSS_DISABLE_DEPRECATED_RC2 + return (vector->p_RC2_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +#else + return SECFailure; +#endif +} + +RC5Context * +RC5_CreateContext(const SECItem *key, unsigned int rounds, + unsigned int wordSize, const unsigned char *iv, int mode) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC5_CreateContext)(key, rounds, wordSize, iv, mode); +} + +void +RC5_DestroyContext(RC5Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RC5_DestroyContext)(cx, freeit); +} + +SECStatus +RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC5_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC5_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +DESContext * +DES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_DES_CreateContext)(key, iv, mode, encrypt); +} + +void +DES_DestroyContext(DESContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_DES_DestroyContext)(cx, freeit); +} + +SECStatus +DES_Encrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 
SECFailure; + return (vector->p_DES_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +DES_Decrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DES_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} +SEEDContext * +SEED_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; +#ifndef NSS_DISABLE_DEPRECATED_SEED + return (vector->p_SEED_CreateContext)(key, iv, mode, encrypt); +#else + return NULL; +#endif +} + +void +SEED_DestroyContext(SEEDContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; +#ifndef NSS_DISABLE_DEPRECATED_SEED + (vector->p_SEED_DestroyContext)(cx, freeit); +#else + return; +#endif +} + +SECStatus +SEED_Encrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; +#ifndef NSS_DISABLE_DEPRECATED_SEED + return (vector->p_SEED_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +#else + return SECFailure; +#endif +} + +SECStatus +SEED_Decrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; +#ifndef NSS_DISABLE_DEPRECATED_SEED + return (vector->p_SEED_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +#else + return SECFailure; +#endif +} + +AESContext * +AES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen, unsigned int 
blocklen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_AES_CreateContext)(key, iv, mode, encrypt, keylen, + blocklen); +} + +void +AES_DestroyContext(AESContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_AES_DestroyContext)(cx, freeit); +} + +SECStatus +AES_Encrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AES_Encrypt)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +AES_Decrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AES_Decrypt)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +AES_AEAD(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + void *params, unsigned int paramsLen, + const unsigned char *aad, unsigned int aadLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AES_AEAD)(cx, output, outputLen, maxOutputLen, input, + inputLen, params, paramsLen, aad, aadLen); +} + +SECStatus +MD5_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD5_Hash)(dest, src); +} + +SECStatus +MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD5_HashBuf)(dest, src, src_length); +} + +MD5Context * +MD5_NewContext(void) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD5_NewContext)(); +} + +void +MD5_DestroyContext(MD5Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_DestroyContext)(cx, freeit); +} + +void +MD5_Begin(MD5Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_Begin)(cx); +} + +void +MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_Update)(cx, input, inputLen); +} + +void +MD5_End(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_End)(cx, digest, digestLen, maxDigestLen); +} + +unsigned int +MD5_FlattenSize(MD5Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_MD5_FlattenSize)(cx); +} + +SECStatus +MD5_Flatten(MD5Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD5_Flatten)(cx, space); +} + +MD5Context * +MD5_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD5_Resurrect)(space, arg); +} + +void +MD5_TraceState(MD5Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_TraceState)(cx); +} + +SECStatus +MD2_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD2_Hash)(dest, src); +} + +MD2Context * +MD2_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD2_NewContext)(); +} + +void +MD2_DestroyContext(MD2Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + 
(vector->p_MD2_DestroyContext)(cx, freeit); +} + +void +MD2_Begin(MD2Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_Begin)(cx); +} + +void +MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_Update)(cx, input, inputLen); +} + +void +MD2_End(MD2Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_End)(cx, digest, digestLen, maxDigestLen); +} + +unsigned int +MD2_FlattenSize(MD2Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_MD2_FlattenSize)(cx); +} + +SECStatus +MD2_Flatten(MD2Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD2_Flatten)(cx, space); +} + +MD2Context * +MD2_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD2_Resurrect)(space, arg); +} + +SECStatus +SHA1_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA1_Hash)(dest, src); +} + +SECStatus +SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA1_HashBuf)(dest, src, src_length); +} + +SHA1Context * +SHA1_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA1_NewContext)(); +} + +void +SHA1_DestroyContext(SHA1Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_DestroyContext)(cx, freeit); +} + +void +SHA1_Begin(SHA1Context *cx) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_Begin)(cx); +} + +void +SHA1_Update(SHA1Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_Update)(cx, input, inputLen); +} + +void +SHA1_End(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA1_TraceState(SHA1Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_TraceState)(cx); +} + +unsigned int +SHA1_FlattenSize(SHA1Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA1_FlattenSize)(cx); +} + +SECStatus +SHA1_Flatten(SHA1Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA1_Flatten)(cx, space); +} + +SHA1Context * +SHA1_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA1_Resurrect)(space, arg); +} + +SECStatus +RNG_RNGInit(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RNG_RNGInit)(); +} + +SECStatus +RNG_RandomUpdate(const void *data, size_t bytes) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RNG_RandomUpdate)(data, bytes); +} + +SECStatus +RNG_GenerateGlobalRandomBytes(void *dest, size_t len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RNG_GenerateGlobalRandomBytes)(dest, len); +} + +void +RNG_RNGShutdown(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RNG_RNGShutdown)(); +} + +SECStatus +PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy) 
+{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_ParamGen)(j, pParams, pVfy); +} + +SECStatus +PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_ParamGenSeedLen)(j, seedBytes, pParams, pVfy); +} + +SECStatus +PQG_VerifyParams(const PQGParams *params, const PQGVerify *vfy, + SECStatus *result) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_VerifyParams)(params, vfy, result); +} + +void +PQG_DestroyParams(PQGParams *params) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_PQG_DestroyParams)(params); +} + +void +PQG_DestroyVerify(PQGVerify *vfy) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_PQG_DestroyVerify)(vfy); +} + +void +BL_Cleanup(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_BL_Cleanup)(); +} + +void +BL_Unload(void) +{ + /* This function is not thread-safe, but doesn't need to be, because it is + * only called from functions that are also defined as not thread-safe, + * namely C_Finalize in softoken, and the SSL bypass shutdown callback called + * from NSS_Shutdown. 
*/ + char *disableUnload = NULL; + vector = NULL; + disableUnload = PR_GetEnvSecure("NSS_DISABLE_UNLOAD"); + if (blLib && !disableUnload) { +#ifdef DEBUG + PRStatus status = PR_UnloadLibrary(blLib); + PORT_Assert(PR_SUCCESS == status); +#else + PR_UnloadLibrary(blLib); +#endif + } + blLib = NULL; + loadFreeBLOnce = pristineCallOnce; +} + +/* ============== New for 3.003 =============================== */ + +SECStatus +SHA256_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA256_Hash)(dest, src); +} + +SECStatus +SHA256_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA256_HashBuf)(dest, src, src_length); +} + +SHA256Context * +SHA256_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA256_NewContext)(); +} + +void +SHA256_DestroyContext(SHA256Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_DestroyContext)(cx, freeit); +} + +void +SHA256_Begin(SHA256Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_Begin)(cx); +} + +void +SHA256_Update(SHA256Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_Update)(cx, input, inputLen); +} + +void +SHA256_End(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA256_TraceState(SHA256Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_TraceState)(cx); +} + +unsigned int +SHA256_FlattenSize(SHA256Context 
*cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA256_FlattenSize)(cx); +} + +SECStatus +SHA256_Flatten(SHA256Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA256_Flatten)(cx, space); +} + +SHA256Context * +SHA256_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA256_Resurrect)(space, arg); +} + +SECStatus +SHA512_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA512_Hash)(dest, src); +} + +SECStatus +SHA512_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA512_HashBuf)(dest, src, src_length); +} + +SHA512Context * +SHA512_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA512_NewContext)(); +} + +void +SHA512_DestroyContext(SHA512Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_DestroyContext)(cx, freeit); +} + +void +SHA512_Begin(SHA512Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_Begin)(cx); +} + +void +SHA512_Update(SHA512Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_Update)(cx, input, inputLen); +} + +void +SHA512_End(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA512_TraceState(SHA512Context *cx) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_TraceState)(cx); +} + +unsigned int +SHA512_FlattenSize(SHA512Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA512_FlattenSize)(cx); +} + +SECStatus +SHA512_Flatten(SHA512Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA512_Flatten)(cx, space); +} + +SHA512Context * +SHA512_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA512_Resurrect)(space, arg); +} + +SECStatus +SHA384_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA384_Hash)(dest, src); +} + +SECStatus +SHA384_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA384_HashBuf)(dest, src, src_length); +} + +SHA384Context * +SHA384_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA384_NewContext)(); +} + +void +SHA384_DestroyContext(SHA384Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_DestroyContext)(cx, freeit); +} + +void +SHA384_Begin(SHA384Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_Begin)(cx); +} + +void +SHA384_Update(SHA384Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_Update)(cx, input, inputLen); +} + +void +SHA384_End(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_End)(cx, 
digest, digestLen, maxDigestLen); +} + +void +SHA384_TraceState(SHA384Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_TraceState)(cx); +} + +unsigned int +SHA384_FlattenSize(SHA384Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA384_FlattenSize)(cx); +} + +SECStatus +SHA384_Flatten(SHA384Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA384_Flatten)(cx, space); +} + +SHA384Context * +SHA384_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA384_Resurrect)(space, arg); +} + +AESKeyWrapContext * +AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv, + int encrypt, unsigned int keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return vector->p_AESKeyWrap_CreateContext(key, iv, encrypt, keylen); +} + +void +AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + vector->p_AESKeyWrap_DestroyContext(cx, freeit); +} + +SECStatus +AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_AESKeyWrap_Encrypt(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_AESKeyWrap_Decrypt(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus 
+AESKeyWrap_EncryptKWP(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_AESKeyWrap_EncryptKWP(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +AESKeyWrap_DecryptKWP(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_AESKeyWrap_DecryptKWP(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +PRBool +BLAPI_SHVerify(const char *name, PRFuncPtr addr) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return vector->p_BLAPI_SHVerify(name, addr); +} + +/* + * The Caller is expected to pass NULL as the name, which will + * trigger the p_BLAPI_VerifySelf() to return 'TRUE'. Pass the real + * name of the shared library we loaded (the static libraryName set + * in freebl_LoadDSO) to p_BLAPI_VerifySelf. 
+ */ +PRBool +BLAPI_VerifySelf(const char *name) +{ + PORT_Assert(!name); + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return vector->p_BLAPI_VerifySelf(libraryName); +} + +/* ============== New for 3.006 =============================== */ + +SECStatus +EC_NewKey(ECParams *params, ECPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_NewKey)(params, privKey); +} + +SECStatus +EC_NewKeyFromSeed(ECParams *params, ECPrivateKey **privKey, + const unsigned char *seed, int seedlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_NewKeyFromSeed)(params, privKey, seed, seedlen); +} + +SECStatus +EC_ValidatePublicKey(ECParams *params, SECItem *publicValue) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_ValidatePublicKey)(params, publicValue); +} + +SECStatus +ECDH_Derive(SECItem *publicValue, ECParams *params, SECItem *privateValue, + PRBool withCofactor, SECItem *derivedSecret) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ECDH_Derive)(publicValue, params, privateValue, + withCofactor, derivedSecret); +} + +SECStatus +ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, + const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ECDSA_SignDigest)(key, signature, digest); +} + +SECStatus +ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature, + const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ECDSA_VerifyDigest)(key, signature, digest); +} + +SECStatus +ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, + const SECItem *digest, const unsigned char *seed, const int seedlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + 
return (vector->p_ECDSA_SignDigestWithSeed)(key, signature, digest, + seed, seedlen); +} + +/* ============== New for 3.008 =============================== */ + +AESContext * +AES_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_AES_AllocateContext)(); +} + +AESKeyWrapContext * +AESKeyWrap_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_AESKeyWrap_AllocateContext)(); +} + +DESContext * +DES_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_DES_AllocateContext)(); +} + +RC2Context * +RC2_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; +#ifndef NSS_DISABLE_DEPRECATED_RC2 + return (vector->p_RC2_AllocateContext)(); +#else + return NULL; +#endif +} + +RC4Context * +RC4_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC4_AllocateContext)(); +} + +SECStatus +AES_InitContext(AESContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int blocklen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AES_InitContext)(cx, key, keylen, iv, mode, encrypt, + blocklen); +} + +SECStatus +AESKeyWrap_InitContext(AESKeyWrapContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int blocklen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AESKeyWrap_InitContext)(cx, key, keylen, iv, mode, + encrypt, blocklen); +} + +SECStatus +DES_InitContext(DESContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int xtra) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DES_InitContext)(cx, key, keylen, iv, mode, encrypt, xtra); +} + +SECStatus +SEED_InitContext(SEEDContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int xtra) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; +#ifndef NSS_DISABLE_DEPRECATED_SEED + return (vector->p_SEED_InitContext)(cx, key, keylen, iv, mode, encrypt, xtra); +#else + return SECFailure; +#endif +} + +SECStatus +RC2_InitContext(RC2Context *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int effectiveKeyLen, unsigned int xtra) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; +#ifndef NSS_DISABLE_DEPRECATED_RC2 + return (vector->p_RC2_InitContext)(cx, key, keylen, iv, mode, + effectiveKeyLen, xtra); +#else + return SECFailure; +#endif +} + +SECStatus +RC4_InitContext(RC4Context *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *x1, int x2, + unsigned int x3, unsigned int x4) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC4_InitContext)(cx, key, keylen, x1, x2, x3, x4); +} + +void +MD2_Clone(MD2Context *dest, MD2Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_Clone)(dest, src); +} + +void +MD5_Clone(MD5Context *dest, MD5Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_Clone)(dest, src); +} + +void +SHA1_Clone(SHA1Context *dest, SHA1Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_Clone)(dest, src); +} + +void +SHA256_Clone(SHA256Context *dest, SHA256Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_Clone)(dest, src); +} + +void +SHA384_Clone(SHA384Context *dest, SHA384Context 
*src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_Clone)(dest, src); +} + +void +SHA512_Clone(SHA512Context *dest, SHA512Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_Clone)(dest, src); +} + +SECStatus +TLS_PRF(const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_TLS_PRF)(secret, label, seed, result, isFIPS); +} + +const SECHashObject * +HASH_GetRawHashObject(HASH_HashType hashType) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_HASH_GetRawHashObject)(hashType); +} + +void +HMAC_Destroy(HMACContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_HMAC_Destroy)(cx, freeit); +} + +HMACContext * +HMAC_Create(const SECHashObject *hashObj, const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_HMAC_Create)(hashObj, secret, secret_len, isFIPS); +} + +SECStatus +HMAC_Init(HMACContext *cx, const SECHashObject *hashObj, + const unsigned char *secret, unsigned int secret_len, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_HMAC_Init)(cx, hashObj, secret, secret_len, isFIPS); +} + +void +HMAC_Begin(HMACContext *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_HMAC_Begin)(cx); +} + +void +HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_HMAC_Update)(cx, data, data_len); +} + +SECStatus +HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len, + unsigned int max_result_len) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_HMAC_Finish)(cx, result, result_len, max_result_len); +} + +HMACContext * +HMAC_Clone(HMACContext *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_HMAC_Clone)(cx); +} + +void +RNG_SystemInfoForRNG(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RNG_SystemInfoForRNG)(); +} + +SECStatus +FIPS186Change_GenerateX(unsigned char *XKEY, const unsigned char *XSEEDj, + unsigned char *x_j) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_FIPS186Change_GenerateX)(XKEY, XSEEDj, x_j); +} + +SECStatus +FIPS186Change_ReduceModQForDSA(const unsigned char *w, + const unsigned char *q, + unsigned char *xj) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_FIPS186Change_ReduceModQForDSA)(w, q, xj); +} + +/* === new for Camellia === */ +SECStatus +Camellia_InitContext(CamelliaContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int unused) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_Camellia_InitContext)(cx, key, keylen, iv, mode, encrypt, + unused); +} + +CamelliaContext * +Camellia_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_Camellia_AllocateContext)(); +} + +CamelliaContext * +Camellia_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_Camellia_CreateContext)(key, iv, mode, encrypt, keylen); +} + +void +Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_Camellia_DestroyContext)(cx, freeit); +} + 
+SECStatus +Camellia_Encrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_Camellia_Encrypt)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +Camellia_Decrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_Camellia_Decrypt)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +void +BL_SetForkState(PRBool forked) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_BL_SetForkState)(forked); +} + +SECStatus +PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PRNGTEST_Instantiate)(entropy, entropy_len, + nonce, nonce_len, + personal_string, ps_len); +} + +SECStatus +PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional, unsigned int additional_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PRNGTEST_Reseed)(entropy, entropy_len, + additional, additional_len); +} + +SECStatus +PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len, + const PRUint8 *additional, unsigned int additional_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PRNGTEST_Generate)(bytes, bytes_len, + additional, additional_len); +} + +SECStatus +PRNGTEST_Uninstantiate() +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return 
(vector->p_PRNGTEST_Uninstantiate)(); +} + +SECStatus +RSA_PopulatePrivateKey(RSAPrivateKey *key) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PopulatePrivateKey)(key); +} + +SECStatus +JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType, + const SECItem *signerID, const SECItem *x, + const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut, + SECItem *gv, SECItem *r) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Sign)(arena, pqg, hashType, signerID, x, + testRandom, gxIn, gxOut, gv, r); +} + +SECStatus +JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, + HASH_HashType hashType, const SECItem *signerID, + const SECItem *peerID, const SECItem *gx, + const SECItem *gv, const SECItem *r) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Verify)(arena, pqg, hashType, signerID, peerID, + gx, gv, r); +} + +SECStatus +JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *gx1, const SECItem *gx3, const SECItem *gx4, + SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Round2)(arena, p, q, gx1, gx3, gx4, base, x2, s, x2s); +} + +SECStatus +JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *x2, const SECItem *gx4, const SECItem *x2s, + const SECItem *B, SECItem *K) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Final)(arena, p, q, x2, gx4, x2s, B, K); +} + +SECStatus +TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_TLS_P_hash)(hashAlg, secret, label, seed, 
result, isFIPS); +} + +SECStatus +SHA224_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA224_Hash)(dest, src); +} + +SECStatus +SHA224_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA224_HashBuf)(dest, src, src_length); +} + +SHA224Context * +SHA224_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA224_NewContext)(); +} + +void +SHA224_DestroyContext(SHA224Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_DestroyContext)(cx, freeit); +} + +void +SHA224_Begin(SHA256Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_Begin)(cx); +} + +void +SHA224_Update(SHA224Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_Update)(cx, input, inputLen); +} + +void +SHA224_End(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA224_TraceState(SHA224Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_TraceState)(cx); +} + +unsigned int +SHA224_FlattenSize(SHA224Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA224_FlattenSize)(cx); +} + +SECStatus +SHA224_Flatten(SHA224Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA224_Flatten)(cx, space); +} + +SHA224Context * +SHA224_Resurrect(unsigned char *space, void 
*arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA224_Resurrect)(space, arg); +} + +void +SHA224_Clone(SHA224Context *dest, SHA224Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_Clone)(dest, src); +} + +PRBool +BLAPI_SHVerifyFile(const char *name) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return vector->p_BLAPI_SHVerifyFile(name); +} + +/* === new for DSA-2 === */ +SECStatus +PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_ParamGenV2)(L, N, seedBytes, pParams, pVfy); +} + +SECStatus +PRNGTEST_RunHealthTests(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_PRNGTEST_RunHealthTests(); +} + +SECStatus +SSLv3_MAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SSLv3_MAC_ConstantTime)( + result, resultLen, maxResultLen, + hashObj, + secret, secretLen, + header, headerLen, + body, bodyLen, bodyTotalLen); +} + +SECStatus +HMAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return 
(vector->p_HMAC_ConstantTime)( + result, resultLen, maxResultLen, + hashObj, + secret, secretLen, + header, headerLen, + body, bodyLen, bodyTotalLen); +} + +SECStatus +RSA_SignRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_SignRaw)(key, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RSA_CheckSignRaw(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignRaw)(key, sig, sigLen, hash, hashLen); +} + +SECStatus +RSA_CheckSignRecoverRaw(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignRecoverRaw)(key, data, dataLen, maxDataLen, + sig, sigLen); +} + +SECStatus +RSA_EncryptRaw(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_EncryptRaw)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_DecryptRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_DecryptRaw)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_EncryptOAEP(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, 
+ const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_EncryptOAEP)(key, hashAlg, maskHashAlg, label, + labelLen, seed, seedLen, output, + outputLen, maxOutputLen, input, inputLen); +} + +SECStatus +RSA_DecryptOAEP(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_DecryptOAEP)(key, hashAlg, maskHashAlg, label, + labelLen, output, outputLen, + maxOutputLen, input, inputLen); +} + +SECStatus +RSA_EncryptBlock(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_EncryptBlock)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_DecryptBlock(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_DecryptBlock)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_SignPSS(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int 
inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_SignPSS)(key, hashAlg, maskHashAlg, salt, saltLen, + output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RSA_CheckSignPSS(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignPSS)(key, hashAlg, maskHashAlg, saltLen, + sig, sigLen, hash, hashLen); +} + +SECStatus +RSA_Sign(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_Sign)(key, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RSA_CheckSign(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSign)(key, sig, sigLen, data, dataLen); +} + +SECStatus +RSA_CheckSignRecover(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignRecover)(key, output, outputLen, maxOutputLen, + sig, sigLen); +} + +SECStatus +EC_FillParams(PLArenaPool *arena, + const SECItem *encodedParams, + ECParams *params) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_FillParams)(arena, encodedParams, params); +} + +SECStatus +EC_DecodeParams(const SECItem *encodedParams, + ECParams **ecparams) +{ + if 
(!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_DecodeParams)(encodedParams, ecparams); +} + +SECStatus +EC_CopyParams(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_CopyParams)(arena, dstParams, srcParams); +} + +SECStatus +ChaCha20_Xor(unsigned char *output, const unsigned char *block, unsigned int len, + const unsigned char *k, const unsigned char *nonce, PRUint32 ctr) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_ChaCha20_Xor)(output, block, len, k, nonce, ctr); +} + +SECStatus +ChaCha20_InitContext(ChaCha20Context *ctx, const unsigned char *key, + unsigned int keyLen, + const unsigned char *nonce, + unsigned int nonceLen, + PRUint32 ctr) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20_InitContext)(ctx, key, keyLen, nonce, nonceLen, ctr); +} + +ChaCha20Context * +ChaCha20_CreateContext(const unsigned char *key, unsigned int keyLen, + const unsigned char *nonce, unsigned int nonceLen, + PRUint32 ctr) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_ChaCha20_CreateContext)(key, keyLen, nonce, nonceLen, ctr); +} + +void +ChaCha20_DestroyContext(ChaCha20Context *ctx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_ChaCha20_DestroyContext)(ctx, freeit); +} + +SECStatus +ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx, + const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_InitContext)(ctx, key, keyLen, tagLen); +} + +ChaCha20Poly1305Context * +ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ + if 
(!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_ChaCha20Poly1305_CreateContext)(key, keyLen, tagLen); +} + +void +ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_ChaCha20Poly1305_DestroyContext)(ctx, freeit); +} + +SECStatus +ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, + unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_Seal)( + ctx, output, outputLen, maxOutputLen, input, inputLen, + nonce, nonceLen, ad, adLen); +} + +SECStatus +ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, + unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_Open)( + ctx, output, outputLen, maxOutputLen, input, inputLen, + nonce, nonceLen, ad, adLen); +} + +SECStatus +ChaCha20Poly1305_Encrypt(const ChaCha20Poly1305Context *ctx, + unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen, + unsigned char *tagOut) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_Encrypt)( + ctx, output, outputLen, maxOutputLen, input, inputLen, + nonce, nonceLen, ad, adLen, tagOut); +} + +SECStatus +ChaCha20Poly1305_Decrypt(const 
ChaCha20Poly1305Context *ctx, + unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen, + unsigned char *tagIn) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_Decrypt)( + ctx, output, outputLen, maxOutputLen, input, inputLen, + nonce, nonceLen, ad, adLen, tagIn); +} + +int +EC_GetPointSize(const ECParams *params) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_GetPointSize)(params); +} + +SECStatus +BLAKE2B_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Hash)(dest, src); +} + +SECStatus +BLAKE2B_HashBuf(unsigned char *output, const unsigned char *input, PRUint32 inlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_HashBuf)(output, input, inlen); +} + +SECStatus +BLAKE2B_MAC_HashBuf(unsigned char *output, const unsigned char *input, + unsigned int inlen, const unsigned char *key, + unsigned int keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_MAC_HashBuf)(output, input, inlen, key, keylen); +} + +BLAKE2BContext * +BLAKE2B_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return NULL; + } + return (vector->p_BLAKE2B_NewContext)(); +} + +void +BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return; + } + (vector->p_BLAKE2B_DestroyContext)(ctx, freeit); +} + +SECStatus +BLAKE2B_Begin(BLAKE2BContext *ctx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Begin)(ctx); +} + 
+SECStatus +BLAKE2B_MAC_Begin(BLAKE2BContext *ctx, const PRUint8 *key, const size_t keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_MAC_Begin)(ctx, key, keylen); +} + +SECStatus +BLAKE2B_Update(BLAKE2BContext *ctx, const unsigned char *in, unsigned int inlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Update)(ctx, in, inlen); +} + +SECStatus +BLAKE2B_End(BLAKE2BContext *ctx, unsigned char *out, + unsigned int *digestLen, size_t maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_End)(ctx, out, digestLen, maxDigestLen); +} + +unsigned int +BLAKE2B_FlattenSize(BLAKE2BContext *ctx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return 0; + } + return (vector->p_BLAKE2B_FlattenSize)(ctx); +} + +SECStatus +BLAKE2B_Flatten(BLAKE2BContext *ctx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Flatten)(ctx, space); +} + +BLAKE2BContext * +BLAKE2B_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return NULL; + } + return (vector->p_BLAKE2B_Resurrect)(space, arg); +} + +/* == New for CMAC == */ +SECStatus +CMAC_Init(CMACContext *ctx, CMACCipher type, const unsigned char *key, + unsigned int key_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_CMAC_Init)(ctx, type, key, key_len); +} + +CMACContext * +CMAC_Create(CMACCipher type, const unsigned char *key, unsigned int key_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_CMAC_Create)(type, key, key_len); +} + +SECStatus +CMAC_Begin(CMACContext *ctx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return 
(vector->p_CMAC_Begin)(ctx); +} + +SECStatus +CMAC_Update(CMACContext *ctx, const unsigned char *data, unsigned int data_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_CMAC_Update)(ctx, data, data_len); +} + +SECStatus +CMAC_Finish(CMACContext *ctx, unsigned char *result, unsigned int *result_len, + unsigned int max_result_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_CMAC_Finish)(ctx, result, result_len, max_result_len); +} + +void +CMAC_Destroy(CMACContext *ctx, PRBool free_it) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_CMAC_Destroy)(ctx, free_it); +} diff --git a/security/nss/lib/freebl/loader.h b/security/nss/lib/freebl/loader.h new file mode 100644 index 0000000000..eb3046d272 --- /dev/null +++ b/security/nss/lib/freebl/loader.h @@ -0,0 +1,936 @@ +/* + * loader.h - load platform dependent DSO containing freebl implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _LOADER_H_ +#define _LOADER_H_ 1 + +#include "blapi.h" + +#define FREEBL_VERSION 0x0325 + +struct FREEBLVectorStr { + + unsigned short length; /* of this struct in bytes */ + unsigned short version; /* of this struct. 
*/ + + RSAPrivateKey *(*p_RSA_NewKey)(int keySizeInBits, + SECItem *publicExponent); + + SECStatus (*p_RSA_PublicKeyOp)(RSAPublicKey *key, + unsigned char *output, + const unsigned char *input); + + SECStatus (*p_RSA_PrivateKeyOp)(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input); + + SECStatus (*p_DSA_NewKey)(const PQGParams *params, + DSAPrivateKey **privKey); + + SECStatus (*p_DSA_SignDigest)(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest); + + SECStatus (*p_DSA_VerifyDigest)(DSAPublicKey *key, + const SECItem *signature, + const SECItem *digest); + + SECStatus (*p_DSA_NewKeyFromSeed)(const PQGParams *params, + const unsigned char *seed, + DSAPrivateKey **privKey); + + SECStatus (*p_DSA_SignDigestWithSeed)(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed); + + SECStatus (*p_DH_GenParam)(int primeLen, DHParams **params); + + SECStatus (*p_DH_NewKey)(DHParams *params, + DHPrivateKey **privKey); + + SECStatus (*p_DH_Derive)(SECItem *publicValue, + SECItem *prime, + SECItem *privateValue, + SECItem *derivedSecret, + unsigned int maxOutBytes); + + SECStatus (*p_KEA_Derive)(SECItem *prime, + SECItem *public1, + SECItem *public2, + SECItem *private1, + SECItem *private2, + SECItem *derivedSecret); + + PRBool (*p_KEA_Verify)(SECItem *Y, SECItem *prime, SECItem *subPrime); + + RC4Context *(*p_RC4_CreateContext)(const unsigned char *key, int len); + + void (*p_RC4_DestroyContext)(RC4Context *cx, PRBool freeit); + + SECStatus (*p_RC4_Encrypt)(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_RC4_Decrypt)(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + RC2Context *(*p_RC2_CreateContext)(const unsigned char *key, + unsigned int len, const unsigned char *iv, + int mode, 
unsigned effectiveKeyLen); + + void (*p_RC2_DestroyContext)(RC2Context *cx, PRBool freeit); + + SECStatus (*p_RC2_Encrypt)(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_RC2_Decrypt)(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + RC5Context *(*p_RC5_CreateContext)(const SECItem *key, unsigned int rounds, + unsigned int wordSize, const unsigned char *iv, int mode); + + void (*p_RC5_DestroyContext)(RC5Context *cx, PRBool freeit); + + SECStatus (*p_RC5_Encrypt)(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_RC5_Decrypt)(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + DESContext *(*p_DES_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, PRBool encrypt); + + void (*p_DES_DestroyContext)(DESContext *cx, PRBool freeit); + + SECStatus (*p_DES_Encrypt)(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_DES_Decrypt)(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + AESContext *(*p_AES_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, int encrypt, unsigned int keylen, + unsigned int blocklen); + + void (*p_AES_DestroyContext)(AESContext *cx, PRBool freeit); + + SECStatus (*p_AES_Encrypt)(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus 
(*p_AES_Decrypt)(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_MD5_Hash)(unsigned char *dest, const char *src); + + SECStatus (*p_MD5_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + + MD5Context *(*p_MD5_NewContext)(void); + + void (*p_MD5_DestroyContext)(MD5Context *cx, PRBool freeit); + + void (*p_MD5_Begin)(MD5Context *cx); + + void (*p_MD5_Update)(MD5Context *cx, + const unsigned char *input, unsigned int inputLen); + + void (*p_MD5_End)(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + + unsigned int (*p_MD5_FlattenSize)(MD5Context *cx); + + SECStatus (*p_MD5_Flatten)(MD5Context *cx, unsigned char *space); + + MD5Context *(*p_MD5_Resurrect)(unsigned char *space, void *arg); + + void (*p_MD5_TraceState)(MD5Context *cx); + + SECStatus (*p_MD2_Hash)(unsigned char *dest, const char *src); + + MD2Context *(*p_MD2_NewContext)(void); + + void (*p_MD2_DestroyContext)(MD2Context *cx, PRBool freeit); + + void (*p_MD2_Begin)(MD2Context *cx); + + void (*p_MD2_Update)(MD2Context *cx, + const unsigned char *input, unsigned int inputLen); + + void (*p_MD2_End)(MD2Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + + unsigned int (*p_MD2_FlattenSize)(MD2Context *cx); + + SECStatus (*p_MD2_Flatten)(MD2Context *cx, unsigned char *space); + + MD2Context *(*p_MD2_Resurrect)(unsigned char *space, void *arg); + + SECStatus (*p_SHA1_Hash)(unsigned char *dest, const char *src); + + SECStatus (*p_SHA1_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + + SHA1Context *(*p_SHA1_NewContext)(void); + + void (*p_SHA1_DestroyContext)(SHA1Context *cx, PRBool freeit); + + void (*p_SHA1_Begin)(SHA1Context *cx); + + void (*p_SHA1_Update)(SHA1Context *cx, const unsigned char *input, + unsigned int inputLen); + + void 
(*p_SHA1_End)(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + + void (*p_SHA1_TraceState)(SHA1Context *cx); + + unsigned int (*p_SHA1_FlattenSize)(SHA1Context *cx); + + SECStatus (*p_SHA1_Flatten)(SHA1Context *cx, unsigned char *space); + + SHA1Context *(*p_SHA1_Resurrect)(unsigned char *space, void *arg); + + SECStatus (*p_RNG_RNGInit)(void); + + SECStatus (*p_RNG_RandomUpdate)(const void *data, size_t bytes); + + SECStatus (*p_RNG_GenerateGlobalRandomBytes)(void *dest, size_t len); + + void (*p_RNG_RNGShutdown)(void); + + SECStatus (*p_PQG_ParamGen)(unsigned int j, PQGParams **pParams, + PQGVerify **pVfy); + + SECStatus (*p_PQG_ParamGenSeedLen)(unsigned int j, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy); + + SECStatus (*p_PQG_VerifyParams)(const PQGParams *params, + const PQGVerify *vfy, SECStatus *result); + + /* Version 3.001 came to here */ + + SECStatus (*p_RSA_PrivateKeyOpDoubleChecked)(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input); + + SECStatus (*p_RSA_PrivateKeyCheck)(const RSAPrivateKey *key); + + void (*p_BL_Cleanup)(void); + + /* Version 3.002 came to here */ + + SHA256Context *(*p_SHA256_NewContext)(void); + void (*p_SHA256_DestroyContext)(SHA256Context *cx, PRBool freeit); + void (*p_SHA256_Begin)(SHA256Context *cx); + void (*p_SHA256_Update)(SHA256Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA256_End)(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA256_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA256_Hash)(unsigned char *dest, const char *src); + void (*p_SHA256_TraceState)(SHA256Context *cx); + unsigned int (*p_SHA256_FlattenSize)(SHA256Context *cx); + SECStatus (*p_SHA256_Flatten)(SHA256Context *cx, unsigned char *space); + SHA256Context *(*p_SHA256_Resurrect)(unsigned char *space, void *arg); + 
+ SHA512Context *(*p_SHA512_NewContext)(void); + void (*p_SHA512_DestroyContext)(SHA512Context *cx, PRBool freeit); + void (*p_SHA512_Begin)(SHA512Context *cx); + void (*p_SHA512_Update)(SHA512Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA512_End)(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA512_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA512_Hash)(unsigned char *dest, const char *src); + void (*p_SHA512_TraceState)(SHA512Context *cx); + unsigned int (*p_SHA512_FlattenSize)(SHA512Context *cx); + SECStatus (*p_SHA512_Flatten)(SHA512Context *cx, unsigned char *space); + SHA512Context *(*p_SHA512_Resurrect)(unsigned char *space, void *arg); + + SHA384Context *(*p_SHA384_NewContext)(void); + void (*p_SHA384_DestroyContext)(SHA384Context *cx, PRBool freeit); + void (*p_SHA384_Begin)(SHA384Context *cx); + void (*p_SHA384_Update)(SHA384Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA384_End)(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA384_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA384_Hash)(unsigned char *dest, const char *src); + void (*p_SHA384_TraceState)(SHA384Context *cx); + unsigned int (*p_SHA384_FlattenSize)(SHA384Context *cx); + SECStatus (*p_SHA384_Flatten)(SHA384Context *cx, unsigned char *space); + SHA384Context *(*p_SHA384_Resurrect)(unsigned char *space, void *arg); + + /* Version 3.003 came to here */ + + AESKeyWrapContext *(*p_AESKeyWrap_CreateContext)(const unsigned char *key, + const unsigned char *iv, int encrypt, unsigned int keylen); + + void (*p_AESKeyWrap_DestroyContext)(AESKeyWrapContext *cx, PRBool freeit); + + SECStatus (*p_AESKeyWrap_Encrypt)(AESKeyWrapContext *cx, + unsigned char *output, + unsigned int *outputLen, unsigned int 
maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_AESKeyWrap_Decrypt)(AESKeyWrapContext *cx, + unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + /* Version 3.004 came to here */ + + PRBool (*p_BLAPI_SHVerify)(const char *name, PRFuncPtr addr); + PRBool (*p_BLAPI_VerifySelf)(const char *name); + + /* Version 3.005 came to here */ + + SECStatus (*p_EC_NewKey)(ECParams *params, + ECPrivateKey **privKey); + + SECStatus (*p_EC_NewKeyFromSeed)(ECParams *params, + ECPrivateKey **privKey, + const unsigned char *seed, + int seedlen); + + SECStatus (*p_EC_ValidatePublicKey)(ECParams *params, + SECItem *publicValue); + + SECStatus (*p_ECDH_Derive)(SECItem *publicValue, + ECParams *params, + SECItem *privateValue, + PRBool withCofactor, + SECItem *derivedSecret); + + SECStatus (*p_ECDSA_SignDigest)(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest); + + SECStatus (*p_ECDSA_VerifyDigest)(ECPublicKey *key, + const SECItem *signature, + const SECItem *digest); + + SECStatus (*p_ECDSA_SignDigestWithSeed)(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed, + const int seedlen); + + /* Version 3.006 came to here */ + + /* no modification to FREEBLVectorStr itself + * but ECParamStr was modified + */ + + /* Version 3.007 came to here */ + + SECStatus (*p_AES_InitContext)(AESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int blocklen); + SECStatus (*p_AESKeyWrap_InitContext)(AESKeyWrapContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int blocklen); + SECStatus (*p_DES_InitContext)(DESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int); + 
SECStatus (*p_RC2_InitContext)(RC2Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int effectiveKeyLen, + unsigned int); + SECStatus (*p_RC4_InitContext)(RC4Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *, + int, + unsigned int, + unsigned int); + + AESContext *(*p_AES_AllocateContext)(void); + AESKeyWrapContext *(*p_AESKeyWrap_AllocateContext)(void); + DESContext *(*p_DES_AllocateContext)(void); + RC2Context *(*p_RC2_AllocateContext)(void); + RC4Context *(*p_RC4_AllocateContext)(void); + + void (*p_MD2_Clone)(MD2Context *dest, MD2Context *src); + void (*p_MD5_Clone)(MD5Context *dest, MD5Context *src); + void (*p_SHA1_Clone)(SHA1Context *dest, SHA1Context *src); + void (*p_SHA256_Clone)(SHA256Context *dest, SHA256Context *src); + void (*p_SHA384_Clone)(SHA384Context *dest, SHA384Context *src); + void (*p_SHA512_Clone)(SHA512Context *dest, SHA512Context *src); + + SECStatus (*p_TLS_PRF)(const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS); + + const SECHashObject *(*p_HASH_GetRawHashObject)(HASH_HashType hashType); + + HMACContext *(*p_HMAC_Create)(const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS); + SECStatus (*p_HMAC_Init)(HMACContext *cx, const SECHashObject *hash_obj, + const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS); + void (*p_HMAC_Begin)(HMACContext *cx); + void (*p_HMAC_Update)(HMACContext *cx, const unsigned char *data, + unsigned int data_len); + HMACContext *(*p_HMAC_Clone)(HMACContext *cx); + SECStatus (*p_HMAC_Finish)(HMACContext *cx, unsigned char *result, + unsigned int *result_len, + unsigned int max_result_len); + void (*p_HMAC_Destroy)(HMACContext *cx, PRBool freeit); + + void (*p_RNG_SystemInfoForRNG)(void); + + /* Version 3.008 came to here */ + + SECStatus (*p_FIPS186Change_GenerateX)(unsigned char *XKEY, + const unsigned char 
*XSEEDj, + unsigned char *x_j); + SECStatus (*p_FIPS186Change_ReduceModQForDSA)(const unsigned char *w, + const unsigned char *q, + unsigned char *xj); + + /* Version 3.009 came to here */ + + SECStatus (*p_Camellia_InitContext)(CamelliaContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int unused); + + CamelliaContext *(*p_Camellia_AllocateContext)(void); + CamelliaContext *(*p_Camellia_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen); + void (*p_Camellia_DestroyContext)(CamelliaContext *cx, PRBool freeit); + + SECStatus (*p_Camellia_Encrypt)(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + + SECStatus (*p_Camellia_Decrypt)(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + + void (*p_PQG_DestroyParams)(PQGParams *params); + + void (*p_PQG_DestroyVerify)(PQGVerify *vfy); + + /* Version 3.010 came to here */ + + SECStatus (*p_SEED_InitContext)(SEEDContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int); + + SEEDContext *(*p_SEED_AllocateContext)(void); + + SEEDContext *(*p_SEED_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, PRBool encrypt); + + void (*p_SEED_DestroyContext)(SEEDContext *cx, PRBool freeit); + + SECStatus (*p_SEED_Encrypt)(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_SEED_Decrypt)(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus 
(*p_BL_Init)(void); + void (*p_BL_SetForkState)(PRBool); + + SECStatus (*p_PRNGTEST_Instantiate)(const PRUint8 *entropy, + unsigned int entropy_len, + const PRUint8 *nonce, + unsigned int nonce_len, + const PRUint8 *personal_string, + unsigned int ps_len); + + SECStatus (*p_PRNGTEST_Reseed)(const PRUint8 *entropy, + unsigned int entropy_len, + const PRUint8 *additional, + unsigned int additional_len); + + SECStatus (*p_PRNGTEST_Generate)(PRUint8 *bytes, + unsigned int bytes_len, + const PRUint8 *additional, + unsigned int additional_len); + + SECStatus (*p_PRNGTEST_Uninstantiate)(void); + /* Version 3.011 came to here */ + + SECStatus (*p_RSA_PopulatePrivateKey)(RSAPrivateKey *key); + + SECStatus (*p_DSA_NewRandom)(PLArenaPool *arena, const SECItem *q, + SECItem *seed); + + SECStatus (*p_JPAKE_Sign)(PLArenaPool *arena, const PQGParams *pqg, + HASH_HashType hashType, const SECItem *signerID, + const SECItem *x, const SECItem *testRandom, + const SECItem *gxIn, SECItem *gxOut, + SECItem *gv, SECItem *r); + + SECStatus (*p_JPAKE_Verify)(PLArenaPool *arena, const PQGParams *pqg, + HASH_HashType hashType, const SECItem *signerID, + const SECItem *peerID, const SECItem *gx, + const SECItem *gv, const SECItem *r); + + SECStatus (*p_JPAKE_Round2)(PLArenaPool *arena, const SECItem *p, + const SECItem *q, const SECItem *gx1, + const SECItem *gx3, const SECItem *gx4, + SECItem *base, const SECItem *x2, + const SECItem *s, SECItem *x2s); + + SECStatus (*p_JPAKE_Final)(PLArenaPool *arena, const SECItem *p, + const SECItem *q, const SECItem *x2, + const SECItem *gx4, const SECItem *x2s, + const SECItem *B, SECItem *K); + + /* Version 3.012 came to here */ + + SECStatus (*p_TLS_P_hash)(HASH_HashType hashAlg, + const SECItem *secret, + const char *label, + SECItem *seed, + SECItem *result, + PRBool isFIPS); + + SHA224Context *(*p_SHA224_NewContext)(void); + void (*p_SHA224_DestroyContext)(SHA224Context *cx, PRBool freeit); + void (*p_SHA224_Begin)(SHA224Context *cx); + void 
(*p_SHA224_Update)(SHA224Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA224_End)(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA224_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA224_Hash)(unsigned char *dest, const char *src); + void (*p_SHA224_TraceState)(SHA224Context *cx); + unsigned int (*p_SHA224_FlattenSize)(SHA224Context *cx); + SECStatus (*p_SHA224_Flatten)(SHA224Context *cx, unsigned char *space); + SHA224Context *(*p_SHA224_Resurrect)(unsigned char *space, void *arg); + void (*p_SHA224_Clone)(SHA224Context *dest, SHA224Context *src); + PRBool (*p_BLAPI_SHVerifyFile)(const char *name); + + /* Version 3.013 came to here */ + + SECStatus (*p_PQG_ParamGenV2)(unsigned int L, unsigned int N, + unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy); + SECStatus (*p_PRNGTEST_RunHealthTests)(void); + + /* Version 3.014 came to here */ + + SECStatus (*p_HMAC_ConstantTime)( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen); + + SECStatus (*p_SSLv3_MAC_ConstantTime)( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen); + + /* Version 3.015 came to here */ + + SECStatus (*p_RSA_SignRaw)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_CheckSignRaw)(RSAPublicKey *key, + 
const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + SECStatus (*p_RSA_CheckSignRecoverRaw)(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen); + SECStatus (*p_RSA_EncryptRaw)(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_DecryptRaw)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_EncryptOAEP)(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_DecryptOAEP)(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_EncryptBlock)(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_DecryptBlock)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_SignPSS)(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + 
SECStatus (*p_RSA_CheckSignPSS)(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + SECStatus (*p_RSA_Sign)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_CheckSign)(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen); + SECStatus (*p_RSA_CheckSignRecover)(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen); + + /* Version 3.016 came to here */ + + SECStatus (*p_EC_FillParams)(PLArenaPool *arena, + const SECItem *encodedParams, ECParams *params); + SECStatus (*p_EC_DecodeParams)(const SECItem *encodedParams, + ECParams **ecparams); + SECStatus (*p_EC_CopyParams)(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams); + + /* Version 3.017 came to here */ + + SECStatus (*p_ChaCha20Poly1305_InitContext)(ChaCha20Poly1305Context *ctx, + const unsigned char *key, + unsigned int keyLen, + unsigned int tagLen); + + ChaCha20Poly1305Context *(*p_ChaCha20Poly1305_CreateContext)( + const unsigned char *key, unsigned int keyLen, unsigned int tagLen); + + void (*p_ChaCha20Poly1305_DestroyContext)(ChaCha20Poly1305Context *ctx, + PRBool freeit); + + SECStatus (*p_ChaCha20Poly1305_Seal)( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + + SECStatus (*p_ChaCha20Poly1305_Open)( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned 
char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + + /* Version 3.018 came to here */ + + int (*p_EC_GetPointSize)(const ECParams *); + + /* Version 3.019 came to here */ + + SECStatus (*p_BLAKE2B_Hash)(unsigned char *dest, const char *src); + SECStatus (*p_BLAKE2B_HashBuf)(unsigned char *output, + const unsigned char *input, PRUint32 inlen); + SECStatus (*p_BLAKE2B_MAC_HashBuf)(unsigned char *output, + const unsigned char *input, + unsigned int inlen, + const unsigned char *key, + unsigned int keylen); + BLAKE2BContext *(*p_BLAKE2B_NewContext)(); + void (*p_BLAKE2B_DestroyContext)(BLAKE2BContext *ctx, PRBool freeit); + SECStatus (*p_BLAKE2B_Begin)(BLAKE2BContext *ctx); + SECStatus (*p_BLAKE2B_MAC_Begin)(BLAKE2BContext *ctx, const PRUint8 *key, + const size_t keylen); + SECStatus (*p_BLAKE2B_Update)(BLAKE2BContext *ctx, const unsigned char *in, + unsigned int inlen); + SECStatus (*p_BLAKE2B_End)(BLAKE2BContext *ctx, unsigned char *out, + unsigned int *digestLen, size_t maxDigestLen); + unsigned int (*p_BLAKE2B_FlattenSize)(BLAKE2BContext *ctx); + SECStatus (*p_BLAKE2B_Flatten)(BLAKE2BContext *ctx, unsigned char *space); + BLAKE2BContext *(*p_BLAKE2B_Resurrect)(unsigned char *space, void *arg); + + /* Version 3.020 came to here */ + + SECStatus (*p_ChaCha20_Xor)(unsigned char *output, const unsigned char *block, + unsigned int len, const unsigned char *k, + const unsigned char *nonce, PRUint32 ctr); + + /* Version 3.021 came to here */ + + SECStatus (*p_CMAC_Init)(CMACContext *ctx, CMACCipher type, + const unsigned char *key, unsigned int key_len); + CMACContext *(*p_CMAC_Create)(CMACCipher type, const unsigned char *key, + unsigned int key_len); + SECStatus (*p_CMAC_Begin)(CMACContext *ctx); + SECStatus (*p_CMAC_Update)(CMACContext *ctx, const unsigned char *data, + unsigned int data_len); + SECStatus (*p_CMAC_Finish)(CMACContext *ctx, unsigned char *result, + unsigned int 
*result_len, + unsigned int max_result_len); + void (*p_CMAC_Destroy)(CMACContext *ctx, PRBool free_it); + + /* Version 3.022 came to here */ + SECStatus (*p_ChaCha20Poly1305_Encrypt)( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen, unsigned char *tagOut); + + SECStatus (*p_ChaCha20Poly1305_Decrypt)( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen, unsigned char *tagIn); + SECStatus (*p_AES_AEAD)(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + void *params, unsigned int paramsLen, + const unsigned char *aad, unsigned int aadLen); + SECStatus (*p_AESKeyWrap_EncryptKWP)(AESKeyWrapContext *cx, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + + SECStatus (*p_AESKeyWrap_DecryptKWP)(AESKeyWrapContext *cx, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + + /* Version 3.023 came to here */ + + PRBool (*p_KEA_PrimeCheck)(SECItem *prime); + /* Version 3.024 came to here */ + + SECStatus (*p_ChaCha20_InitContext)(ChaCha20Context *ctx, + const unsigned char *key, + unsigned int keyLen, + const unsigned char *nonce, + unsigned int nonceLen, + PRUint32 ctr); + + ChaCha20Context *(*p_ChaCha20_CreateContext)(const unsigned char *key, + unsigned int keyLen, + const unsigned char *nonce, + unsigned int nonceLen, + PRUint32 ctr); + + void 
(*p_ChaCha20_DestroyContext)(ChaCha20Context *ctx, PRBool freeit);
+
+    /* Version 3.025 came to here */
+
+    /* Add new function pointers at the end of this struct and bump
+     * FREEBL_VERSION at the beginning of this file. */
+};
+
+typedef struct FREEBLVectorStr FREEBLVector;
+
+#ifdef FREEBL_LOWHASH
+#include "nsslowhash.h"
+
+#define NSSLOW_VERSION 0x0300 /* loader requires equal high byte and a low byte >= its own */
+
+struct NSSLOWVectorStr { /* function table returned by the DSO's NSSLOW_GetVector entry point */
+    unsigned short length;  /* of this struct in bytes; loader rejects tables shorter than its own */
+    unsigned short version; /* of this struct; compared against NSSLOW_VERSION */
+    const FREEBLVector *(*p_FREEBL_GetVector)(void);
+    NSSLOWInitContext *(*p_NSSLOW_Init)(void);
+    void (*p_NSSLOW_Shutdown)(NSSLOWInitContext *context);
+    void (*p_NSSLOW_Reset)(NSSLOWInitContext *context);
+    NSSLOWHASHContext *(*p_NSSLOWHASH_NewContext)(
+        NSSLOWInitContext *initContext,
+        HASH_HashType hashType);
+    void (*p_NSSLOWHASH_Begin)(NSSLOWHASHContext *context);
+    void (*p_NSSLOWHASH_Update)(NSSLOWHASHContext *context,
+                                const unsigned char *buf,
+                                unsigned int len);
+    void (*p_NSSLOWHASH_End)(NSSLOWHASHContext *context,
+                             unsigned char *buf,
+                             unsigned int *ret, unsigned int len);
+    void (*p_NSSLOWHASH_Destroy)(NSSLOWHASHContext *context);
+    unsigned int (*p_NSSLOWHASH_Length)(NSSLOWHASHContext *context);
+};
+
+typedef struct NSSLOWVectorStr NSSLOWVector;
+#endif
+
+SEC_BEGIN_PROTOS
+
+#ifdef FREEBL_LOWHASH
+typedef const NSSLOWVector *NSSLOWGetVectorFn(void); /* type of the NSSLOW_GetVector entry point */
+
+extern NSSLOWGetVectorFn NSSLOW_GetVector;
+#endif
+
+typedef const FREEBLVector *FREEBLGetVectorFn(void); /* type of the FREEBL_GetVector entry point */
+
+extern FREEBLGetVectorFn FREEBL_GetVector;
+
+SEC_END_PROTOS
+
+#endif
+
+#ifdef NSS_DISABLE_DEPRECATED_SEED /* NOTE(review): typedefs exist only when SEED is disabled - presumably for stub slots; confirm */
+typedef SECStatus (*F_SEED_InitContext)(SEEDContext *cx,
+                                        const unsigned char *key,
+                                        unsigned int keylen,
+                                        const unsigned char *iv,
+                                        int mode,
+                                        unsigned int encrypt,
+                                        unsigned int);
+
+typedef SEEDContext *(*F_SEED_AllocateContext)(void);
+
+typedef SEEDContext *(*F_SEED_CreateContext)(const unsigned char *key,
+                                             const unsigned char *iv,
+                                             int mode, PRBool encrypt);
+
+typedef void (*F_SEED_DestroyContext)(SEEDContext *cx, PRBool freeit);
+
+typedef SECStatus (*F_SEED_Encrypt)(SEEDContext *cx, unsigned char *output,
+                                    unsigned int *outputLen, unsigned int maxOutputLen,
+                                    const unsigned char *input, unsigned int inputLen);
+
+typedef SECStatus (*F_SEED_Decrypt)(SEEDContext *cx, unsigned char *output,
+                                    unsigned int *outputLen, unsigned int maxOutputLen,
+                                    const unsigned char *input, unsigned int inputLen);
+#endif
+
+#ifdef NSS_DISABLE_DEPRECATED_RC2 /* NOTE(review): same pattern as the SEED typedefs above; confirm intent */
+typedef RC2Context *(*F_RC2_CreateContext)(const unsigned char *key,
+                                           unsigned int len, const unsigned char *iv,
+                                           int mode, unsigned effectiveKeyLen);
+
+typedef void (*F_RC2_DestroyContext)(RC2Context *cx, PRBool freeit);
+
+typedef SECStatus (*F_RC2_Encrypt)(RC2Context *cx, unsigned char *output,
+                                   unsigned int *outputLen, unsigned int maxOutputLen,
+                                   const unsigned char *input, unsigned int inputLen);
+
+typedef SECStatus (*F_RC2_Decrypt)(RC2Context *cx, unsigned char *output,
+                                   unsigned int *outputLen, unsigned int maxOutputLen,
+                                   const unsigned char *input, unsigned int inputLen);
+
+typedef SECStatus (*F_RC2_InitContext)(RC2Context *cx,
+                                       const unsigned char *key,
+                                       unsigned int keylen,
+                                       const unsigned char *iv,
+                                       int mode,
+                                       unsigned int effectiveKeyLen,
+                                       unsigned int);
+
+typedef RC2Context *(*F_RC2_AllocateContext)(void);
+#endif
diff --git a/security/nss/lib/freebl/lowhash_vector.c b/security/nss/lib/freebl/lowhash_vector.c
new file mode 100644
index 0000000000..be53bbdc62
--- /dev/null
+++ b/security/nss/lib/freebl/lowhash_vector.c
@@ -0,0 +1,224 @@
+/*
+ * lowhash_vector.c - load the platform dependent DSO that exports the NSSLOW vector.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#define _GNU_SOURCE 1 /* glibc exposes dladdr()/Dl_info only with _GNU_SOURCE */
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+#include "nsslowhash.h"
+#include <dlfcn.h>
+#include "pratom.h"
+
+static PRLibrary *blLib; /* dlopen handle of the loaded DSO; set once by freebl_LoadDSO */
+
+#define LSB(x) ((x)&0xff)  /* low byte of a 16-bit vector version */
+#define MSB(x) ((x) >> 8)  /* high byte of a 16-bit vector version */
+
+static const NSSLOWVector *vector; /* NULL until a compatible DSO has been loaded */
+static const char *libraryName = NULL;
+
+/* pretty much only glibc uses this, make sure we don't have any dependencies
+ * on nspr: the NSPR/PORT names used by genload.c are mapped onto libc/libdl. */
+#undef PORT_Alloc
+#undef PORT_Free
+#define PORT_Alloc malloc
+#define PR_Malloc malloc
+#define PORT_Free free
+#define PR_Free free
+#define PR_GetDirectorySeparator() '/'
+#define PR_LoadLibraryWithFlags(libspec, flags) \
+    (PRLibrary *)dlopen((libspec).value.pathname, RTLD_NOW | RTLD_LOCAL)
+#define PR_GetLibraryFilePathname(name, addr) \
+    freebl_lowhash_getLibraryFilePath(addr)
+
+static char * /* strdup'd path of the object containing addr, or NULL if dladdr fails */
+freebl_lowhash_getLibraryFilePath(void *addr)
+{
+    Dl_info dli;
+    if (dladdr(addr, &dli) == 0) {
+        return NULL;
+    }
+    return strdup(dli.dli_fname);
+}
+
+/*
+ * The PR_LoadLibraryWithFlags call above defines this variable away, so we
+ * don't need it..
+ */
+#ifdef nodef
+static const char *NameOfThisSharedLib =
+    SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+#include "genload.c"
+
+/* This function must be run only once. */
+/* determine if hybrid platform, then actually load the DSO. */
+static PRStatus /* PR_SUCCESS only when vector, libraryName and blLib have been set */
+freebl_LoadDSO(void)
+{
+    PRLibrary *handle;
+    const char *name = getLibName();
+
+    if (!name) {
+        /*PR_SetError(PR_LOAD_LIBRARY_ERROR,0); */
+        return PR_FAILURE;
+    }
+    handle = loader_LoadLibrary(name);
+    if (handle) {
+        void *address = dlsym(handle, "NSSLOW_GetVector");
+        if (address) {
+            NSSLOWGetVectorFn *getVector = (NSSLOWGetVectorFn *)address;
+            const NSSLOWVector *dsoVector = getVector();
+            if (dsoVector) {
+                unsigned short dsoVersion = dsoVector->version;
+                unsigned short myVersion = NSSLOW_VERSION;
+                if (MSB(dsoVersion) == MSB(myVersion) &&
+                    LSB(dsoVersion) >= LSB(myVersion) &&
+                    dsoVector->length >= sizeof(NSSLOWVector)) {
+                    vector = dsoVector;
+                    libraryName = name;
+                    blLib = handle;
+                    return PR_SUCCESS;
+                }
+            }
+        }
+        (void)dlclose(handle); /* missing symbol or incompatible vector: drop the DSO */
+    }
+    return PR_FAILURE;
+}
+
+static PRCallOnceType loadFreeBLOnce;
+
+static void /* runs freebl_LoadDSO at most once; other callers wait until it has finished */
+freebl_RunLoaderOnce(void)
+{
+    /* Don't have NSPR, so can't use the real PR_CallOnce, implement a stripped
+     * down version. */
+    if (loadFreeBLOnce.initialized) {
+        return;
+    }
+    if (__sync_lock_test_and_set(&loadFreeBLOnce.inProgress, 1) == 0) {
+        loadFreeBLOnce.status = freebl_LoadDSO();
+        loadFreeBLOnce.initialized = 1;
+    } else {
+        /* shouldn't have a lot of takers on the else clause, which is good
+         * since we don't have condition variables yet.
+         * 'initialized' only ever gets set (not cleared) so we don't
+         * need the traditional locks. */
+        while (!loadFreeBLOnce.initialized) {
+            sleep(1); /* don't have condition variables, just give up the CPU */
+        }
+    }
+}
+
+static const NSSLOWVector * /* loaded vector, or NULL when loading failed */
+freebl_InitVector(void)
+{
+    if (!vector) {
+        freebl_RunLoaderOnce();
+    }
+    return vector;
+}
+
+const FREEBLVector * /* forwards to the DSO; NULL when no vector is available */
+FREEBL_GetVector(void)
+{
+    if (freebl_InitVector()) {
+        return (vector->p_FREEBL_GetVector)();
+    }
+    return NULL;
+}
+
+NSSLOWInitContext * /* forwards to the DSO; NULL when no vector is available */
+NSSLOW_Init(void)
+{
+    if (freebl_InitVector()) {
+        return (vector->p_NSSLOW_Init)();
+    }
+    return NULL;
+}
+
+void /* forwards to the DSO; silently does nothing when no vector is available */
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    if (freebl_InitVector()) {
+        (vector->p_NSSLOW_Shutdown)(context);
+    }
+}
+
+void /* forwards to the DSO; silently does nothing when no vector is available */
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    if (freebl_InitVector()) {
+        (vector->p_NSSLOW_Reset)(context);
+    }
+}
+
+NSSLOWHASHContext * /* forwards to the DSO; NULL when no vector is available */
+NSSLOWHASH_NewContext(
+    NSSLOWInitContext *initContext,
+    HASH_HashType hashType)
+{
+    if (freebl_InitVector()) {
+        return (vector->p_NSSLOWHASH_NewContext)(initContext, hashType);
+    }
+    return NULL;
+}
+
+void /* forwards to the DSO; silently does nothing when no vector is available */
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    if (freebl_InitVector()) {
+        (vector->p_NSSLOWHASH_Begin)(context);
+    }
+}
+
+void /* forwards to the DSO; silently does nothing when no vector is available */
+NSSLOWHASH_Update(NSSLOWHASHContext *context,
+                  const unsigned char *buf,
+                  unsigned int len)
+{
+    if (freebl_InitVector()) {
+        (vector->p_NSSLOWHASH_Update)(context, buf, len);
+    }
+}
+
+void /* forwards to the DSO; buf and *ret are left untouched when no vector is available */
+NSSLOWHASH_End(NSSLOWHASHContext *context,
+               unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    if (freebl_InitVector()) {
+        (vector->p_NSSLOWHASH_End)(context, buf, ret, len);
+    }
+}
+
+void /* forwards to the DSO; silently does nothing when no vector is available */
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    if (freebl_InitVector()) {
+        (vector->p_NSSLOWHASH_Destroy)(context);
+    }
+}
+
+unsigned int /* forwards to the DSO; (unsigned int)-1 when no vector is available */
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    if (freebl_InitVector()) {
+        return (vector->p_NSSLOWHASH_Length)(context);
+    }
+    return -1; /* converts to UINT_MAX because the return type is unsigned */
+}
diff --git a/security/nss/lib/freebl/manifest.mn b/security/nss/lib/freebl/manifest.mn
new file mode 100644
index 0000000000..b6c5fb3582
--- /dev/null
+++ b/security/nss/lib/freebl/manifest.mn @@ -0,0 +1,201 @@ +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# NOTE: any ifdefs in this file must be defined on the gmake command line +# (if anywhere). They cannot come from Makefile or config.mk + +CORE_DEPTH = ../.. + +MODULE = nss + +# copied from Linux.mk. We have a chicken and egg issue here. We need to set +# Library name before we call the platform code in coreconf, but we need to +# Pick up the automatic setting of FREEBL_LOWHASH before we can set the +# Library name... so for now we mimic the code in Linux.mk to get the +# automatic setting early... +# +# On Linux 2.6 or later, build libfreebl3.so with no NSPR and libnssutil3.so +# dependencies by default. Set FREEBL_NO_DEPEND to 0 in the environment to +# override this. +# +# +include $(CORE_DEPTH)/coreconf/arch.mk +ifeq ($(OS_ARCH),Linux) +ifneq ($(OS_TARGET),Android) +ifeq (2.6,$(firstword $(sort 2.6 $(OS_RELEASE)))) +ifndef FREEBL_NO_DEPEND +FREEBL_NO_DEPEND = 1 +FREEBL_LOWHASH = 1 +endif +endif +endif +endif + + +LIBRARY_NAME = freebl +LIBRARY_VERSION = 3 + +ifdef FREEBL_CHILD_BUILD + ifdef USE_ABI32_INT32 + LIBRARY_NAME = freebl_32int + endif + ifdef USE_ABI32_INT64 + LIBRARY_NAME = freebl_32int64 + endif + ifdef USE_ABI32_FPU + LIBRARY_NAME = freebl_32fpu + endif + ifdef USE_ABI64_INT + LIBRARY_NAME = freebl_64int + endif + ifdef USE_ABI64_FPU + LIBRARY_NAME = freebl_64fpu + endif + ifdef FREEBL_LOWHASH + LIBRARY_NAME = freeblpriv + endif + ifdef USE_STUB_BUILD + # for the stub build, reset name to the default (from freeblpriv) + LIBRARY_NAME = freebl + endif +endif + +# if the library name contains _, we prefix the version with _ +ifneq (,$(findstring _,$(LIBRARY_NAME))) + LIBRARY_VERSION := _$(LIBRARY_VERSION) +endif + +MAPFILE = $(OBJDIR)/$(LIBRARY_NAME).def + +SOFTOKEN_LIBRARY_VERSION = 3 + 
+DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\" \ + -DSHLIB_VERSION=\"$(LIBRARY_VERSION)\" \ + -DSOFTOKEN_SHLIB_VERSION=\"$(SOFTOKEN_LIBRARY_VERSION)\" + +REQUIRES = + +EXPORTS = \ + blapit.h \ + shsign.h \ + ecl-exp.h \ + $(LOWHASH_EXPORTS) \ + $(NULL) + +PRIVATE_EXPORTS = \ + cmac.h \ + alghmac.h \ + blake2b.h \ + blapi.h \ + chacha20poly1305.h \ + hmacct.h \ + secmpi.h \ + secrng.h \ + ec.h \ + ecl.h \ + ecl-curve.h \ + eclt.h \ + $(NULL) + +MPI_HDRS = mpi-config.h mpi.h mpi-priv.h mplogic.h mpprime.h logtab.h mp_gf2m.h +MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c + + +ECL_HDRS = ecl-exp.h ecl.h ecp.h ecl-priv.h +ECL_SRCS = ecl.c ecl_mult.c ecl_gf.c \ + ecp_aff.c ecp_jac.c ecp_mont.c \ + ec_naf.c ecp_jm.c ecp_256.c ecp_384.c ecp_521.c \ + ecp_256_32.c ecp_25519.c ecp_secp384r1.c ecp_secp521r1.c +SHA_SRCS = sha_fast.c +MPCPU_SRCS = mpcpucache.c +VERIFIED_SRCS = $(NULL) + +CSRCS = \ + freeblver.c \ + ldvector.c \ + sysrand.c \ + $(SHA_SRCS) \ + md2.c \ + md5.c \ + sha512.c \ + cmac.c \ + alghmac.c \ + rawhash.c \ + arcfour.c \ + arcfive.c \ + crypto_primitives.c \ + blake2b.c \ + desblapi.c \ + des.c \ + drbg.c \ + chacha20poly1305.c \ + cts.c \ + ctr.c \ + blinit.c \ + fipsfreebl.c \ + gcm.c \ + hmacct.c \ + rijndael.c \ + aeskeywrap.c \ + camellia.c \ + dh.c \ + ec.c \ + ecdecode.c \ + pqg.c \ + dsa.c \ + rsa.c \ + rsapkcs.c \ + shvfy.c \ + tlsprfalg.c \ + jpake.c \ + secmpi.c \ + $(MPI_SRCS) \ + $(MPCPU_SRCS) \ + $(ECL_SRCS) \ + $(VERIFIED_SRCS) \ + $(STUBS_SRCS) \ + $(LOWHASH_SRCS) \ + $(EXTRA_SRCS) \ + $(NULL) + +ifndef NSS_DISABLE_DEPRECATED_SEED + CSRCS += deprecated/seed.c +endif + +ifndef NSS_DISABLE_DEPRECATED_RC2 + CSRCS += deprecated/alg2268.c +endif + +ALL_CSRCS := $(CSRCS) + +ALL_HDRS = \ + cmac.h \ + alghmac.h \ + blake2b.h \ + blapi.h \ + blapit.h \ + des.h \ + ec.h \ + loader.h \ + rijndael.h \ + camellia.h \ + secmpi.h \ + sha_fast.h \ + sha256.h \ + shsign.h \ + vis_proto.h \ + seed.h \ + $(NULL) + + 
+ifdef AES_GEN_VAL
+DEFINES += -DRIJNDAEL_GENERATE_VALUES
+else
+ifdef AES_GEN_VAL_M
+DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO
+else
+DEFINES += -DRIJNDAEL_INCLUDE_TABLES
+endif
+endif
diff --git a/security/nss/lib/freebl/md2.c b/security/nss/lib/freebl/md2.c
new file mode 100644
index 0000000000..cb3d3d82bc
--- /dev/null
+++ b/security/nss/lib/freebl/md2.c
@@ -0,0 +1,273 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+
+#include "blapi.h"
+
+#define MD2_DIGEST_LEN 16
+#define MD2_BUFSIZE 16
+#define MD2_X_SIZE 48  /* The X array, [CV | INPUT | TMP VARS] */
+#define MD2_CV 0       /* index into X for chaining variables */
+#define MD2_INPUT 16   /* index into X for input */
+#define MD2_TMPVARS 32 /* index into X for temporary variables */
+#define MD2_CHECKSUM_SIZE 16
+
+struct MD2ContextStr {
+    unsigned char checksum[MD2_BUFSIZE]; /* running checksum, fed in as the final block by MD2_End */
+    unsigned char X[MD2_X_SIZE];         /* [CV | INPUT | TMP VARS], see the MD2_* indexes above */
+    PRUint8 unusedBuffer;                /* free bytes left in the INPUT slice; MD2_BUFSIZE means empty */
+};
+
+static const PRUint8 MD2S[256] = { /* byte substitution table used by md2_compress */
+    0051, 0056, 0103, 0311, 0242, 0330, 0174, 0001,
+    0075, 0066, 0124, 0241, 0354, 0360, 0006, 0023,
+    0142, 0247, 0005, 0363, 0300, 0307, 0163, 0214,
+    0230, 0223, 0053, 0331, 0274, 0114, 0202, 0312,
+    0036, 0233, 0127, 0074, 0375, 0324, 0340, 0026,
+    0147, 0102, 0157, 0030, 0212, 0027, 0345, 0022,
+    0276, 0116, 0304, 0326, 0332, 0236, 0336, 0111,
+    0240, 0373, 0365, 0216, 0273, 0057, 0356, 0172,
+    0251, 0150, 0171, 0221, 0025, 0262, 0007, 0077,
+    0224, 0302, 0020, 0211, 0013, 0042, 0137, 0041,
+    0200, 0177, 0135, 0232, 0132, 0220, 0062, 0047,
+    0065, 0076, 0314, 0347, 0277, 0367, 0227, 0003,
+    0377, 0031, 0060, 0263, 0110, 0245, 0265, 0321,
+    0327, 0136, 0222, 0052, 0254, 0126, 0252, 0306,
+    0117, 0270, 0070, 0322, 0226, 0244, 0175, 0266,
+    0166, 0374, 0153, 0342, 0234, 0164, 0004, 0361,
+    0105, 0235, 0160, 0131, 0144, 0161, 0207, 0040,
+    0206, 0133, 0317, 0145, 0346, 0055, 0250, 0002,
+    0033, 0140, 0045, 0255, 0256, 0260, 0271, 0366,
+    0034, 0106, 0141, 0151, 0064, 0100, 0176, 0017,
+    0125, 0107, 0243, 0043, 0335, 0121, 0257, 0072,
+    0303, 0134, 0371, 0316, 0272, 0305, 0352, 0046,
+    0054, 0123, 0015, 0156, 0205, 0050, 0204, 0011,
+    0323, 0337, 0315, 0364, 0101, 0201, 0115, 0122,
+    0152, 0334, 0067, 0310, 0154, 0301, 0253, 0372,
+    0044, 0341, 0173, 0010, 0014, 0275, 0261, 0112,
+    0170, 0210, 0225, 0213, 0343, 0143, 0350, 0155,
+    0351, 0313, 0325, 0376, 0073, 0000, 0035, 0071,
+    0362, 0357, 0267, 0016, 0146, 0130, 0320, 0344,
+    0246, 0167, 0162, 0370, 0353, 0165, 0113, 0012,
+    0061, 0104, 0120, 0264, 0217, 0355, 0037, 0032,
+    0333, 0231, 0215, 0063, 0237, 0021, 0203, 0024
+};
+
+SECStatus /* one-shot digest of the NUL-terminated string src into dest (MD2_DIGEST_LEN bytes) */
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    unsigned int len;
+    MD2Context *cx = MD2_NewContext();
+    if (!cx) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return SECFailure;
+    }
+    MD2_Begin(cx);
+    MD2_Update(cx, (const unsigned char *)src, PORT_Strlen(src));
+    MD2_End(cx, dest, &len, MD2_DIGEST_LEN);
+    MD2_DestroyContext(cx, PR_TRUE);
+    return SECSuccess;
+}
+
+MD2Context * /* zero-filled context, or NULL with PR_OUT_OF_MEMORY_ERROR set */
+MD2_NewContext(void)
+{
+    MD2Context *cx = (MD2Context *)PORT_ZAlloc(sizeof(MD2Context));
+    if (cx == NULL) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return NULL;
+    }
+    return cx;
+}
+
+void /* releases cx only when freeit is set; otherwise a no-op */
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    if (freeit)
+        PORT_ZFree(cx, sizeof(*cx));
+}
+
+void /* resets cx to the empty-message state */
+MD2_Begin(MD2Context *cx)
+{
+    memset(cx, 0, sizeof(*cx));
+    cx->unusedBuffer = MD2_BUFSIZE;
+}
+
+static void /* absorbs the full block in X[MD2_INPUT..] and marks the buffer empty */
+md2_compress(MD2Context *cx)
+{
+    int j;
+    unsigned char P;
+    P = cx->checksum[MD2_CHECKSUM_SIZE - 1];
+/* Compute the running checksum, and set the tmp variables to be
+ * CV[i] XOR input[i]
+ */
+#define CKSUMFN(n)                                        \
+    P = cx->checksum[n] ^ MD2S[cx->X[MD2_INPUT + n] ^ P]; \
+    cx->checksum[n] = P;                                  \
+    cx->X[MD2_TMPVARS + n] = cx->X[n] ^ cx->X[MD2_INPUT + n];
+    CKSUMFN(0);
+    CKSUMFN(1);
+    CKSUMFN(2);
+    CKSUMFN(3);
+    CKSUMFN(4);
+    CKSUMFN(5);
+    CKSUMFN(6);
+    CKSUMFN(7);
+    CKSUMFN(8);
+    CKSUMFN(9);
+    CKSUMFN(10);
+    CKSUMFN(11);
+    CKSUMFN(12);
+    CKSUMFN(13);
+    CKSUMFN(14);
+    CKSUMFN(15);
+/* The compression function. */
+#define COMPRESS(n)         \
+    P = cx->X[n] ^ MD2S[P]; \
+    cx->X[n] = P;
+    P = 0x00;
+    for (j = 0; j < 18; j++) {
+        COMPRESS(0);
+        COMPRESS(1);
+        COMPRESS(2);
+        COMPRESS(3);
+        COMPRESS(4);
+        COMPRESS(5);
+        COMPRESS(6);
+        COMPRESS(7);
+        COMPRESS(8);
+        COMPRESS(9);
+        COMPRESS(10);
+        COMPRESS(11);
+        COMPRESS(12);
+        COMPRESS(13);
+        COMPRESS(14);
+        COMPRESS(15);
+        COMPRESS(16);
+        COMPRESS(17);
+        COMPRESS(18);
+        COMPRESS(19);
+        COMPRESS(20);
+        COMPRESS(21);
+        COMPRESS(22);
+        COMPRESS(23);
+        COMPRESS(24);
+        COMPRESS(25);
+        COMPRESS(26);
+        COMPRESS(27);
+        COMPRESS(28);
+        COMPRESS(29);
+        COMPRESS(30);
+        COMPRESS(31);
+        COMPRESS(32);
+        COMPRESS(33);
+        COMPRESS(34);
+        COMPRESS(35);
+        COMPRESS(36);
+        COMPRESS(37);
+        COMPRESS(38);
+        COMPRESS(39);
+        COMPRESS(40);
+        COMPRESS(41);
+        COMPRESS(42);
+        COMPRESS(43);
+        COMPRESS(44);
+        COMPRESS(45);
+        COMPRESS(46);
+        COMPRESS(47);
+        P = (P + j) % 256;
+    }
+    cx->unusedBuffer = MD2_BUFSIZE;
+}
+
+void /* absorbs inputLen bytes; a trailing partial block stays buffered in cx for the next call */
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    PRUint32 bytesToConsume;
+
+    /* Fill the remaining input buffer. */
+    if (cx->unusedBuffer != MD2_BUFSIZE) {
+        bytesToConsume = PR_MIN(inputLen, cx->unusedBuffer);
+        memcpy(&cx->X[MD2_INPUT + (MD2_BUFSIZE - cx->unusedBuffer)],
+               input, bytesToConsume);
+        if (bytesToConsume < cx->unusedBuffer) {
+            /* Block still incomplete: record progress and wait for more. */
+            cx->unusedBuffer = (PRUint8)(cx->unusedBuffer - bytesToConsume);
+            return;
+        }
+        md2_compress(cx); /* block is full; this also resets unusedBuffer */
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+    }
+
+    /* Iterate over 16-byte chunks of the input. */
+    while (inputLen >= MD2_BUFSIZE) {
+        memcpy(&cx->X[MD2_INPUT], input, MD2_BUFSIZE);
+        md2_compress(cx);
+        inputLen -= MD2_BUFSIZE;
+        input += MD2_BUFSIZE;
+    }
+
+    /* Copy any input that remains into the buffer. */
+    if (inputLen)
+        memcpy(&cx->X[MD2_INPUT], input, inputLen);
+    cx->unusedBuffer = MD2_BUFSIZE - inputLen; /* buffer is empty here, so inputLen < MD2_BUFSIZE is all it holds */
+}
+
+void /* pads, finishes and writes MD2_DIGEST_LEN bytes; sets SEC_ERROR_INVALID_ARGS if maxDigestLen is too small */
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint8 padStart;
+    if (maxDigestLen < MD2_BUFSIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+    padStart = MD2_BUFSIZE - cx->unusedBuffer;
+    memset(&cx->X[MD2_INPUT + padStart], cx->unusedBuffer,
+           cx->unusedBuffer); /* each pad byte holds the pad length */
+    md2_compress(cx);
+    memcpy(&cx->X[MD2_INPUT], cx->checksum, MD2_BUFSIZE);
+    md2_compress(cx);
+    *digestLen = MD2_DIGEST_LEN;
+    memcpy(digest, &cx->X[MD2_CV], MD2_DIGEST_LEN);
+}
+
+unsigned int /* number of bytes MD2_Flatten writes */
+MD2_FlattenSize(MD2Context *cx)
+{
+    return sizeof(*cx);
+}
+
+SECStatus /* raw copy of the context into space, which must hold MD2_FlattenSize() bytes */
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    memcpy(space, cx, sizeof(*cx));
+    return SECSuccess;
+}
+
+MD2Context * /* new context restored from a MD2_Flatten image; arg is unused; NULL on allocation failure */
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    MD2Context *cx = MD2_NewContext();
+    if (cx)
+        memcpy(cx, space, sizeof(*cx));
+    return cx;
+}
+
+void /* byte-for-byte copy of src into dest */
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+    memcpy(dest, src, sizeof *dest);
+}
diff --git a/security/nss/lib/freebl/md5.c b/security/nss/lib/freebl/md5.c
new file mode 100644
index 0000000000..bdd36a61bd
--- /dev/null
+++ b/security/nss/lib/freebl/md5.c
@@ -0,0 +1,598 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prlong.h"
+
+#include "blapi.h"
+#include "blapii.h"
+
+#define MD5_HASH_LEN 16
+#define MD5_BUFFER_SIZE 64
+#define MD5_END_BUFFER (MD5_BUFFER_SIZE - 8)
+
+#define CV0_1 0x67452301 /* CV0_*: initial chaining values loaded by MD5_Begin */
+#define CV0_2 0xefcdab89
+#define CV0_3 0x98badcfe
+#define CV0_4 0x10325476
+
+#define T1_0 0xd76aa478 /* Tr_i: additive constant of step i in round r (the 'ti' argument of FF/GG/HH/II) */
+#define T1_1 0xe8c7b756
+#define T1_2 0x242070db
+#define T1_3 0xc1bdceee
+#define T1_4 0xf57c0faf
+#define T1_5 0x4787c62a
+#define T1_6 0xa8304613
+#define T1_7 0xfd469501
+#define T1_8 0x698098d8
+#define T1_9 0x8b44f7af
+#define T1_10 0xffff5bb1
+#define T1_11 0x895cd7be
+#define T1_12 0x6b901122
+#define T1_13 0xfd987193
+#define T1_14 0xa679438e
+#define T1_15 0x49b40821
+
+#define T2_0 0xf61e2562
+#define T2_1 0xc040b340
+#define T2_2 0x265e5a51
+#define T2_3 0xe9b6c7aa
+#define T2_4 0xd62f105d
+#define T2_5 0x02441453
+#define T2_6 0xd8a1e681
+#define T2_7 0xe7d3fbc8
+#define T2_8 0x21e1cde6
+#define T2_9 0xc33707d6
+#define T2_10 0xf4d50d87
+#define T2_11 0x455a14ed
+#define T2_12 0xa9e3e905
+#define T2_13 0xfcefa3f8
+#define T2_14 0x676f02d9
+#define T2_15 0x8d2a4c8a
+
+#define T3_0 0xfffa3942
+#define T3_1 0x8771f681
+#define T3_2 0x6d9d6122
+#define T3_3 0xfde5380c
+#define T3_4 0xa4beea44
+#define T3_5 0x4bdecfa9
+#define T3_6 0xf6bb4b60
+#define T3_7 0xbebfbc70
+#define T3_8 0x289b7ec6
+#define T3_9 0xeaa127fa
+#define T3_10 0xd4ef3085
+#define T3_11 0x04881d05
+#define T3_12 0xd9d4d039
+#define T3_13 0xe6db99e5
+#define T3_14 0x1fa27cf8
+#define T3_15 0xc4ac5665
+
+#define T4_0 0xf4292244
+#define T4_1 0x432aff97
+#define T4_2 0xab9423a7
+#define T4_3 0xfc93a039
+#define T4_4 0x655b59c3
+#define T4_5 0x8f0ccc92
+#define T4_6 0xffeff47d
+#define T4_7 0x85845dd1
+#define T4_8 0x6fa87e4f
+#define T4_9 0xfe2ce6e0
+#define T4_10 0xa3014314
+#define T4_11 0x4e0811a1
+#define T4_12 0xf7537e82
+#define T4_13 0xbd3af235
+#define T4_14 0x2ad7d2bb
+#define T4_15 0xeb86d391
+
+#define R1B0 0 /* RrBi: index into the 16-word block read by step i of round r */
+#define R1B1 1
+#define R1B2 2
+#define R1B3 3
+#define R1B4 4
+#define R1B5 5
+#define R1B6 6
+#define R1B7 7
+#define R1B8 8
+#define R1B9 9
+#define R1B10 10
+#define R1B11 11
+#define R1B12 12
+#define R1B13 13
+#define R1B14 14
+#define R1B15 15
+
+#define R2B0 1
+#define R2B1 6
+#define R2B2 11
+#define R2B3 0
+#define R2B4 5
+#define R2B5 10
+#define R2B6 15
+#define R2B7 4
+#define R2B8 9
+#define R2B9 14
+#define R2B10 3
+#define R2B11 8
+#define R2B12 13
+#define R2B13 2
+#define R2B14 7
+#define R2B15 12
+
+#define R3B0 5
+#define R3B1 8
+#define R3B2 11
+#define R3B3 14
+#define R3B4 1
+#define R3B5 4
+#define R3B6 7
+#define R3B7 10
+#define R3B8 13
+#define R3B9 0
+#define R3B10 3
+#define R3B11 6
+#define R3B12 9
+#define R3B13 12
+#define R3B14 15
+#define R3B15 2
+
+#define R4B0 0
+#define R4B1 7
+#define R4B2 14
+#define R4B3 5
+#define R4B4 12
+#define R4B5 3
+#define R4B6 10
+#define R4B7 1
+#define R4B8 8
+#define R4B9 15
+#define R4B10 6
+#define R4B11 13
+#define R4B12 4
+#define R4B13 11
+#define R4B14 2
+#define R4B15 9
+
+#define S1_0 7 /* Sr_k: left-rotate amounts used in round r (the 's' argument of FF/GG/HH/II) */
+#define S1_1 12
+#define S1_2 17
+#define S1_3 22
+
+#define S2_0 5
+#define S2_1 9
+#define S2_2 14
+#define S2_3 20
+
+#define S3_0 4
+#define S3_1 11
+#define S3_2 16
+#define S3_3 23
+
+#define S4_0 6
+#define S4_1 10
+#define S4_2 15
+#define S4_3 21
+
+struct MD5ContextStr {
+    PRUint32 lsbInput; /* presumably the low half of a 64-bit input counter (see addto64) - confirm */
+    PRUint32 msbInput; /* presumably the high half of that counter - confirm */
+    PRUint32 cv[4];    /* chaining values, initialised to CV0_1..CV0_4 by MD5_Begin */
+    union {
+        PRUint8 b[64];  /* current input block viewed as bytes */
+        PRUint32 w[16]; /* the same block viewed as 32-bit words */
+    } u;
+};
+
+#define inBuf u.b
+
+SECStatus /* one-shot digest of the NUL-terminated string src */
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    return MD5_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+SECStatus /* one-shot digest of src[0..src_length) into dest (MD5_HASH_LEN bytes) */
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    unsigned int len;
+    MD5Context cx;
+
+    MD5_Begin(&cx);
+    MD5_Update(&cx, src, src_length);
+    MD5_End(&cx, dest, &len, MD5_HASH_LEN);
+    memset(&cx, 0, sizeof cx); /* wipe the stack copy of the hash state */
+    return SECSuccess;
+}
+
+MD5Context * /* uninitialised context (call MD5_Begin), or NULL with PR_OUT_OF_MEMORY_ERROR set */
+MD5_NewContext(void)
+{
+    /* no need to ZAlloc, MD5_Begin will init the context */
+    MD5Context *cx = (MD5Context *)PORT_Alloc(sizeof(MD5Context));
+    if (cx == NULL) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return NULL;
+    }
+    return cx;
+}
+
+void /* always clears *cx; frees it as well when freeit is set */
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+void /* resets the counters and chaining values; the input buffer is left as is */
+MD5_Begin(MD5Context *cx)
+{
+    cx->lsbInput = 0;
+    cx->msbInput = 0;
+    /* memset(cx->inBuf, 0, sizeof(cx->inBuf)); */
+    cx->cv[0] = CV0_1;
+    cx->cv[1] = CV0_2;
+    cx->cv[2] = CV0_3;
+    cx->cv[3] = CV0_4;
+}
+
+#define cls(i32, s) (tmp = i32, tmp << s | tmp >> (32 - s)) /* rotate left by s; needs a PRUint32 'tmp' in scope */
+
+#if defined(SOLARIS) || defined(HPUX)
+#define addto64(sumhigh, sumlow, addend) \
+    sumlow += addend;                    \
+    sumhigh += (sumlow < addend);
+#else
+#define addto64(sumhigh, sumlow, addend) \
+    sumlow += addend;                    \
+    if (sumlow < addend)                 \
+        ++sumhigh; /* carry out of the low word; expands to two statements, so do not use unbraced */
+#endif
+
+#define MASK 0x00ff00ff
+#ifdef IS_LITTLE_ENDIAN
+#define lendian(i32) \
+    (i32)
+#else
+#define lendian(i32) \
+    (tmp = (i32 >> 16) | (i32 << 16), ((tmp & MASK) << 8) | ((tmp >> 8) & MASK)) /* byte swap; needs 'tmp' in scope */
+#endif
+
+#ifndef IS_LITTLE_ENDIAN
+
+#define lebytes(b4) \
+    ((PRUint32)(b4)[3] << 24 | (PRUint32)(b4)[2] << 16 | (PRUint32)(b4)[1] << 8 | (PRUint32)(b4)[0]) /* widen first: a promoted int shifted by 24 overflows for bytes >= 0x80 */
+
+static void /* byte-swaps all 16 words of the context's block in place */
+md5_prep_state_le(MD5Context *cx)
+{
+    PRUint32 tmp;
+    cx->u.w[0] = lendian(cx->u.w[0]);
+    cx->u.w[1] = lendian(cx->u.w[1]);
+    cx->u.w[2] = lendian(cx->u.w[2]);
+    cx->u.w[3] = lendian(cx->u.w[3]);
+    cx->u.w[4] = lendian(cx->u.w[4]);
+    cx->u.w[5] = lendian(cx->u.w[5]);
+    cx->u.w[6] = lendian(cx->u.w[6]);
+    cx->u.w[7] = lendian(cx->u.w[7]);
+    cx->u.w[8] = lendian(cx->u.w[8]);
+    cx->u.w[9] = lendian(cx->u.w[9]);
+    cx->u.w[10] = lendian(cx->u.w[10]);
+    cx->u.w[11] = lendian(cx->u.w[11]);
+    cx->u.w[12] = lendian(cx->u.w[12]);
+    cx->u.w[13] = lendian(cx->u.w[13]);
+    cx->u.w[14] = lendian(cx->u.w[14]);
+    cx->u.w[15] = lendian(cx->u.w[15]);
+}
+
+static void /* loads 64 bytes from beBuf into the context's block as little-endian words */
+md5_prep_buffer_le(MD5Context *cx, const PRUint8 *beBuf)
+{
+    cx->u.w[0] = lebytes(&beBuf[0]);
+    cx->u.w[1] = lebytes(&beBuf[4]);
+    cx->u.w[2] = lebytes(&beBuf[8]);
+    cx->u.w[3] = lebytes(&beBuf[12]);
+    cx->u.w[4] = lebytes(&beBuf[16]);
+    cx->u.w[5] = lebytes(&beBuf[20]);
+    cx->u.w[6] = lebytes(&beBuf[24]);
+    cx->u.w[7] = lebytes(&beBuf[28]);
+    cx->u.w[8] = lebytes(&beBuf[32]);
+    cx->u.w[9] = lebytes(&beBuf[36]);
+    cx->u.w[10] = lebytes(&beBuf[40]);
+    cx->u.w[11] = lebytes(&beBuf[44]);
+    cx->u.w[12] = lebytes(&beBuf[48]);
+    cx->u.w[13] = lebytes(&beBuf[52]);
+    cx->u.w[14] = lebytes(&beBuf[56]);
+    cx->u.w[15] = lebytes(&beBuf[60]);
+}
+#endif
+
+#define F(X, Y, Z) \
+    ((X & Y) | ((~X) & Z))
+
+#define G(X, Y, Z) \
+    ((X & Z) | (Y & (~Z)))
+
+#define H(X, Y, Z) \
+    (X ^ Y ^ Z)
+
+#define I(X, Y, Z) \
+    (Y ^ (X | (~Z)))
+
+#define FF(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + F(b, c, d) + bufint + ti, s)
+
+#define GG(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + G(b, c, d) + bufint + ti, s)
+
+#define HH(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + H(b, c, d) + bufint + ti, s)
+
+#define II(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + I(b, c, d) + bufint + ti, s)
+
+static void NO_SANITIZE_ALIGNMENT
+md5_compress(MD5Context *cx, const PRUint32 *wBuf)
+{
+    PRUint32 a, b, c, d;
+    PRUint32 tmp;
+    a = cx->cv[0];
+    b = cx->cv[1];
+    c = cx->cv[2];
+    d = cx->cv[3];
+    FF(a, b, c, d, wBuf[R1B0], S1_0, T1_0);
+    FF(d, a, b, c, wBuf[R1B1], S1_1, T1_1);
+    FF(c, d, a, b, wBuf[R1B2], S1_2, T1_2);
+    FF(b, c, d, a, wBuf[R1B3], S1_3, T1_3);
+    FF(a, b, c, d, wBuf[R1B4], S1_0, T1_4);
+    FF(d, a, b, c, wBuf[R1B5], S1_1, T1_5);
+    FF(c, d, a, b, wBuf[R1B6], S1_2, T1_6);
+    FF(b, c, d, a, wBuf[R1B7], S1_3, T1_7);
+    FF(a, b, c, d, wBuf[R1B8], S1_0, T1_8);
+    FF(d, a, b, c, wBuf[R1B9], S1_1, T1_9);
+    FF(c, d, a, b, wBuf[R1B10], S1_2, T1_10);
+    FF(b, c, d, a, wBuf[R1B11], S1_3, T1_11);
+    FF(a, b, c, d, wBuf[R1B12], S1_0, T1_12);
+    FF(d, a, b, c, wBuf[R1B13], S1_1, T1_13);
+    FF(c, d, a, b, wBuf[R1B14], S1_2, T1_14);
+    FF(b, c, d, a, wBuf[R1B15], S1_3, T1_15);
+    GG(a, b, c, d,
wBuf[R2B0], S2_0, T2_0); + GG(d, a, b, c, wBuf[R2B1], S2_1, T2_1); + GG(c, d, a, b, wBuf[R2B2], S2_2, T2_2); + GG(b, c, d, a, wBuf[R2B3], S2_3, T2_3); + GG(a, b, c, d, wBuf[R2B4], S2_0, T2_4); + GG(d, a, b, c, wBuf[R2B5], S2_1, T2_5); + GG(c, d, a, b, wBuf[R2B6], S2_2, T2_6); + GG(b, c, d, a, wBuf[R2B7], S2_3, T2_7); + GG(a, b, c, d, wBuf[R2B8], S2_0, T2_8); + GG(d, a, b, c, wBuf[R2B9], S2_1, T2_9); + GG(c, d, a, b, wBuf[R2B10], S2_2, T2_10); + GG(b, c, d, a, wBuf[R2B11], S2_3, T2_11); + GG(a, b, c, d, wBuf[R2B12], S2_0, T2_12); + GG(d, a, b, c, wBuf[R2B13], S2_1, T2_13); + GG(c, d, a, b, wBuf[R2B14], S2_2, T2_14); + GG(b, c, d, a, wBuf[R2B15], S2_3, T2_15); + HH(a, b, c, d, wBuf[R3B0], S3_0, T3_0); + HH(d, a, b, c, wBuf[R3B1], S3_1, T3_1); + HH(c, d, a, b, wBuf[R3B2], S3_2, T3_2); + HH(b, c, d, a, wBuf[R3B3], S3_3, T3_3); + HH(a, b, c, d, wBuf[R3B4], S3_0, T3_4); + HH(d, a, b, c, wBuf[R3B5], S3_1, T3_5); + HH(c, d, a, b, wBuf[R3B6], S3_2, T3_6); + HH(b, c, d, a, wBuf[R3B7], S3_3, T3_7); + HH(a, b, c, d, wBuf[R3B8], S3_0, T3_8); + HH(d, a, b, c, wBuf[R3B9], S3_1, T3_9); + HH(c, d, a, b, wBuf[R3B10], S3_2, T3_10); + HH(b, c, d, a, wBuf[R3B11], S3_3, T3_11); + HH(a, b, c, d, wBuf[R3B12], S3_0, T3_12); + HH(d, a, b, c, wBuf[R3B13], S3_1, T3_13); + HH(c, d, a, b, wBuf[R3B14], S3_2, T3_14); + HH(b, c, d, a, wBuf[R3B15], S3_3, T3_15); + II(a, b, c, d, wBuf[R4B0], S4_0, T4_0); + II(d, a, b, c, wBuf[R4B1], S4_1, T4_1); + II(c, d, a, b, wBuf[R4B2], S4_2, T4_2); + II(b, c, d, a, wBuf[R4B3], S4_3, T4_3); + II(a, b, c, d, wBuf[R4B4], S4_0, T4_4); + II(d, a, b, c, wBuf[R4B5], S4_1, T4_5); + II(c, d, a, b, wBuf[R4B6], S4_2, T4_6); + II(b, c, d, a, wBuf[R4B7], S4_3, T4_7); + II(a, b, c, d, wBuf[R4B8], S4_0, T4_8); + II(d, a, b, c, wBuf[R4B9], S4_1, T4_9); + II(c, d, a, b, wBuf[R4B10], S4_2, T4_10); + II(b, c, d, a, wBuf[R4B11], S4_3, T4_11); + II(a, b, c, d, wBuf[R4B12], S4_0, T4_12); + II(d, a, b, c, wBuf[R4B13], S4_1, T4_13); + II(c, d, a, b, wBuf[R4B14], S4_2, T4_14); + II(b, 
c, d, a, wBuf[R4B15], S4_3, T4_15); + cx->cv[0] += a; + cx->cv[1] += b; + cx->cv[2] += c; + cx->cv[3] += d; +} + +void +MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen) +{ + PRUint32 bytesToConsume; + PRUint32 inBufIndex = cx->lsbInput & 63; + const PRUint32 *wBuf; + + /* Add the number of input bytes to the 64-bit input counter. */ + addto64(cx->msbInput, cx->lsbInput, inputLen); + if (inBufIndex) { + /* There is already data in the buffer. Fill with input. */ + bytesToConsume = PR_MIN(inputLen, MD5_BUFFER_SIZE - inBufIndex); + memcpy(&cx->inBuf[inBufIndex], input, bytesToConsume); + if (inBufIndex + bytesToConsume >= MD5_BUFFER_SIZE) { +/* The buffer is filled. Run the compression function. */ +#ifndef IS_LITTLE_ENDIAN + md5_prep_state_le(cx); +#endif + md5_compress(cx, cx->u.w); + } + /* Remaining input. */ + inputLen -= bytesToConsume; + input += bytesToConsume; + } + + /* Iterate over 64-byte chunks of the message. */ + while (inputLen >= MD5_BUFFER_SIZE) { +#ifdef IS_LITTLE_ENDIAN +#ifdef HAVE_UNALIGNED_ACCESS + /* x86 can handle arithmetic on non-word-aligned buffers */ + wBuf = (PRUint32 *)input; +#else + if ((ptrdiff_t)input & 0x3) { + /* buffer not aligned, copy it to force alignment */ + memcpy(cx->inBuf, input, MD5_BUFFER_SIZE); + wBuf = cx->u.w; + } else { + /* buffer is aligned */ + wBuf = (PRUint32 *)input; + } +#endif +#else + md5_prep_buffer_le(cx, input); + wBuf = cx->u.w; +#endif + md5_compress(cx, wBuf); + inputLen -= MD5_BUFFER_SIZE; + input += MD5_BUFFER_SIZE; + } + + /* Tail of message (message bytes mod 64). 
*/ + if (inputLen) + memcpy(cx->inBuf, input, inputLen); +} + +static const unsigned char padbytes[] = { + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +void +MD5_End(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ +#ifndef IS_LITTLE_ENDIAN + PRUint32 tmp; +#endif + PRUint32 lowInput, highInput; + PRUint32 inBufIndex = cx->lsbInput & 63; + + if (maxDigestLen < MD5_HASH_LEN) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return; + } + + /* Copy out the length of bits input before padding. */ + lowInput = cx->lsbInput; + highInput = (cx->msbInput << 3) | (lowInput >> 29); + lowInput <<= 3; + + if (inBufIndex < MD5_END_BUFFER) { + MD5_Update(cx, padbytes, MD5_END_BUFFER - inBufIndex); + } else { + MD5_Update(cx, padbytes, + MD5_END_BUFFER + MD5_BUFFER_SIZE - inBufIndex); + } + + /* Store the number of bytes input (before padding) in final 64 bits. */ + cx->u.w[14] = lendian(lowInput); + cx->u.w[15] = lendian(highInput); + +/* Final call to compress. */ +#ifndef IS_LITTLE_ENDIAN + md5_prep_state_le(cx); +#endif + md5_compress(cx, cx->u.w); + + /* Copy the resulting values out of the chain variables into return buf. 
*/ + if (digestLen) + *digestLen = MD5_HASH_LEN; +#ifndef IS_LITTLE_ENDIAN + cx->cv[0] = lendian(cx->cv[0]); + cx->cv[1] = lendian(cx->cv[1]); + cx->cv[2] = lendian(cx->cv[2]); + cx->cv[3] = lendian(cx->cv[3]); +#endif + memcpy(digest, cx->cv, MD5_HASH_LEN); +} + +void +MD5_EndRaw(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ +#ifndef IS_LITTLE_ENDIAN + PRUint32 tmp; +#endif + PRUint32 cv[4]; + + if (maxDigestLen < MD5_HASH_LEN) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return; + } + + memcpy(cv, cx->cv, sizeof(cv)); +#ifndef IS_LITTLE_ENDIAN + cv[0] = lendian(cv[0]); + cv[1] = lendian(cv[1]); + cv[2] = lendian(cv[2]); + cv[3] = lendian(cv[3]); +#endif + memcpy(digest, cv, MD5_HASH_LEN); + if (digestLen) + *digestLen = MD5_HASH_LEN; +} + +unsigned int +MD5_FlattenSize(MD5Context *cx) +{ + return sizeof(*cx); +} + +SECStatus +MD5_Flatten(MD5Context *cx, unsigned char *space) +{ + memcpy(space, cx, sizeof(*cx)); + return SECSuccess; +} + +MD5Context * +MD5_Resurrect(unsigned char *space, void *arg) +{ + MD5Context *cx = MD5_NewContext(); + if (cx) + memcpy(cx, space, sizeof(*cx)); + return cx; +} + +void +MD5_Clone(MD5Context *dest, MD5Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +void +MD5_TraceState(MD5Context *cx) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); +} diff --git a/security/nss/lib/freebl/mknewpc2.c b/security/nss/lib/freebl/mknewpc2.c new file mode 100644 index 0000000000..6b29688163 --- /dev/null +++ b/security/nss/lib/freebl/mknewpc2.c @@ -0,0 +1,208 @@ +/* + * mknewpc2.c + * + * Generate PC-2 tables for DES-150 library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +typedef unsigned char BYTE; +typedef unsigned int HALF; + +#define DES_ENCRYPT 0 +#define DES_DECRYPT 1 + +/* two 28-bit registers defined in key schedule production process */ +static HALF C0, D0; + +static HALF L0, R0; + +/* key schedule, 16 internal keys, each with 8 6-bit parts */ +static BYTE KS[8][16]; + +/* + * This table takes the 56 bits in C0 and D0 and shows how they are + * permuted into the 8 6-bit parts of the key in the key schedule. + * The bits of C0 are numbered left to right, 1-28. + * The bits of D0 are numbered left to right, 29-56. + * Zeros in this table represent bits that are always zero. + * Note that all the bits in the first 4 rows come from C0, + * and all the bits in the second 4 rows come from D0. + */ +static const BYTE PC2[64] = { + 14, 17, 11, 24, 1, 5, 0, 0, /* S1 */ + 3, 28, 15, 6, 21, 10, 0, 0, /* S2 */ + 23, 19, 12, 4, 26, 8, 0, 0, /* S3 */ + 16, 7, 27, 20, 13, 2, 0, 0, /* S4 */ + + 41, 52, 31, 37, 47, 55, 0, 0, /* S5 */ + 30, 40, 51, 45, 33, 48, 0, 0, /* S6 */ + 44, 49, 39, 56, 34, 53, 0, 0, /* S7 */ + 46, 42, 50, 36, 29, 32, 0, 0 /* S8 */ +}; + +/* This table represents the same info as PC2, except that + * The bits of C0 and D0 are each numbered right to left, 0-27. + * -1 values indicate bits that are always zero. + * As before all the bits in the first 4 rows come from C0, + * and all the bits in the second 4 rows come from D0. + */ +static signed char PC2a[64] = { + /* bits of C0 */ + 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */ + 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */ + 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */ + 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */ + /* bits of D0 */ + 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */ + 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */ + 12, 7, 17, 0, 22, 3, -1, -1, /* S7 */ + 10, 14, 6, 20, 27, 24, -1, -1 /* S8 */ +}; + +/* This table represents the same info as PC2a, except that + * The order of the rows has been changed to increase the efficiency + * with which the key schedule is created.
+ * Fewer shifts and ANDs are required to make the KS from these. + */ +static const signed char PC2b[64] = { + /* bits of C0 */ + 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */ + 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */ + 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */ + 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */ + /* bits of D0 */ + 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */ + 10, 14, 6, 20, 27, 24, -1, -1, /* S8 */ + 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */ + 12, 7, 17, 0, 22, 3, -1, -1 /* S7 */ +}; + +/* Only 24 of the 28 bits in C0 and D0 are used in PC2. + * The used bits of C0 and D0 are grouped into 4 groups of 6, + * so that the PC2 permutation can be accomplished with 4 lookups + * in tables of 64 entries. + * The following table shows how the bits of C0 and D0 are grouped + * into indexes for the respective table lookups. + * Bits are numbered right-to-left, 0-27, as in PC2b. + */ +static BYTE NDX[48] = { + /* Bits of C0 */ + 27, 26, 25, 24, 23, 22, /* C0 table 0 */ + 18, 17, 16, 15, 14, 13, /* C0 table 1 */ + 9, 8, 7, 2, 1, 0, /* C0 table 2 */ + 5, 4, 21, 20, 12, 11, /* C0 table 3 */ + /* bits of D0 */ + 27, 26, 25, 24, 23, 22, /* D0 table 0 */ + 20, 19, 17, 16, 15, 14, /* D0 table 1 */ + 12, 11, 10, 9, 8, 7, /* D0 table 2 */ + 6, 5, 4, 3, 1, 0 /* D0 table 3 */ +}; + +/* Here's the code that does that grouping. 
+ left = PC2LOOKUP(0, 0, ((c0 >> 22) & 0x3F) ); + left |= PC2LOOKUP(0, 1, ((c0 >> 13) & 0x3F) ); + left |= PC2LOOKUP(0, 2, ((c0 >> 4) & 0x38) | (c0 & 0x7) ); + left |= PC2LOOKUP(0, 3, ((c0>>18)&0xC) | ((c0>>11)&0x3) | (c0&0x30)); + + right = PC2LOOKUP(1, 0, ((d0 >> 22) & 0x3F) ); + right |= PC2LOOKUP(1, 1, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf) ); + right |= PC2LOOKUP(1, 2, ((d0 >> 7) & 0x3F) ); + right |= PC2LOOKUP(1, 3, ((d0 >> 1) & 0x3C) | (d0 & 0x3)); +*/ + +void +make_pc2a(void) +{ + + int i, j; + + for (i = 0; i < 64; ++i) { + j = PC2[i]; + if (j == 0) + j = -1; + else if (j < 29) + j = 28 - j; + else + j = 56 - j; + PC2a[i] = j; + } + for (i = 0; i < 64; i += 8) { + printf("%3d,%3d,%3d,%3d,%3d,%3d,%3d,%3d,\n", + PC2a[i + 0], PC2a[i + 1], PC2a[i + 2], PC2a[i + 3], + PC2a[i + 4], PC2a[i + 5], PC2a[i + 6], PC2a[i + 7]); + } +} + +HALF PC2cd0[64]; + +HALF PC_2H[8][64]; + +void +mktable() +{ + int i; + int table; + const BYTE* ndx = NDX; + HALF mask; + + mask = 0x80000000; + for (i = 0; i < 32; ++i, mask >>= 1) { + int bit = PC2b[i]; + if (bit < 0) + continue; + PC2cd0[bit + 32] = mask; + } + + mask = 0x80000000; + for (i = 32; i < 64; ++i, mask >>= 1) { + int bit = PC2b[i]; + if (bit < 0) + continue; + PC2cd0[bit] = mask; + } + +#if DEBUG + for (i = 0; i < 64; ++i) { + printf("0x%08x,\n", PC2cd0[i]); + } +#endif + for (i = 0; i < 24; ++i) { + NDX[i] += 32; /* because c0 is the upper half */ + } + + for (table = 0; table < 8; ++table) { + HALF bitvals[6]; + for (i = 0; i < 6; ++i) { + bitvals[5 - i] = PC2cd0[*ndx++]; + } + for (i = 0; i < 64; ++i) { + int j; + int k = 0; + HALF value = 0; + + for (j = i; j; j >>= 1, ++k) { + if (j & 1) { + value |= bitvals[k]; + } + } + PC_2H[table][i] = value; + } + printf("/* table %d */ {\n", table); + for (i = 0; i < 64; i += 4) { + printf(" 0x%08x, 0x%08x, 0x%08x, 0x%08x, \n", + PC_2H[table][i], PC_2H[table][i + 1], + PC_2H[table][i + 2], PC_2H[table][i + 3]); + } + printf(" },\n"); + } +} + +int +main(void) +{ + /* 
make_pc2a(); */ + mktable(); + return 0; +} diff --git a/security/nss/lib/freebl/mksp.c b/security/nss/lib/freebl/mksp.c new file mode 100644 index 0000000000..ca83ac8e7c --- /dev/null +++ b/security/nss/lib/freebl/mksp.c @@ -0,0 +1,119 @@ +/* + * mksp.c + * + * Generate SP tables for DES-150 library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdio.h> + +/* + * sboxes - the tables for the s-box functions + * from FIPS 46, pages 15-16. + */ +unsigned char S[8][64] = { + /* Func S1 = */ + { 14, 0, 4, 15, 13, 7, 1, 4, 2, 14, 15, 2, 11, 13, 8, 1, + 3, 10, 10, 6, 6, 12, 12, 11, 5, 9, 9, 5, 0, 3, 7, 8, + 4, 15, 1, 12, 14, 8, 8, 2, 13, 4, 6, 9, 2, 1, 11, 7, + 15, 5, 12, 11, 9, 3, 7, 14, 3, 10, 10, 0, 5, 6, 0, 13 }, + /* Func S2 = */ + { 15, 3, 1, 13, 8, 4, 14, 7, 6, 15, 11, 2, 3, 8, 4, 14, + 9, 12, 7, 0, 2, 1, 13, 10, 12, 6, 0, 9, 5, 11, 10, 5, + 0, 13, 14, 8, 7, 10, 11, 1, 10, 3, 4, 15, 13, 4, 1, 2, + 5, 11, 8, 6, 12, 7, 6, 12, 9, 0, 3, 5, 2, 14, 15, 9 }, + /* Func S3 = */ + { 10, 13, 0, 7, 9, 0, 14, 9, 6, 3, 3, 4, 15, 6, 5, 10, + 1, 2, 13, 8, 12, 5, 7, 14, 11, 12, 4, 11, 2, 15, 8, 1, + 13, 1, 6, 10, 4, 13, 9, 0, 8, 6, 15, 9, 3, 8, 0, 7, + 11, 4, 1, 15, 2, 14, 12, 3, 5, 11, 10, 5, 14, 2, 7, 12 }, + /* Func S4 = */ + { 7, 13, 13, 8, 14, 11, 3, 5, 0, 6, 6, 15, 9, 0, 10, 3, + 1, 4, 2, 7, 8, 2, 5, 12, 11, 1, 12, 10, 4, 14, 15, 9, + 10, 3, 6, 15, 9, 0, 0, 6, 12, 10, 11, 1, 7, 13, 13, 8, + 15, 9, 1, 4, 3, 5, 14, 11, 5, 12, 2, 7, 8, 2, 4, 14 }, + /* Func S5 = */ + { 2, 14, 12, 11, 4, 2, 1, 12, 7, 4, 10, 7, 11, 13, 6, 1, + 8, 5, 5, 0, 3, 15, 15, 10, 13, 3, 0, 9, 14, 8, 9, 6, + 4, 11, 2, 8, 1, 12, 11, 7, 10, 1, 13, 14, 7, 2, 8, 13, + 15, 6, 9, 15, 12, 0, 5, 9, 6, 10, 3, 4, 0, 5, 14, 3 }, + /* Func S6 = */ + { 12, 10, 1, 15, 10, 4, 15, 2, 9, 7, 2, 12, 6, 9, 8, 5, + 0, 6, 13, 1, 3, 13, 4, 14, 14, 0, 7, 11, 5, 3,
11, 8, + 9, 4, 14, 3, 15, 2, 5, 12, 2, 9, 8, 5, 12, 15, 3, 10, + 7, 11, 0, 14, 4, 1, 10, 7, 1, 6, 13, 0, 11, 8, 6, 13 }, + /* Func S7 = */ + { 4, 13, 11, 0, 2, 11, 14, 7, 15, 4, 0, 9, 8, 1, 13, 10, + 3, 14, 12, 3, 9, 5, 7, 12, 5, 2, 10, 15, 6, 8, 1, 6, + 1, 6, 4, 11, 11, 13, 13, 8, 12, 1, 3, 4, 7, 10, 14, 7, + 10, 9, 15, 5, 6, 0, 8, 15, 0, 14, 5, 2, 9, 3, 2, 12 }, + /* Func S8 = */ + { 13, 1, 2, 15, 8, 13, 4, 8, 6, 10, 15, 3, 11, 7, 1, 4, + 10, 12, 9, 5, 3, 6, 14, 11, 5, 0, 0, 14, 12, 9, 7, 2, + 7, 2, 11, 1, 4, 14, 1, 7, 9, 4, 12, 10, 14, 8, 2, 13, + 0, 15, 6, 12, 10, 9, 13, 0, 15, 3, 3, 5, 5, 6, 8, 11 } +}; + +/* + * Permutation function for results from s-boxes + * from FIPS 46 pages 12 and 16. + * P = + */ +unsigned char P[32] = { + 16, 7, 20, 21, 29, 12, 28, 17, + 1, 15, 23, 26, 5, 18, 31, 10, + 2, 8, 24, 14, 32, 27, 3, 9, + 19, 13, 30, 6, 22, 11, 4, 25 +}; + +unsigned int Pinv[32]; +unsigned int SP[8][64]; + +void +makePinv(void) +{ + int i; + unsigned int Pi = 0x80000000; + for (i = 0; i < 32; ++i) { + int j = 32 - P[i]; + Pinv[j] = Pi; + Pi >>= 1; + } +} + +void +makeSP(void) +{ + int box; + for (box = 0; box < 8; ++box) { + int item; + printf("/* box S%d */ {\n", box + 1); + for (item = 0; item < 64; ++item) { + unsigned int s = S[box][item]; + unsigned int val = 0; + unsigned int bitnum = (7 - box) * 4; + for (; s; s >>= 1, ++bitnum) { + if (s & 1) { + val |= Pinv[bitnum]; + } + } + val = (val << 3) | (val >> 29); + SP[box][item] = val; + } + for (item = 0; item < 64; item += 4) { + printf("\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n", + SP[box][item], SP[box][item + 1], SP[box][item + 2], SP[box][item + 3]); + } + printf(" },\n"); + } +} + +int +main() +{ + makePinv(); + makeSP(); + return 0; +} diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README new file mode 100644 index 0000000000..a49aa9d8d7 --- /dev/null +++ b/security/nss/lib/freebl/mpi/README @@ -0,0 +1,646 @@ +This Source Code Form is subject to the terms of the Mozilla 
Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +About the MPI Library +--------------------- + +The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision +signed integer arithmetic package. The implementation is not the most +efficient possible, but the code is small and should be fairly easily +portable to just about any machine that supports an ANSI C compiler, +as long as it is capable of at least 16-bit arithmetic (but also see +below for more on this). + +This library was written with an eye to cryptographic applications; +thus, some care is taken to make sure that temporary values are not +left lying around in memory when they are no longer in use. This adds +some overhead for zeroing buffers before they are released back into +the free pool; however, it gives you the assurance that there is only +one copy of your important values residing in your process's address +space at a time. Obviously, it is difficult to guarantee anything, in +a pre-emptive multitasking environment, but this at least helps you +keep a lid on the more obvious ways your data can get spread around in +memory. + + +Using the Library +----------------- + +To use the MPI library in your program, you must include the header: + +#include "mpi.h" + +This header provides all the type and function declarations you'll +need to use the library. Almost all the names defined by the library +begin with the prefix 'mp_', so it should be easy to keep them from +clashing with your program's namespace (he says, glibly, knowing full +well there are always pathological cases). + +There are a few things you may want to configure about the library. +By default, the MPI library uses an unsigned short for its digit type, +and an unsigned int for its word type. The word type must be big +enough to contain at least two digits, for the primitive arithmetic to +work out. 
On my machine, a short is 2 bytes and an int is 4 bytes -- +but if you have 64-bit ints, you might want to use a 4-byte digit and +an 8-byte word. I have tested the library using 1-byte digits and +2-byte words, as well. Whatever you choose to do, the things you need +to change are: + +(1) The type definitions for mp_digit and mp_word. + +(2) The macro DIGIT_FMT which tells mp_print() how to display a + single digit. This is just a printf() format string, so you + can adjust it appropriately. + +(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the + largest value expressible in an mp_digit and an mp_word, + respectively. + +Both the mp_digit and mp_word should be UNSIGNED integer types. The +code relies on having the full positive precision of the type used for +digits and words. + +The remaining type definitions should be left alone, for the most +part. The code in the library does not make any significant +assumptions about the sizes of things, but there is little if any +reason to change the other parameters, so I would recommend you leave +them as you found them. + + +Conventions +----------- + +Most functions in the library return a value of type mp_err. This +permits the library to communicate success or various kinds of failure +to the calling program. The return values currently defined are: + + MP_OKAY - okay, operation succeeded, all's well + MP_YES - okay, the answer is yes (same as MP_OKAY) + MP_NO - okay, but answer is no (not MP_OKAY) + MP_MEM - operation ran out of memory + MP_RANGE - input parameter was out of range + MP_BADARG - an invalid input parameter was provided + MP_UNDEF - no output value is defined for this input + +The only function which currently uses MP_UNDEF is mp_invmod(). +Division by zero is undefined, but the division functions will return +MP_RANGE for a zero divisor. MP_BADARG usually means you passed a +bogus mp_int structure to the function. 
MP_YES and MP_NO are not used +by the library itself; they're defined so you can use them in your own +extensions. + +If you need a readable interpretation of these error codes in your +program, you may also use the mp_strerror() function. This function +takes an mp_err as input, and returns a pointer to a human-readable +string describing the meaning of the error. These strings are stored +as constants within the library, so the caller should not attempt to +modify or free the memory associated with these strings. + +The library represents values in signed-magnitude format. Values +strictly less than zero are negative, all others are considered +positive (zero is positive by fiat). You can access the 'sign' member +of the mp_int structure directly, but better is to use the mp_cmp_z() +function, to find out which side of zero the value lies on. + +Most arithmetic functions have a single-digit variant, as well as the +full arbitrary-precision. An mp_digit is an unsigned value between 0 +and DIGIT_MAX inclusive. The radix is available as RADIX. The number +of bits in a given digit is given as DIGIT_BIT. + +Generally, input parameters are given before output parameters. +Unless otherwise specified, any input parameter can be re-used as an +output parameter, without confusing anything. + +The basic numeric type defined by the library is an mp_int. Virtually +all the functions in the library take a pointer to an mp_int as one of +their parameters. An explanation of how to create and use these +structures follows. And so, without further ado... + + +Initialization and Cleanup +-------------------------- + +The basic numeric type defined by the library is an 'mp_int'. +However, it is not sufficient to simply declare a variable of type +mp_int in your program. These variables also need to be initialized +before they can be used, to allocate the internal storage they require +for computation. 
+ +This is done using one of the following functions: + + mp_init(mp_int *mp); + mp_init_copy(mp_int *mp, mp_int *from); + mp_init_size(mp_int *mp, mp_size p); + +Each of these requires a pointer to a structure of type mp_int. The +basic mp_init() simply initializes the mp_int to a default size, and +sets its value to zero. If you would like to initialize a copy of an +existing mp_int, use mp_init_copy(), where the 'from' parameter is the +mp_int you'd like to make a copy of. The third function, +mp_init_size(), permits you to specify how many digits of precision +should be preallocated for your mp_int. This can help the library +avoid unnecessary re-allocations later on. + +The default precision used by mp_init() can be retrieved using: + + precision = mp_get_prec(); + +This returns the number of digits that will be allocated. You can +change this value by using: + + mp_set_prec(unsigned int prec); + +Any positive value is acceptable -- if you pass zero, the default +precision will be re-set to the compiled-in library default (this is +specified in the header file 'mpi-config.h', and typically defaults to +8 or 16). + +Just as you must allocate an mp_int before you can use it, you must +clean up the structure when you are done with it. This is performed +using the mp_clear() function. Remember that any mp_int that you +create as a local variable in a function must be mp_clear()'d before +that function exits, or else the memory allocated to that mp_int will +be orphaned and unrecoverable. + +To set an mp_int to a given value, the following functions are given: + + mp_set(mp_int *mp, mp_digit d); + mp_set_int(mp_int *mp, long z); + mp_set_ulong(mp_int *mp, unsigned long z); + +The mp_set() function sets the mp_int to a single digit value, while +mp_set_int() sets the mp_int to a signed long integer value. 
+ +To set an mp_int to zero, use: + + mp_zero(mp_int *mp); + + +Copying and Moving +------------------ + +If you have two initialized mp_int's, and you want to copy the value +of one into the other, use: + + mp_copy(from, to) + +This takes care of clearing the old value of 'to', and copies the new +value into it. If 'to' is not yet initialized, use mp_init_copy() +instead (see above). + +Note: The library tries, whenever possible, to avoid allocating +---- new memory. Thus, mp_copy() tries first to satisfy the needs + of the copy by re-using the memory already allocated to 'to'. + Only if this proves insufficient will mp_copy() actually + allocate new memory. + + For this reason, if you know a priori that 'to' has enough + available space to hold 'from', you don't need to check the + return value of mp_copy() for memory failure. The USED() + macro tells you how many digits are used by an mp_int, and + the ALLOC() macro tells you how many are allocated. + +If you have two initialized mp_int's, and you want to exchange their +values, use: + + mp_exch(a, b) + +This is better than using mp_copy() with a temporary, since it will +not (ever) touch the memory allocator -- it just swaps the exact +contents of the two structures. The mp_exch() function cannot fail; +if you pass it an invalid structure, it just ignores it, and does +nothing. + + +Basic Arithmetic +---------------- + +Once you have initialized your integers, you can operate on them. The +basic arithmetic functions on full mp_int values are: + +mp_add(a, b, c) - computes c = a + b +mp_sub(a, b, c) - computes c = a - b +mp_mul(a, b, c) - computes c = a * b +mp_sqr(a, b) - computes b = a * a +mp_div(a, b, q, r) - computes q, r such that a = bq + r +mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d +mp_expt(a, b, c) - computes c = a ** b +mp_2expt(a, k) - computes a = 2^k + +The mp_div_2d() function efficiently computes division by powers of +two. 
Either the q or r parameter may be NULL, in which case that +portion of the computation will be discarded. + +The algorithms used for some of the computations here are described in +the following files which are included with this distribution: + +mul.txt Describes the multiplication algorithm +div.txt Describes the division algorithm +expt.txt Describes the exponentiation algorithm +sqrt.txt Describes the square-root algorithm +square.txt Describes the squaring algorithm + +There are single-digit versions of most of these routines, as well. +In the following prototypes, 'd' is a single mp_digit: + +mp_add_d(a, d, c) - computes c = a + d +mp_sub_d(a, d, c) - computes c = a - d +mp_mul_d(a, d, c) - computes c = a * d +mp_mul_2(a, c) - computes c = a * 2 +mp_div_d(a, d, q, r) - computes q, r such that a = dq + r +mp_div_2(a, c) - computes c = a / 2 +mp_expt_d(a, d, c) - computes c = a ** d + +The mp_mul_2() and mp_div_2() functions take advantage of the internal +representation of an mp_int to do multiplication by two more quickly +than mp_mul_d() would. Other basic functions of an arithmetic variety +include: + +mp_zero(a) - assign 0 to a +mp_neg(a, c) - negate a: c = -a +mp_abs(a, c) - absolute value: c = |a| + + +Comparisons +----------- + +Several comparison functions are provided. Each of these, unless +otherwise specified, returns zero if the comparands are equal, < 0 if +the first is less than the second, and > 0 if the first is greater +than the second: + +mp_cmp_z(a) - compare a <=> 0 +mp_cmp_d(a, d) - compare a <=> d, d is a single digit +mp_cmp(a, b) - compare a <=> b +mp_cmp_mag(a, b) - compare |a| <=> |b| +mp_isodd(a) - return nonzero if odd, zero otherwise +mp_iseven(a) - return nonzero if even, zero otherwise + + +Modular Arithmetic +------------------ + +Modular variations of the basic arithmetic functions are also +supported. These are available if the MP_MODARITH parameter in +mpi-config.h is turned on (it is by default).
The modular arithmetic +functions are: + +mp_mod(a, m, c) - compute c = a (mod m), 0 <= c < m +mp_mod_d(a, d, c) - compute c = a (mod d), 0 <= c < d (see below) +mp_addmod(a, b, m, c) - compute c = (a + b) mod m +mp_submod(a, b, m, c) - compute c = (a - b) mod m +mp_mulmod(a, b, m, c) - compute c = (a * b) mod m +mp_sqrmod(a, m, c) - compute c = (a * a) mod m +mp_exptmod(a, b, m, c) - compute c = (a ** b) mod m +mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m + +The mp_sqr() function squares its input argument. A call to mp_sqr(a, +c) is identical in meaning to mp_mul(a, a, c); however, if the +MP_SQUARE variable is set true in mpi-config.h (see below), then it +will be implemented with a different algorithm, that is supposed to +take advantage of the redundant computation that takes place during +squaring. Unfortunately, some compilers result in worse performance +on this code, so you can change the behaviour at will. There is a +utility program "mulsqr.c" that lets you test which does better on +your system. + +The mp_sqrmod() function is analogous to the mp_sqr() function; it +uses the mp_sqr() function rather than mp_mul(), and then performs the +modular reduction. This probably won't help much unless you are doing +a lot of them. + +See the file 'square.txt' for a synopsis of the algorithm used. + +Note: The mp_mod_d() function computes a modular reduction around +---- a single digit d. The result is a single digit c. + +Because an inverse is defined for a (mod m) if and only if (a, m) = 1 +(that is, if a and m are relatively prime), mp_invmod() may not be +able to compute an inverse for the arguments. In this case, it +returns the value MP_UNDEF, and does not modify c. If an inverse is +defined, however, it returns MP_OKAY, and sets c to the value of the +inverse (mod m). + +See the file 'redux.txt' for a description of the modular reduction +algorithm used by mp_exptmod(). 
+ + +Greatest Common Divisor +----------------------- + +The greatest common divisor of two values can be found using one of the +following functions: + +mp_gcd(a, b, c) - compute c = (a, b) using binary algorithm +mp_lcm(a, b, c) - compute c = [a, b] = ab / (a, b) +mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b) + +Also provided is a function to compute modular inverses, if they +exist: + +mp_invmod(a, m, c) - compute c = a^-1 (mod m), if it exists + +The function mp_xgcd() computes the greatest common divisor, and also +returns values of x and y satisfying Bezout's identity. This is used +by mp_invmod() to find modular inverses. However, if you do not need +these values, you will find that mp_gcd() is MUCH more efficient, +since it doesn't need all the intermediate values that mp_xgcd() +requires in order to compute x and y. + +The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD +algorithm due to Josef Stein. + + +Input & Output Functions +------------------------ + +The following basic I/O routines are provided. These are present at +all times: + +mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int +mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int +mp_radix_size(mp, r) - return length of buffer needed by mp_toradix() +mp_raw_size(mp) - return length of buffer needed by mp_toraw() +mp_toradix(mp, str, r) - convert an mp_int to a string of radix r + digits +mp_toraw(mp, str) - convert an mp_int to a string of bytes +mp_tovalue(ch, r) - convert ch to its value when taken as + a radix r digit, or -1 if invalid +mp_strerror(err) - get a string describing mp_err value 'err' + +If you compile the MPI library with MP_IOFUNC defined, you will also +have access to the following additional I/O function: + +mp_print(mp, ofp) - print an mp_int as text to output stream ofp + +Note that mp_radix_size() returns a size in bytes guaranteed to be AT +LEAST big enough for the digits output by mp_toradix().
Because it +uses an approximation technique to figure out how many digits will be +needed, it may return a figure which is larger than necessary. Thus, +the caller should not rely on the value to determine how many bytes +will actually be written by mp_toradix(). The string mp_toradix() +creates will be NUL terminated, so the standard C library function +strlen() should be able to ascertain this for you, if you need it. + +The mp_read_radix() and mp_toradix() functions support bases from 2 to +64 inclusive. If you require more general radix conversion facilities +than this, you will need to write them yourself (that's why mp_div_d() +is provided, after all). + +Note: mp_read_radix() will accept as digits either capital or +---- lower-case letters. However, the current implementation of + mp_toradix() only outputs upper-case letters, when writing + bases between 10 and 36. The underlying code supports using + lower-case letters, but the interface stub does not have a + selector for it. You can add one yourself if you think it + is worthwhile -- I do not. Bases from 36 to 64 use lower- + case letters as distinct from upper-case. Bases 63 and + 64 use the characters '+' and '/' as digits. + + Note also that compiling with MP_IOFUNC defined will cause + inclusion of <stdio.h>, so if you are trying to write code + which does not depend on the standard C library, you will + probably want to avoid this option. This is needed because + the mp_print() function takes a standard library FILE * as + one of its parameters, and uses the fprintf() function. + +The mp_toraw() function converts the integer to a sequence of bytes, +in big-endian ordering (most-significant byte first). Assuming your +bytes are 8 bits wide, this corresponds to base 256. The sign is +encoded as a single leading byte, whose value is 0 for zero or +positive values, or 1 for negative values. 
The mp_read_raw() function +reverses this process -- it takes a buffer of bytes, interprets the +first as a sign indicator (0 = zero/positive, nonzero = negative), and +the rest as a sequence of 1-byte digits in big-endian ordering. + +The mp_raw_size() function returns the exact number of bytes required +to store the given integer in "raw" format (as described in the +previous paragraph). Zero is returned in case of error; a valid +integer will require at least three bytes of storage. + +In previous versions of the MPI library, an "external representation +format" was supported. This was removed, however, because I found I +was never using it, it was not as portable as I would have liked, and +I decided it was a waste of space. + + +Other Functions +--------------- + +The files 'mpprime.h' and 'mpprime.c' define some routines which are +useful for divisibility testing and probabilistic primality testing. +The routines defined are: + +mpp_divis(a, b) - is a divisible by b? +mpp_divis_d(a, d) - is a divisible by digit d? +mpp_random(a) - set a to random value at current precision +mpp_random_size(a, prec) - set a to random value at given precision + +Note: The mpp_random() and mpp_random_size() functions use the C +---- library's rand() function to generate random values. It is + up to the caller to seed this generator before it is called. + These functions are not suitable for generating quantities + requiring cryptographic-quality randomness; they are intended + primarily for use in primality testing. + + Note too that the MPI library does not call srand(), so your + application should do this, if you ever want the sequence + to change. + +mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits + in v? If so, let w be the index of + that digit + +mpp_divis_primes(a, np) - is a divisible by any of the first np + primes? If so, set np to the prime + which divided a. + +mpp_fermat(a, w) - test if w^a = w (mod a). If so, + returns MP_YES, otherwise MP_NO. 
+ +mpp_pprime(a, nt) - perform nt iterations of the Rabin- + Miller probabilistic primality test + on a. Returns MP_YES if all tests + passed, or MP_NO if any test fails. + +The mpp_fermat() function works based on Fermat's little theorem, a +consequence of which is that if p is a prime, and (w, p) = 1, then: + + w^p = w (mod p) + +Put another way, if w^p != w (mod p), then p is not prime. The test +is expensive to compute, but it helps to quickly eliminate an enormous +class of composite numbers prior to Rabin-Miller testing. + +Building the Library +-------------------- + +The MPI library is designed to be as self-contained as possible. You +should be able to compile it with your favourite ANSI C compiler, and +link it into your program directly. If you are on a Unix system using +the GNU C compiler (gcc), the following should work: + +% gcc -ansi -pedantic -Wall -O2 -c mpi.c + +The file 'mpi-config.h' defines several configurable parameters for +the library, which you can adjust to suit your application. At the +time of this writing, the available options are: + +MP_IOFUNC - Define true to include the mp_print() function, + which is moderately useful for debugging. This + implicitly includes <stdio.h>. + +MP_MODARITH - Define true to include the modular arithmetic + functions. If you don't need modular arithmetic + in your application, you can set this to zero to + leave out all the modular routines. + +MP_LOGTAB - If true, the file "logtab.h" is included, which + is basically a static table of base 2 logarithms. + These are used to compute how big the buffers for + radix conversion need to be. If you set this false, + the library includes <math.h> and uses log(). This + typically forces you to link against math libraries. + + +MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument + checking macro, ARGCHK(), gets expanded. If this + is set to zero, ARGCHK() expands to nothing; no + argument checks are performed. 
If this is 1, the + ARGCHK() macro expands to code that returns MP_BADARG + or similar at runtime. If it is 2, ARGCHK() expands + to an assert() call that aborts the program on a + bad input. + +MP_DEBUG - Turns on debugging output. This is probably not at + all useful unless you are debugging the library. It + tends to spit out a LOT of output. + +MP_DEFPREC - The default precision of a newly-created mp_int, in + digits. The precision can be changed at runtime by + the mp_set_prec() function, but this is its initial + value. + +MP_SQUARE - If this is set to a nonzero value, the mp_sqr() + function will use an alternate algorithm that takes + advantage of the redundant inner product computation + when both multiplicands are identical. Unfortunately, + with some compilers this is actually SLOWER than just + calling mp_mul() with the same argument twice. So + if you set MP_SQUARE to zero, mp_sqr() will be expan- + ded into a call to mp_mul(). This applies to all + the uses of mp_sqr(), including mp_sqrmod() and the + internal calls to s_mp_sqr() inside mpi.c + + The program 'mulsqr' (mulsqr.c) can be used to test + which works best for your configuration. Set up the + CC and CFLAGS variables in the Makefile, then type: + + make mulsqr + + Invoke it with arguments similar to the following: + + mulsqr 25000 1024 + + That is, 25000 products computed on 1024-bit values. + The output will compare the two timings, and recommend + a setting for MP_SQUARE. It is off by default. + +If you would like to use the mp_print() function (see above), be sure +to define MP_IOFUNC in mpi-config.h. Many of the test drivers in the +'tests' subdirectory expect this to be defined (although the test +driver 'mpi-test' doesn't need it) + +The Makefile which comes with the library should take care of building +the library for you, if you have set the CC and CFLAGS variables at +the top of the file appropriately. 
By default, they are set up to +use the GNU C compiler: + +CC=gcc +CFLAGS=-ansi -pedantic -Wall -O2 + +If all goes well, the library should compile without warnings using +this combination. You should, of course, make whatever adjustments +you find necessary. + +The MPI library distribution comes with several additional programs +which are intended to demonstrate the use of the library, and provide +a framework for testing it. There are a handful of test driver +programs, in the files named 'mptest-X.c', where X is a digit. Also, +there are some simple command-line utilities (in the 'utils' +directory) for manipulating large numbers. These include: + +basecvt.c A radix-conversion program, supporting bases from + 2 to 64 inclusive. + +bbsrand.c A BBS (quadratic residue) pseudo-random number + generator. The file 'bbsrand.c' is just the driver + for the program; the real code lives in the files + 'bbs_rand.h' and 'bbs_rand.c' + +dec2hex.c Converts decimal to hexadecimal + +gcd.c Computes the greatest common divisor of two values. + If invoked as 'xgcd', also computes constants x and + y such that (a, b) = ax + by, in accordance with + Bezout's identity. + +hex2dec.c Converts hexadecimal to decimal + +invmod.c Computes modular inverses + +isprime.c Performs the Rabin-Miller probabilistic primality + test on a number. Values which fail this test are + definitely composite, and those which pass are very + likely to be prime (although there are no guarantees) + +lap.c Computes the order (least annihilating power) of + a value v modulo m. Very dumb algorithm. + +primegen.c Generates large (probable) primes. + +prng.c A pseudo-random number generator based on the + BBS generator code in 'bbs_rand.c' + +sieve.c Implements the Sieve of Eratosthenes, using a big + bitmap, to generate a list of prime numbers. + +fact.c Computes the factorial of an arbitrary precision + integer (iterative). 
+ +exptmod.c Computes arbitrary precision modular exponentiation + from the command line (exptmod a b m -> a^b (mod m)) + +Most of these can be built from the Makefile that comes with the +library. Try 'make tools', if your environment supports it. + + +Acknowledgements: +---------------- + +The algorithms used in this library were drawn primarily from Volume +2 of Donald Knuth's magnum opus, _The Art of Computer Programming_, +"Seminumerical Algorithms". Barrett's algorithm for modular reduction +came from Menezes, Oorschot, and Vanstone's _Handbook of Applied +Cryptography_, Chapter 14. + +Thanks are due to Tom St. Denis, for finding an obnoxious sign-related +bug in mp_read_raw() that made things break on platforms which use +signed chars. + +About the Author +---------------- + +This software was written by Michael J. Fromberger. You can contact +the author as follows: + +E-mail: + +Postal: 8000 Cummings Hall, Thayer School of Engineering + Dartmouth College, Hanover, New Hampshire, USA + +PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html + 9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907 + +Last updated: 16-Jan-2000 diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE b/security/nss/lib/freebl/mpi/doc/LICENSE new file mode 100644 index 0000000000..35cca68ce9 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/LICENSE @@ -0,0 +1,11 @@ +Within this directory, each of the files listed below is licensed under +the terms given in the file LICENSE-MPL, also in this directory. + +basecvt.pod +gcd.pod +invmod.pod +isprime.pod +lap.pod +mpi-test.pod +prime.txt +prng.pod diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE-MPL b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL new file mode 100644 index 0000000000..41dc2327f1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL @@ -0,0 +1,3 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. 
If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/doc/basecvt.pod b/security/nss/lib/freebl/mpi/doc/basecvt.pod new file mode 100644 index 0000000000..c3d87fbc7e --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/basecvt.pod @@ -0,0 +1,65 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + basecvt - radix conversion for arbitrary precision integers + +=head1 SYNOPSIS + + basecvt [values] + +=head1 DESCRIPTION + +The B program is a command-line tool for converting integers +of arbitrary precision from one radix to another. The current version +supports radix values from 2 (binary) to 64, inclusive. The first two +command line arguments specify the input and output radix, in base 10. +Any further arguments are taken to be integers notated in the input +radix, and these are converted to the output radix. The output is +written, one integer per line, to standard output. + +When reading integers, only digits considered "valid" for the input +radix are considered. Processing of an integer terminates when an +invalid input digit is encountered. So, for example, if you set the +input radix to 10 and enter '10ACF', B would assume that you +had entered '10' and ignore the rest of the string. + +If no values are provided, no output is written, but the program +simply terminates with a zero exit status. Error diagnostics are +written to standard error in the event of out-of-range radix +specifications. Regardless of the actual values of the input and +output radix, the radix arguments are taken to be in base 10 (decimal) +notation. + +=head1 DIGITS + +For radices from 2-10, standard ASCII decimal digits 0-9 are used for +both input and output. 
For radices from 11-36, the ASCII letters A-Z +are also included, following the convention used in hexadecimal. In +this range, input is accepted in either upper or lower case, although +on output only lower-case letters are used. + +For radices from 37-62, the output includes both upper- and lower-case +ASCII letters, and case matters. In this range, case is distinguished +both for input and for output values. + +For radices 63 and 64, the characters '+' (plus) and '/' (forward +solidus) are also used. These are derived from the MIME base64 +encoding scheme. The overall encoding is not the same as base64, +because the ASCII digits are used for the bottom of the range, and the +letters are shifted upward; however, the output will consist of the +same character set. + +This input and output behaviour is inherited from the MPI library used +by B, and so is not configurable at runtime. + +=head1 SEE ALSO + + dec2hex(1), hex2dec(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/build b/security/nss/lib/freebl/mpi/doc/build new file mode 100755 index 0000000000..4d75b1e5a2 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/build @@ -0,0 +1,30 @@ +#!/bin/sh +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +VERS="1.7p6" +SECT="1" +NAME="MPI Tools" + +echo "Building manual pages ..." +case $# in + 0) + files=`ls *.pod` + ;; + *) + files=$* + ;; +esac + +for name in $files +do + echo -n "$name ... " +# sname=`noext $name` + sname=`basename $name .pod` + pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT + echo "(done)" +done + +echo "Finished building." 
+ diff --git a/security/nss/lib/freebl/mpi/doc/div.txt b/security/nss/lib/freebl/mpi/doc/div.txt new file mode 100644 index 0000000000..c13fb6ef18 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/div.txt @@ -0,0 +1,64 @@ +Division + +This describes the division algorithm used by the MPI library. + +Input: a, b; a > b +Compute: Q, R; a = Qb + R + +The input numbers are normalized so that the high-order digit of b is +at least half the radix. This guarantees that we have a reasonable +way to guess at the digits of the quotient (this method was taken from +Knuth, vol. 2, with adaptations). + +To normalize, test the high-order digit of b. If it is less than half +the radix, multiply both a and b by d, where: + + radix - 1 + d = ----------- + bmax + 1 + +...where bmax is the high-order digit of b. Otherwise, set d = 1. + +Given normalized values for a and b, let the notation a[n] denote the +nth digit of a. Let #a be the number of significant figures of a (not +including any leading zeroes). + + Let R = 0 + Let p = #a - 1 + + while(p >= 0) + do + R = (R * radix) + a[p] + p = p - 1 + while(R < b and p >= 0) + + if(R < b) + break + + q = (R[#R - 1] * radix) + R[#R - 2] + q = q / b[#b - 1] + + T = b * q + + while(T > R) + q = q - 1 + T = T - b + endwhile + + R = R - T + + Q = (Q * radix) + q + + endwhile + +At this point, Q is the quotient, and R is the normalized remainder. +To denormalize R, compute: + + R = (R / d) + +At this point, you are finished. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/expt.txt b/security/nss/lib/freebl/mpi/doc/expt.txt new file mode 100644 index 0000000000..bd9d6f1960 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/expt.txt @@ -0,0 +1,94 @@ +Exponentiation + +For exponentiation, the MPI library uses a simple and fairly standard +square-and-multiply method. The algorithm is this: + +Input: a, b +Output: a ** b + + s = 1 + + while(b != 0) + if(b is odd) + s = s * a + endif + + b = b / 2 + + x = x * x + endwhile + + return s + +The modular exponentiation is done the same way, except replacing: + + s = s * a + +with + s = (s * a) mod m + +and replacing + + x = x * x + +with + + x = (x * x) mod m + +Here is a sample exponentiation using the MPI library, as compared to +the same problem solved by the Unix 'bc' program on my system: + +Computation of 2,381,283 ** 235 + +'bc' says: + +4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\ +4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\ +6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\ +4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\ +6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\ +FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\ +CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\ +5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\ +CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\ +49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\ +5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\ +A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\ +D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\ +92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\ +A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\ 
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\ +E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\ +1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\ +CFFF2E1AC93F3CA264A1B + +MPI says: + +4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\ +4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\ +6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\ +4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\ +6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\ +FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\ +CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\ +5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\ +CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\ +49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\ +5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\ +A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\ +D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\ +92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\ +A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\ +AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\ +E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\ +1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\ +CFFF2E1AC93F3CA264A1B + +Diff says: +% diff bc.txt mp.txt +% + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/gcd.pod b/security/nss/lib/freebl/mpi/doc/gcd.pod new file mode 100644 index 0000000000..b5b8fa34fd --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/gcd.pod @@ -0,0 +1,28 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + gcd - compute greatest common divisor of two integers + +=head1 SYNOPSIS + + gcd + +=head1 DESCRIPTION + +The B program computes the greatest common divisor of two +arbitrary-precision integers I and I. The result is written in +standard decimal notation to the standard output. + +If I is zero, B will print an error message and exit. + +=head1 SEE ALSO + +invmod(1), isprime(1), lap(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/invmod.pod b/security/nss/lib/freebl/mpi/doc/invmod.pod new file mode 100644 index 0000000000..0194f44884 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/invmod.pod @@ -0,0 +1,34 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + invmod - compute modular inverse of an integer + +=head1 SYNOPSIS + + invmod + +=head1 DESCRIPTION + +The B program computes the inverse of I, modulo I, if +that inverse exists. Both I and I are arbitrary-precision +integers in decimal notation. The result is written in standard +decimal notation to the standard output. + +If there is no inverse, the message: + + No inverse + +...will be printed to the standard output (an inverse exists if and +only if the greatest common divisor of I and I is 1). + +=head1 SEE ALSO + +gcd(1), isprime(1), lap(1) + +=head1 AUTHOR + + Michael J. 
Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/isprime.pod b/security/nss/lib/freebl/mpi/doc/isprime.pod new file mode 100644 index 0000000000..a8ec1f7ee3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/isprime.pod @@ -0,0 +1,63 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + isprime - probabilistic primality testing + +=head1 SYNOPSIS + + isprime + +=head1 DESCRIPTION + +The B program attempts to determine whether the arbitrary +precision integer I is prime. It first tests I for divisibility +by the first 170 or so small primes, and assuming I is not +divisible by any of these, applies 15 iterations of the Rabin-Miller +probabilistic primality test. + +If the program discovers that the number is composite, it will print: + + Not prime (reason) + +Where I is either: + + divisible by small prime x + +Or: + + failed nth pseudoprime test + +In the first case, I indicates the first small prime factor that +was found. In the second case, I indicates which of the +pseudoprime tests failed (numbered from 1) + +If this happens, the number is definitely not prime. However, if the +number succeeds, this message results: + + Probably prime, 1 in 4^15 chance of false positive + +If this happens, the number is prime with very high probability, but +its primality has not been absolutely proven, only demonstrated to a +very convincing degree. + +The value I can be input in standard decimal notation, or, if it is +prefixed with I, it will be read as hexadecimal. + +=head1 ENVIRONMENT + +You can control how many iterations of Rabin-Miller are performed on +the candidate number by setting the I environment variable +to an integer value before starting up B. This will change +the output slightly if the number passes all the tests. 
+ +=head1 SEE ALSO + +gcd(1), invmod(1), lap(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/lap.pod b/security/nss/lib/freebl/mpi/doc/lap.pod new file mode 100644 index 0000000000..47539fbbf9 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/lap.pod @@ -0,0 +1,36 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + lap - compute least annihilating power of a number + +=head1 SYNOPSIS + + lap + +=head1 DESCRIPTION + +The B program computes the order of I modulo I, for +arbitrary precision integers I and I. The B of I +modulo I is defined as the smallest positive value I for which +I raised to the Ith power, modulo I, is equal to 1. The +order may not exist, if I is composite. + +=head1 RESTRICTIONS + +This program is very slow, especially for large moduli. It is +intended as a way to help find primitive elements in a modular field, +but it does not do so in a particularly efficient manner. It was +written simply to help verify that a particular candidate does not +have an obviously short cycle mod I. + +=head1 SEE ALSO + +gcd(1), invmod(1), isprime(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/mpi-test.pod b/security/nss/lib/freebl/mpi/doc/mpi-test.pod new file mode 100644 index 0000000000..b05f866e5e --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/mpi-test.pod @@ -0,0 +1,51 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +=head1 NAME + + mpi-test - automated test program for MPI library + +=head1 SYNOPSIS + + mpi-test [quiet] + mpi-test list + mpi-test help + +=head1 DESCRIPTION + +The B program is a general unit test driver for the MPI +library. It is used to verify that the library works as it is +supposed to on your architecture. As with most such things, passing +all the tests in B does not guarantee the code is correct, +but if any of them fail, there are certainly problems. + +Each major function of the library can be tested individually. For a +list of the test suites understood by B, run it with the +I command line option: + + mpi-test list + +This will display a list of the available test suites and a brief +synopsis of what each one does. For a brief overview of this +document, run B I. + +B exits with a zero status if the selected test succeeds, or +a nonzero status if it fails. If a I which is not +understood by B is given, a diagnostic is printed to the +standard error, and the program exits with a result code of 2. If a +test fails, the result code will be 1, and a diagnostic is ordinarily +printed to the standard error. However, if the I option is +provided, these diagnostics will be suppressed. + +=head1 RESTRICTIONS + +Only a few canned test cases are provided. The solutions have been +verified using the GNU bc(1) program, so bugs there may cause problems +here; however, this is very unlikely, so if a test fails, it is almost +certainly my fault, not bc(1)'s. + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/mul.txt b/security/nss/lib/freebl/mpi/doc/mul.txt new file mode 100644 index 0000000000..975f56ddbe --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/mul.txt @@ -0,0 +1,77 @@ +Multiplication + +This describes the multiplication algorithm used by the MPI library. + +This is basically a standard "schoolbook" algorithm. 
It is slow -- +O(mn) for m = #a, n = #b -- but easy to implement and verify. +Basically, we run two nested loops, as illustrated here (R is the +radix): + +k = 0 +for j <- 0 to (#b - 1) + for i <- 0 to (#a - 1) + w = (a[j] * b[i]) + k + c[i+j] + c[i+j] = w mod R + k = w div R + endfor + c[i+j] = k; + k = 0; +endfor + +It is necessary that 'w' have room for at least two radix R digits. +The product of any two digits in radix R is at most: + + (R - 1)(R - 1) = R^2 - 2R + 1 + +Since a two-digit radix-R number can hold R^2 - 1 distinct values, +this insures that the product will fit into the two-digit register. + +To insure that two digits is enough for w, we must also show that +there is room for the carry-in from the previous multiplication, and +the current value of the product digit that is being recomputed. +Assuming each of these may be as big as R - 1 (and no larger, +certainly), two digits will be enough if and only if: + + (R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1 + +Solving this equation shows that, indeed, this is the case: + + R^2 - 2R + 1 + 2R - 2 <= R^2 - 1 + + R^2 - 1 <= R^2 - 1 + +This suggests that a good radix would be one more than the largest +value that can be held in half a machine word -- so, for example, as +in this implementation, where we used a radix of 65536 on a machine +with 4-byte words. Another advantage of a radix of this sort is that +binary-level operations are easy on numbers in this representation. 
+ +Here's an example multiplication worked out longhand in radix-10, +using the above algorithm: + + a = 999 + b = x 999 + ------------- + p = 98001 + +w = (a[jx] * b[ix]) + kin + c[ix + jx] +c[ix+jx] = w % RADIX +k = w / RADIX + product +ix jx a[jx] b[ix] kin w c[i+j] kout 000000 +0 0 9 9 0 81+0+0 1 8 000001 +0 1 9 9 8 81+8+0 9 8 000091 +0 2 9 9 8 81+8+0 9 8 000991 + 8 0 008991 +1 0 9 9 0 81+0+9 0 9 008901 +1 1 9 9 9 81+9+9 9 9 008901 +1 2 9 9 9 81+9+8 8 9 008901 + 9 0 098901 +2 0 9 9 0 81+0+9 0 9 098001 +2 1 9 9 9 81+9+8 8 9 098001 +2 2 9 9 9 81+9+9 9 9 098001 + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/doc/pi.txt b/security/nss/lib/freebl/mpi/doc/pi.txt new file mode 100644 index 0000000000..a6ef91137f --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/pi.txt @@ -0,0 +1,53 @@ +This file describes how pi is computed by the program in 'pi.c' (see +the utils subdirectory). + +Basically, we use Machin's formula, which is what everyone in the +world uses as a simple method for computing approximations to pi. +This works for up to a few thousand digits without too much effort. +Beyond that, though, it gets too slow. + +Machin's formula states: + + pi := 16 * arctan(1/5) - 4 * arctan(1/239) + +We compute this in integer arithmetic by first multiplying everything +through by 10^d, where 'd' is the number of digits of pi we wanted to +compute. It turns out, the last few digits will be wrong, but the +number that are wrong is usually very small (ordinarily only 2-3). +Having done this, we compute the arctan() function using the formula: + + 1 1 1 1 1 + arctan(1/x) := --- - ----- + ----- - ----- + ----- - ... 
+ x 3 x^3 5 x^5 7 x^7 9 x^9 + +This is done iteratively by computing the first term manually, and +then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the +current figure. This is then added to (or subtracted from) a running +sum, as appropriate. The iteration continues until we overflow our +available precision and the current figure goes to zero under integer +division. At that point, we're finished. + +Actually, we get a couple extra bits of precision out of the fact that +we know we're computing y * arctan(1/x), by setting up the multiplier +as: + + y * 10^d + +... instead of just 10^d. There is also a bit of cleverness in how +the loop is constructed, to avoid special-casing the first term. +Check out the code for arctan() in 'pi.c', if you are interested in +seeing how it is set up. + +Thanks to Jason P. for this algorithm, which I assembled from notes +and programs found on his cool "Pile of Pi Programs" page, at: + + http://www.isr.umd.edu/~jasonp/pipage.html + +Thanks also to Henrik Johansson , from +whose pi program I borrowed the clever idea of pre-multiplying by x in +order to avoid a special case on the loop iteration. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/prime.txt b/security/nss/lib/freebl/mpi/doc/prime.txt new file mode 100644 index 0000000000..694797d5f3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/prime.txt @@ -0,0 +1,6542 @@ +2 +3 +5 +7 +11 +13 +17 +19 +23 +29 +31 +37 +41 +43 +47 +53 +59 +61 +67 +71 +73 +79 +83 +89 +97 +101 +103 +107 +109 +113 +127 +131 +137 +139 +149 +151 +157 +163 +167 +173 +179 +181 +191 +193 +197 +199 +211 +223 +227 +229 +233 +239 +241 +251 +257 +263 +269 +271 +277 +281 +283 +293 +307 +311 +313 +317 +331 +337 +347 +349 +353 +359 +367 +373 +379 +383 +389 +397 +401 +409 +419 +421 +431 +433 +439 +443 +449 +457 +461 +463 +467 +479 +487 +491 +499 +503 +509 +521 +523 +541 +547 +557 +563 +569 +571 +577 +587 +593 +599 +601 +607 +613 +617 +619 +631 +641 +643 +647 +653 +659 +661 +673 +677 +683 +691 +701 +709 +719 +727 +733 +739 +743 +751 +757 +761 +769 +773 +787 +797 +809 +811 +821 +823 +827 +829 +839 +853 +857 +859 +863 +877 +881 +883 +887 +907 +911 +919 +929 +937 +941 +947 +953 +967 +971 +977 +983 +991 +997 +1009 +1013 +1019 +1021 +1031 +1033 +1039 +1049 +1051 +1061 +1063 +1069 +1087 +1091 +1093 +1097 +1103 +1109 +1117 +1123 +1129 +1151 +1153 +1163 +1171 +1181 +1187 +1193 +1201 +1213 +1217 +1223 +1229 +1231 +1237 +1249 +1259 +1277 +1279 +1283 +1289 +1291 +1297 +1301 +1303 +1307 +1319 +1321 +1327 +1361 +1367 +1373 +1381 +1399 +1409 +1423 +1427 +1429 +1433 +1439 +1447 +1451 +1453 +1459 +1471 +1481 +1483 +1487 +1489 +1493 +1499 +1511 +1523 +1531 +1543 +1549 +1553 +1559 +1567 +1571 +1579 +1583 +1597 +1601 +1607 +1609 +1613 +1619 +1621 +1627 +1637 +1657 +1663 +1667 +1669 +1693 +1697 +1699 +1709 +1721 +1723 +1733 +1741 +1747 +1753 +1759 +1777 +1783 +1787 +1789 +1801 +1811 +1823 +1831 +1847 +1861 +1867 +1871 +1873 +1877 +1879 +1889 +1901 +1907 +1913 +1931 +1933 +1949 +1951 +1973 +1979 +1987 +1993 +1997 +1999 +2003 +2011 +2017 +2027 +2029 +2039 +2053 +2063 +2069 +2081 +2083 +2087 +2089 +2099 +2111 +2113 +2129 +2131 +2137 +2141 +2143 +2153 +2161 +2179 
+2203 +2207 +2213 +2221 +2237 +2239 +2243 +2251 +2267 +2269 +2273 +2281 +2287 +2293 +2297 +2309 +2311 +2333 +2339 +2341 +2347 +2351 +2357 +2371 +2377 +2381 +2383 +2389 +2393 +2399 +2411 +2417 +2423 +2437 +2441 +2447 +2459 +2467 +2473 +2477 +2503 +2521 +2531 +2539 +2543 +2549 +2551 +2557 +2579 +2591 +2593 +2609 +2617 +2621 +2633 +2647 +2657 +2659 +2663 +2671 +2677 +2683 +2687 +2689 +2693 +2699 +2707 +2711 +2713 +2719 +2729 +2731 +2741 +2749 +2753 +2767 +2777 +2789 +2791 +2797 +2801 +2803 +2819 +2833 +2837 +2843 +2851 +2857 +2861 +2879 +2887 +2897 +2903 +2909 +2917 +2927 +2939 +2953 +2957 +2963 +2969 +2971 +2999 +3001 +3011 +3019 +3023 +3037 +3041 +3049 +3061 +3067 +3079 +3083 +3089 +3109 +3119 +3121 +3137 +3163 +3167 +3169 +3181 +3187 +3191 +3203 +3209 +3217 +3221 +3229 +3251 +3253 +3257 +3259 +3271 +3299 +3301 +3307 +3313 +3319 +3323 +3329 +3331 +3343 +3347 +3359 +3361 +3371 +3373 +3389 +3391 +3407 +3413 +3433 +3449 +3457 +3461 +3463 +3467 +3469 +3491 +3499 +3511 +3517 +3527 +3529 +3533 +3539 +3541 +3547 +3557 +3559 +3571 +3581 +3583 +3593 +3607 +3613 +3617 +3623 +3631 +3637 +3643 +3659 +3671 +3673 +3677 +3691 +3697 +3701 +3709 +3719 +3727 +3733 +3739 +3761 +3767 +3769 +3779 +3793 +3797 +3803 +3821 +3823 +3833 +3847 +3851 +3853 +3863 +3877 +3881 +3889 +3907 +3911 +3917 +3919 +3923 +3929 +3931 +3943 +3947 +3967 +3989 +4001 +4003 +4007 +4013 +4019 +4021 +4027 +4049 +4051 +4057 +4073 +4079 +4091 +4093 +4099 +4111 +4127 +4129 +4133 +4139 +4153 +4157 +4159 +4177 +4201 +4211 +4217 +4219 +4229 +4231 +4241 +4243 +4253 +4259 +4261 +4271 +4273 +4283 +4289 +4297 +4327 +4337 +4339 +4349 +4357 +4363 +4373 +4391 +4397 +4409 +4421 +4423 +4441 +4447 +4451 +4457 +4463 +4481 +4483 +4493 +4507 +4513 +4517 +4519 +4523 +4547 +4549 +4561 +4567 +4583 +4591 +4597 +4603 +4621 +4637 +4639 +4643 +4649 +4651 +4657 +4663 +4673 +4679 +4691 +4703 +4721 +4723 +4729 +4733 +4751 +4759 +4783 +4787 +4789 +4793 +4799 +4801 +4813 +4817 +4831 +4861 +4871 +4877 +4889 +4903 +4909 +4919 +4931 +4933 +4937 
+4943 +4951 +4957 +4967 +4969 +4973 +4987 +4993 +4999 +5003 +5009 +5011 +5021 +5023 +5039 +5051 +5059 +5077 +5081 +5087 +5099 +5101 +5107 +5113 +5119 +5147 +5153 +5167 +5171 +5179 +5189 +5197 +5209 +5227 +5231 +5233 +5237 +5261 +5273 +5279 +5281 +5297 +5303 +5309 +5323 +5333 +5347 +5351 +5381 +5387 +5393 +5399 +5407 +5413 +5417 +5419 +5431 +5437 +5441 +5443 +5449 +5471 +5477 +5479 +5483 +5501 +5503 +5507 +5519 +5521 +5527 +5531 +5557 +5563 +5569 +5573 +5581 +5591 +5623 +5639 +5641 +5647 +5651 +5653 +5657 +5659 +5669 +5683 +5689 +5693 +5701 +5711 +5717 +5737 +5741 +5743 +5749 +5779 +5783 +5791 +5801 +5807 +5813 +5821 +5827 +5839 +5843 +5849 +5851 +5857 +5861 +5867 +5869 +5879 +5881 +5897 +5903 +5923 +5927 +5939 +5953 +5981 +5987 +6007 +6011 +6029 +6037 +6043 +6047 +6053 +6067 +6073 +6079 +6089 +6091 +6101 +6113 +6121 +6131 +6133 +6143 +6151 +6163 +6173 +6197 +6199 +6203 +6211 +6217 +6221 +6229 +6247 +6257 +6263 +6269 +6271 +6277 +6287 +6299 +6301 +6311 +6317 +6323 +6329 +6337 +6343 +6353 +6359 +6361 +6367 +6373 +6379 +6389 +6397 +6421 +6427 +6449 +6451 +6469 +6473 +6481 +6491 +6521 +6529 +6547 +6551 +6553 +6563 +6569 +6571 +6577 +6581 +6599 +6607 +6619 +6637 +6653 +6659 +6661 +6673 +6679 +6689 +6691 +6701 +6703 +6709 +6719 +6733 +6737 +6761 +6763 +6779 +6781 +6791 +6793 +6803 +6823 +6827 +6829 +6833 +6841 +6857 +6863 +6869 +6871 +6883 +6899 +6907 +6911 +6917 +6947 +6949 +6959 +6961 +6967 +6971 +6977 +6983 +6991 +6997 +7001 +7013 +7019 +7027 +7039 +7043 +7057 +7069 +7079 +7103 +7109 +7121 +7127 +7129 +7151 +7159 +7177 +7187 +7193 +7207 +7211 +7213 +7219 +7229 +7237 +7243 +7247 +7253 +7283 +7297 +7307 +7309 +7321 +7331 +7333 +7349 +7351 +7369 +7393 +7411 +7417 +7433 +7451 +7457 +7459 +7477 +7481 +7487 +7489 +7499 +7507 +7517 +7523 +7529 +7537 +7541 +7547 +7549 +7559 +7561 +7573 +7577 +7583 +7589 +7591 +7603 +7607 +7621 +7639 +7643 +7649 +7669 +7673 +7681 +7687 +7691 +7699 +7703 +7717 +7723 +7727 +7741 +7753 +7757 +7759 +7789 +7793 +7817 +7823 +7829 +7841 +7853 +7867 
+7873 +7877 +7879 +7883 +7901 +7907 +7919 +7927 +7933 +7937 +7949 +7951 +7963 +7993 +8009 +8011 +8017 +8039 +8053 +8059 +8069 +8081 +8087 +8089 +8093 +8101 +8111 +8117 +8123 +8147 +8161 +8167 +8171 +8179 +8191 +8209 +8219 +8221 +8231 +8233 +8237 +8243 +8263 +8269 +8273 +8287 +8291 +8293 +8297 +8311 +8317 +8329 +8353 +8363 +8369 +8377 +8387 +8389 +8419 +8423 +8429 +8431 +8443 +8447 +8461 +8467 +8501 +8513 +8521 +8527 +8537 +8539 +8543 +8563 +8573 +8581 +8597 +8599 +8609 +8623 +8627 +8629 +8641 +8647 +8663 +8669 +8677 +8681 +8689 +8693 +8699 +8707 +8713 +8719 +8731 +8737 +8741 +8747 +8753 +8761 +8779 +8783 +8803 +8807 +8819 +8821 +8831 +8837 +8839 +8849 +8861 +8863 +8867 +8887 +8893 +8923 +8929 +8933 +8941 +8951 +8963 +8969 +8971 +8999 +9001 +9007 +9011 +9013 +9029 +9041 +9043 +9049 +9059 +9067 +9091 +9103 +9109 +9127 +9133 +9137 +9151 +9157 +9161 +9173 +9181 +9187 +9199 +9203 +9209 +9221 +9227 +9239 +9241 +9257 +9277 +9281 +9283 +9293 +9311 +9319 +9323 +9337 +9341 +9343 +9349 +9371 +9377 +9391 +9397 +9403 +9413 +9419 +9421 +9431 +9433 +9437 +9439 +9461 +9463 +9467 +9473 +9479 +9491 +9497 +9511 +9521 +9533 +9539 +9547 +9551 +9587 +9601 +9613 +9619 +9623 +9629 +9631 +9643 +9649 +9661 +9677 +9679 +9689 +9697 +9719 +9721 +9733 +9739 +9743 +9749 +9767 +9769 +9781 +9787 +9791 +9803 +9811 +9817 +9829 +9833 +9839 +9851 +9857 +9859 +9871 +9883 +9887 +9901 +9907 +9923 +9929 +9931 +9941 +9949 +9967 +9973 +10007 +10009 +10037 +10039 +10061 +10067 +10069 +10079 +10091 +10093 +10099 +10103 +10111 +10133 +10139 +10141 +10151 +10159 +10163 +10169 +10177 +10181 +10193 +10211 +10223 +10243 +10247 +10253 +10259 +10267 +10271 +10273 +10289 +10301 +10303 +10313 +10321 +10331 +10333 +10337 +10343 +10357 +10369 +10391 +10399 +10427 +10429 +10433 +10453 +10457 +10459 +10463 +10477 +10487 +10499 +10501 +10513 +10529 +10531 +10559 +10567 +10589 +10597 +10601 +10607 +10613 +10627 +10631 +10639 +10651 +10657 +10663 +10667 +10687 +10691 +10709 +10711 +10723 +10729 +10733 +10739 +10753 +10771 
+10781 +10789 +10799 +10831 +10837 +10847 +10853 +10859 +10861 +10867 +10883 +10889 +10891 +10903 +10909 +10937 +10939 +10949 +10957 +10973 +10979 +10987 +10993 +11003 +11027 +11047 +11057 +11059 +11069 +11071 +11083 +11087 +11093 +11113 +11117 +11119 +11131 +11149 +11159 +11161 +11171 +11173 +11177 +11197 +11213 +11239 +11243 +11251 +11257 +11261 +11273 +11279 +11287 +11299 +11311 +11317 +11321 +11329 +11351 +11353 +11369 +11383 +11393 +11399 +11411 +11423 +11437 +11443 +11447 +11467 +11471 +11483 +11489 +11491 +11497 +11503 +11519 +11527 +11549 +11551 +11579 +11587 +11593 +11597 +11617 +11621 +11633 +11657 +11677 +11681 +11689 +11699 +11701 +11717 +11719 +11731 +11743 +11777 +11779 +11783 +11789 +11801 +11807 +11813 +11821 +11827 +11831 +11833 +11839 +11863 +11867 +11887 +11897 +11903 +11909 +11923 +11927 +11933 +11939 +11941 +11953 +11959 +11969 +11971 +11981 +11987 +12007 +12011 +12037 +12041 +12043 +12049 +12071 +12073 +12097 +12101 +12107 +12109 +12113 +12119 +12143 +12149 +12157 +12161 +12163 +12197 +12203 +12211 +12227 +12239 +12241 +12251 +12253 +12263 +12269 +12277 +12281 +12289 +12301 +12323 +12329 +12343 +12347 +12373 +12377 +12379 +12391 +12401 +12409 +12413 +12421 +12433 +12437 +12451 +12457 +12473 +12479 +12487 +12491 +12497 +12503 +12511 +12517 +12527 +12539 +12541 +12547 +12553 +12569 +12577 +12583 +12589 +12601 +12611 +12613 +12619 +12637 +12641 +12647 +12653 +12659 +12671 +12689 +12697 +12703 +12713 +12721 +12739 +12743 +12757 +12763 +12781 +12791 +12799 +12809 +12821 +12823 +12829 +12841 +12853 +12889 +12893 +12899 +12907 +12911 +12917 +12919 +12923 +12941 +12953 +12959 +12967 +12973 +12979 +12983 +13001 +13003 +13007 +13009 +13033 +13037 +13043 +13049 +13063 +13093 +13099 +13103 +13109 +13121 +13127 +13147 +13151 +13159 +13163 +13171 +13177 +13183 +13187 +13217 +13219 +13229 +13241 +13249 +13259 +13267 +13291 +13297 +13309 +13313 +13327 +13331 +13337 +13339 +13367 +13381 +13397 +13399 +13411 +13417 +13421 +13441 +13451 +13457 +13463 +13469 
+13477 +13487 +13499 +13513 +13523 +13537 +13553 +13567 +13577 +13591 +13597 +13613 +13619 +13627 +13633 +13649 +13669 +13679 +13681 +13687 +13691 +13693 +13697 +13709 +13711 +13721 +13723 +13729 +13751 +13757 +13759 +13763 +13781 +13789 +13799 +13807 +13829 +13831 +13841 +13859 +13873 +13877 +13879 +13883 +13901 +13903 +13907 +13913 +13921 +13931 +13933 +13963 +13967 +13997 +13999 +14009 +14011 +14029 +14033 +14051 +14057 +14071 +14081 +14083 +14087 +14107 +14143 +14149 +14153 +14159 +14173 +14177 +14197 +14207 +14221 +14243 +14249 +14251 +14281 +14293 +14303 +14321 +14323 +14327 +14341 +14347 +14369 +14387 +14389 +14401 +14407 +14411 +14419 +14423 +14431 +14437 +14447 +14449 +14461 +14479 +14489 +14503 +14519 +14533 +14537 +14543 +14549 +14551 +14557 +14561 +14563 +14591 +14593 +14621 +14627 +14629 +14633 +14639 +14653 +14657 +14669 +14683 +14699 +14713 +14717 +14723 +14731 +14737 +14741 +14747 +14753 +14759 +14767 +14771 +14779 +14783 +14797 +14813 +14821 +14827 +14831 +14843 +14851 +14867 +14869 +14879 +14887 +14891 +14897 +14923 +14929 +14939 +14947 +14951 +14957 +14969 +14983 +15013 +15017 +15031 +15053 +15061 +15073 +15077 +15083 +15091 +15101 +15107 +15121 +15131 +15137 +15139 +15149 +15161 +15173 +15187 +15193 +15199 +15217 +15227 +15233 +15241 +15259 +15263 +15269 +15271 +15277 +15287 +15289 +15299 +15307 +15313 +15319 +15329 +15331 +15349 +15359 +15361 +15373 +15377 +15383 +15391 +15401 +15413 +15427 +15439 +15443 +15451 +15461 +15467 +15473 +15493 +15497 +15511 +15527 +15541 +15551 +15559 +15569 +15581 +15583 +15601 +15607 +15619 +15629 +15641 +15643 +15647 +15649 +15661 +15667 +15671 +15679 +15683 +15727 +15731 +15733 +15737 +15739 +15749 +15761 +15767 +15773 +15787 +15791 +15797 +15803 +15809 +15817 +15823 +15859 +15877 +15881 +15887 +15889 +15901 +15907 +15913 +15919 +15923 +15937 +15959 +15971 +15973 +15991 +16001 +16007 +16033 +16057 +16061 +16063 +16067 +16069 +16073 +16087 +16091 +16097 +16103 +16111 +16127 +16139 +16141 +16183 +16187 +16189 
+16193 +16217 +16223 +16229 +16231 +16249 +16253 +16267 +16273 +16301 +16319 +16333 +16339 +16349 +16361 +16363 +16369 +16381 +16411 +16417 +16421 +16427 +16433 +16447 +16451 +16453 +16477 +16481 +16487 +16493 +16519 +16529 +16547 +16553 +16561 +16567 +16573 +16603 +16607 +16619 +16631 +16633 +16649 +16651 +16657 +16661 +16673 +16691 +16693 +16699 +16703 +16729 +16741 +16747 +16759 +16763 +16787 +16811 +16823 +16829 +16831 +16843 +16871 +16879 +16883 +16889 +16901 +16903 +16921 +16927 +16931 +16937 +16943 +16963 +16979 +16981 +16987 +16993 +17011 +17021 +17027 +17029 +17033 +17041 +17047 +17053 +17077 +17093 +17099 +17107 +17117 +17123 +17137 +17159 +17167 +17183 +17189 +17191 +17203 +17207 +17209 +17231 +17239 +17257 +17291 +17293 +17299 +17317 +17321 +17327 +17333 +17341 +17351 +17359 +17377 +17383 +17387 +17389 +17393 +17401 +17417 +17419 +17431 +17443 +17449 +17467 +17471 +17477 +17483 +17489 +17491 +17497 +17509 +17519 +17539 +17551 +17569 +17573 +17579 +17581 +17597 +17599 +17609 +17623 +17627 +17657 +17659 +17669 +17681 +17683 +17707 +17713 +17729 +17737 +17747 +17749 +17761 +17783 +17789 +17791 +17807 +17827 +17837 +17839 +17851 +17863 +17881 +17891 +17903 +17909 +17911 +17921 +17923 +17929 +17939 +17957 +17959 +17971 +17977 +17981 +17987 +17989 +18013 +18041 +18043 +18047 +18049 +18059 +18061 +18077 +18089 +18097 +18119 +18121 +18127 +18131 +18133 +18143 +18149 +18169 +18181 +18191 +18199 +18211 +18217 +18223 +18229 +18233 +18251 +18253 +18257 +18269 +18287 +18289 +18301 +18307 +18311 +18313 +18329 +18341 +18353 +18367 +18371 +18379 +18397 +18401 +18413 +18427 +18433 +18439 +18443 +18451 +18457 +18461 +18481 +18493 +18503 +18517 +18521 +18523 +18539 +18541 +18553 +18583 +18587 +18593 +18617 +18637 +18661 +18671 +18679 +18691 +18701 +18713 +18719 +18731 +18743 +18749 +18757 +18773 +18787 +18793 +18797 +18803 +18839 +18859 +18869 +18899 +18911 +18913 +18917 +18919 +18947 +18959 +18973 +18979 +19001 +19009 +19013 +19031 +19037 +19051 +19069 +19073 +19079 
+19081 +19087 +19121 +19139 +19141 +19157 +19163 +19181 +19183 +19207 +19211 +19213 +19219 +19231 +19237 +19249 +19259 +19267 +19273 +19289 +19301 +19309 +19319 +19333 +19373 +19379 +19381 +19387 +19391 +19403 +19417 +19421 +19423 +19427 +19429 +19433 +19441 +19447 +19457 +19463 +19469 +19471 +19477 +19483 +19489 +19501 +19507 +19531 +19541 +19543 +19553 +19559 +19571 +19577 +19583 +19597 +19603 +19609 +19661 +19681 +19687 +19697 +19699 +19709 +19717 +19727 +19739 +19751 +19753 +19759 +19763 +19777 +19793 +19801 +19813 +19819 +19841 +19843 +19853 +19861 +19867 +19889 +19891 +19913 +19919 +19927 +19937 +19949 +19961 +19963 +19973 +19979 +19991 +19993 +19997 +20011 +20021 +20023 +20029 +20047 +20051 +20063 +20071 +20089 +20101 +20107 +20113 +20117 +20123 +20129 +20143 +20147 +20149 +20161 +20173 +20177 +20183 +20201 +20219 +20231 +20233 +20249 +20261 +20269 +20287 +20297 +20323 +20327 +20333 +20341 +20347 +20353 +20357 +20359 +20369 +20389 +20393 +20399 +20407 +20411 +20431 +20441 +20443 +20477 +20479 +20483 +20507 +20509 +20521 +20533 +20543 +20549 +20551 +20563 +20593 +20599 +20611 +20627 +20639 +20641 +20663 +20681 +20693 +20707 +20717 +20719 +20731 +20743 +20747 +20749 +20753 +20759 +20771 +20773 +20789 +20807 +20809 +20849 +20857 +20873 +20879 +20887 +20897 +20899 +20903 +20921 +20929 +20939 +20947 +20959 +20963 +20981 +20983 +21001 +21011 +21013 +21017 +21019 +21023 +21031 +21059 +21061 +21067 +21089 +21101 +21107 +21121 +21139 +21143 +21149 +21157 +21163 +21169 +21179 +21187 +21191 +21193 +21211 +21221 +21227 +21247 +21269 +21277 +21283 +21313 +21317 +21319 +21323 +21341 +21347 +21377 +21379 +21383 +21391 +21397 +21401 +21407 +21419 +21433 +21467 +21481 +21487 +21491 +21493 +21499 +21503 +21517 +21521 +21523 +21529 +21557 +21559 +21563 +21569 +21577 +21587 +21589 +21599 +21601 +21611 +21613 +21617 +21647 +21649 +21661 +21673 +21683 +21701 +21713 +21727 +21737 +21739 +21751 +21757 +21767 +21773 +21787 +21799 +21803 +21817 +21821 +21839 +21841 +21851 +21859 
+21863 +21871 +21881 +21893 +21911 +21929 +21937 +21943 +21961 +21977 +21991 +21997 +22003 +22013 +22027 +22031 +22037 +22039 +22051 +22063 +22067 +22073 +22079 +22091 +22093 +22109 +22111 +22123 +22129 +22133 +22147 +22153 +22157 +22159 +22171 +22189 +22193 +22229 +22247 +22259 +22271 +22273 +22277 +22279 +22283 +22291 +22303 +22307 +22343 +22349 +22367 +22369 +22381 +22391 +22397 +22409 +22433 +22441 +22447 +22453 +22469 +22481 +22483 +22501 +22511 +22531 +22541 +22543 +22549 +22567 +22571 +22573 +22613 +22619 +22621 +22637 +22639 +22643 +22651 +22669 +22679 +22691 +22697 +22699 +22709 +22717 +22721 +22727 +22739 +22741 +22751 +22769 +22777 +22783 +22787 +22807 +22811 +22817 +22853 +22859 +22861 +22871 +22877 +22901 +22907 +22921 +22937 +22943 +22961 +22963 +22973 +22993 +23003 +23011 +23017 +23021 +23027 +23029 +23039 +23041 +23053 +23057 +23059 +23063 +23071 +23081 +23087 +23099 +23117 +23131 +23143 +23159 +23167 +23173 +23189 +23197 +23201 +23203 +23209 +23227 +23251 +23269 +23279 +23291 +23293 +23297 +23311 +23321 +23327 +23333 +23339 +23357 +23369 +23371 +23399 +23417 +23431 +23447 +23459 +23473 +23497 +23509 +23531 +23537 +23539 +23549 +23557 +23561 +23563 +23567 +23581 +23593 +23599 +23603 +23609 +23623 +23627 +23629 +23633 +23663 +23669 +23671 +23677 +23687 +23689 +23719 +23741 +23743 +23747 +23753 +23761 +23767 +23773 +23789 +23801 +23813 +23819 +23827 +23831 +23833 +23857 +23869 +23873 +23879 +23887 +23893 +23899 +23909 +23911 +23917 +23929 +23957 +23971 +23977 +23981 +23993 +24001 +24007 +24019 +24023 +24029 +24043 +24049 +24061 +24071 +24077 +24083 +24091 +24097 +24103 +24107 +24109 +24113 +24121 +24133 +24137 +24151 +24169 +24179 +24181 +24197 +24203 +24223 +24229 +24239 +24247 +24251 +24281 +24317 +24329 +24337 +24359 +24371 +24373 +24379 +24391 +24407 +24413 +24419 +24421 +24439 +24443 +24469 +24473 +24481 +24499 +24509 +24517 +24527 +24533 +24547 +24551 +24571 +24593 +24611 +24623 +24631 +24659 +24671 +24677 +24683 +24691 +24697 +24709 +24733 
+24749 +24763 +24767 +24781 +24793 +24799 +24809 +24821 +24841 +24847 +24851 +24859 +24877 +24889 +24907 +24917 +24919 +24923 +24943 +24953 +24967 +24971 +24977 +24979 +24989 +25013 +25031 +25033 +25037 +25057 +25073 +25087 +25097 +25111 +25117 +25121 +25127 +25147 +25153 +25163 +25169 +25171 +25183 +25189 +25219 +25229 +25237 +25243 +25247 +25253 +25261 +25301 +25303 +25307 +25309 +25321 +25339 +25343 +25349 +25357 +25367 +25373 +25391 +25409 +25411 +25423 +25439 +25447 +25453 +25457 +25463 +25469 +25471 +25523 +25537 +25541 +25561 +25577 +25579 +25583 +25589 +25601 +25603 +25609 +25621 +25633 +25639 +25643 +25657 +25667 +25673 +25679 +25693 +25703 +25717 +25733 +25741 +25747 +25759 +25763 +25771 +25793 +25799 +25801 +25819 +25841 +25847 +25849 +25867 +25873 +25889 +25903 +25913 +25919 +25931 +25933 +25939 +25943 +25951 +25969 +25981 +25997 +25999 +26003 +26017 +26021 +26029 +26041 +26053 +26083 +26099 +26107 +26111 +26113 +26119 +26141 +26153 +26161 +26171 +26177 +26183 +26189 +26203 +26209 +26227 +26237 +26249 +26251 +26261 +26263 +26267 +26293 +26297 +26309 +26317 +26321 +26339 +26347 +26357 +26371 +26387 +26393 +26399 +26407 +26417 +26423 +26431 +26437 +26449 +26459 +26479 +26489 +26497 +26501 +26513 +26539 +26557 +26561 +26573 +26591 +26597 +26627 +26633 +26641 +26647 +26669 +26681 +26683 +26687 +26693 +26699 +26701 +26711 +26713 +26717 +26723 +26729 +26731 +26737 +26759 +26777 +26783 +26801 +26813 +26821 +26833 +26839 +26849 +26861 +26863 +26879 +26881 +26891 +26893 +26903 +26921 +26927 +26947 +26951 +26953 +26959 +26981 +26987 +26993 +27011 +27017 +27031 +27043 +27059 +27061 +27067 +27073 +27077 +27091 +27103 +27107 +27109 +27127 +27143 +27179 +27191 +27197 +27211 +27239 +27241 +27253 +27259 +27271 +27277 +27281 +27283 +27299 +27329 +27337 +27361 +27367 +27397 +27407 +27409 +27427 +27431 +27437 +27449 +27457 +27479 +27481 +27487 +27509 +27527 +27529 +27539 +27541 +27551 +27581 +27583 +27611 +27617 +27631 +27647 +27653 +27673 +27689 +27691 +27697 +27701 
+27733 +27737 +27739 +27743 +27749 +27751 +27763 +27767 +27773 +27779 +27791 +27793 +27799 +27803 +27809 +27817 +27823 +27827 +27847 +27851 +27883 +27893 +27901 +27917 +27919 +27941 +27943 +27947 +27953 +27961 +27967 +27983 +27997 +28001 +28019 +28027 +28031 +28051 +28057 +28069 +28081 +28087 +28097 +28099 +28109 +28111 +28123 +28151 +28163 +28181 +28183 +28201 +28211 +28219 +28229 +28277 +28279 +28283 +28289 +28297 +28307 +28309 +28319 +28349 +28351 +28387 +28393 +28403 +28409 +28411 +28429 +28433 +28439 +28447 +28463 +28477 +28493 +28499 +28513 +28517 +28537 +28541 +28547 +28549 +28559 +28571 +28573 +28579 +28591 +28597 +28603 +28607 +28619 +28621 +28627 +28631 +28643 +28649 +28657 +28661 +28663 +28669 +28687 +28697 +28703 +28711 +28723 +28729 +28751 +28753 +28759 +28771 +28789 +28793 +28807 +28813 +28817 +28837 +28843 +28859 +28867 +28871 +28879 +28901 +28909 +28921 +28927 +28933 +28949 +28961 +28979 +29009 +29017 +29021 +29023 +29027 +29033 +29059 +29063 +29077 +29101 +29123 +29129 +29131 +29137 +29147 +29153 +29167 +29173 +29179 +29191 +29201 +29207 +29209 +29221 +29231 +29243 +29251 +29269 +29287 +29297 +29303 +29311 +29327 +29333 +29339 +29347 +29363 +29383 +29387 +29389 +29399 +29401 +29411 +29423 +29429 +29437 +29443 +29453 +29473 +29483 +29501 +29527 +29531 +29537 +29567 +29569 +29573 +29581 +29587 +29599 +29611 +29629 +29633 +29641 +29663 +29669 +29671 +29683 +29717 +29723 +29741 +29753 +29759 +29761 +29789 +29803 +29819 +29833 +29837 +29851 +29863 +29867 +29873 +29879 +29881 +29917 +29921 +29927 +29947 +29959 +29983 +29989 +30011 +30013 +30029 +30047 +30059 +30071 +30089 +30091 +30097 +30103 +30109 +30113 +30119 +30133 +30137 +30139 +30161 +30169 +30181 +30187 +30197 +30203 +30211 +30223 +30241 +30253 +30259 +30269 +30271 +30293 +30307 +30313 +30319 +30323 +30341 +30347 +30367 +30389 +30391 +30403 +30427 +30431 +30449 +30467 +30469 +30491 +30493 +30497 +30509 +30517 +30529 +30539 +30553 +30557 +30559 +30577 +30593 +30631 +30637 +30643 +30649 +30661 
+30671 +30677 +30689 +30697 +30703 +30707 +30713 +30727 +30757 +30763 +30773 +30781 +30803 +30809 +30817 +30829 +30839 +30841 +30851 +30853 +30859 +30869 +30871 +30881 +30893 +30911 +30931 +30937 +30941 +30949 +30971 +30977 +30983 +31013 +31019 +31033 +31039 +31051 +31063 +31069 +31079 +31081 +31091 +31121 +31123 +31139 +31147 +31151 +31153 +31159 +31177 +31181 +31183 +31189 +31193 +31219 +31223 +31231 +31237 +31247 +31249 +31253 +31259 +31267 +31271 +31277 +31307 +31319 +31321 +31327 +31333 +31337 +31357 +31379 +31387 +31391 +31393 +31397 +31469 +31477 +31481 +31489 +31511 +31513 +31517 +31531 +31541 +31543 +31547 +31567 +31573 +31583 +31601 +31607 +31627 +31643 +31649 +31657 +31663 +31667 +31687 +31699 +31721 +31723 +31727 +31729 +31741 +31751 +31769 +31771 +31793 +31799 +31817 +31847 +31849 +31859 +31873 +31883 +31891 +31907 +31957 +31963 +31973 +31981 +31991 +32003 +32009 +32027 +32029 +32051 +32057 +32059 +32063 +32069 +32077 +32083 +32089 +32099 +32117 +32119 +32141 +32143 +32159 +32173 +32183 +32189 +32191 +32203 +32213 +32233 +32237 +32251 +32257 +32261 +32297 +32299 +32303 +32309 +32321 +32323 +32327 +32341 +32353 +32359 +32363 +32369 +32371 +32377 +32381 +32401 +32411 +32413 +32423 +32429 +32441 +32443 +32467 +32479 +32491 +32497 +32503 +32507 +32531 +32533 +32537 +32561 +32563 +32569 +32573 +32579 +32587 +32603 +32609 +32611 +32621 +32633 +32647 +32653 +32687 +32693 +32707 +32713 +32717 +32719 +32749 +32771 +32779 +32783 +32789 +32797 +32801 +32803 +32831 +32833 +32839 +32843 +32869 +32887 +32909 +32911 +32917 +32933 +32939 +32941 +32957 +32969 +32971 +32983 +32987 +32993 +32999 +33013 +33023 +33029 +33037 +33049 +33053 +33071 +33073 +33083 +33091 +33107 +33113 +33119 +33149 +33151 +33161 +33179 +33181 +33191 +33199 +33203 +33211 +33223 +33247 +33287 +33289 +33301 +33311 +33317 +33329 +33331 +33343 +33347 +33349 +33353 +33359 +33377 +33391 +33403 +33409 +33413 +33427 +33457 +33461 +33469 +33479 +33487 +33493 +33503 +33521 +33529 +33533 +33547 +33563 
+33569 +33577 +33581 +33587 +33589 +33599 +33601 +33613 +33617 +33619 +33623 +33629 +33637 +33641 +33647 +33679 +33703 +33713 +33721 +33739 +33749 +33751 +33757 +33767 +33769 +33773 +33791 +33797 +33809 +33811 +33827 +33829 +33851 +33857 +33863 +33871 +33889 +33893 +33911 +33923 +33931 +33937 +33941 +33961 +33967 +33997 +34019 +34031 +34033 +34039 +34057 +34061 +34123 +34127 +34129 +34141 +34147 +34157 +34159 +34171 +34183 +34211 +34213 +34217 +34231 +34253 +34259 +34261 +34267 +34273 +34283 +34297 +34301 +34303 +34313 +34319 +34327 +34337 +34351 +34361 +34367 +34369 +34381 +34403 +34421 +34429 +34439 +34457 +34469 +34471 +34483 +34487 +34499 +34501 +34511 +34513 +34519 +34537 +34543 +34549 +34583 +34589 +34591 +34603 +34607 +34613 +34631 +34649 +34651 +34667 +34673 +34679 +34687 +34693 +34703 +34721 +34729 +34739 +34747 +34757 +34759 +34763 +34781 +34807 +34819 +34841 +34843 +34847 +34849 +34871 +34877 +34883 +34897 +34913 +34919 +34939 +34949 +34961 +34963 +34981 +35023 +35027 +35051 +35053 +35059 +35069 +35081 +35083 +35089 +35099 +35107 +35111 +35117 +35129 +35141 +35149 +35153 +35159 +35171 +35201 +35221 +35227 +35251 +35257 +35267 +35279 +35281 +35291 +35311 +35317 +35323 +35327 +35339 +35353 +35363 +35381 +35393 +35401 +35407 +35419 +35423 +35437 +35447 +35449 +35461 +35491 +35507 +35509 +35521 +35527 +35531 +35533 +35537 +35543 +35569 +35573 +35591 +35593 +35597 +35603 +35617 +35671 +35677 +35729 +35731 +35747 +35753 +35759 +35771 +35797 +35801 +35803 +35809 +35831 +35837 +35839 +35851 +35863 +35869 +35879 +35897 +35899 +35911 +35923 +35933 +35951 +35963 +35969 +35977 +35983 +35993 +35999 +36007 +36011 +36013 +36017 +36037 +36061 +36067 +36073 +36083 +36097 +36107 +36109 +36131 +36137 +36151 +36161 +36187 +36191 +36209 +36217 +36229 +36241 +36251 +36263 +36269 +36277 +36293 +36299 +36307 +36313 +36319 +36341 +36343 +36353 +36373 +36383 +36389 +36433 +36451 +36457 +36467 +36469 +36473 +36479 +36493 +36497 +36523 +36527 +36529 +36541 +36551 +36559 +36563 
+36571 +36583 +36587 +36599 +36607 +36629 +36637 +36643 +36653 +36671 +36677 +36683 +36691 +36697 +36709 +36713 +36721 +36739 +36749 +36761 +36767 +36779 +36781 +36787 +36791 +36793 +36809 +36821 +36833 +36847 +36857 +36871 +36877 +36887 +36899 +36901 +36913 +36919 +36923 +36929 +36931 +36943 +36947 +36973 +36979 +36997 +37003 +37013 +37019 +37021 +37039 +37049 +37057 +37061 +37087 +37097 +37117 +37123 +37139 +37159 +37171 +37181 +37189 +37199 +37201 +37217 +37223 +37243 +37253 +37273 +37277 +37307 +37309 +37313 +37321 +37337 +37339 +37357 +37361 +37363 +37369 +37379 +37397 +37409 +37423 +37441 +37447 +37463 +37483 +37489 +37493 +37501 +37507 +37511 +37517 +37529 +37537 +37547 +37549 +37561 +37567 +37571 +37573 +37579 +37589 +37591 +37607 +37619 +37633 +37643 +37649 +37657 +37663 +37691 +37693 +37699 +37717 +37747 +37781 +37783 +37799 +37811 +37813 +37831 +37847 +37853 +37861 +37871 +37879 +37889 +37897 +37907 +37951 +37957 +37963 +37967 +37987 +37991 +37993 +37997 +38011 +38039 +38047 +38053 +38069 +38083 +38113 +38119 +38149 +38153 +38167 +38177 +38183 +38189 +38197 +38201 +38219 +38231 +38237 +38239 +38261 +38273 +38281 +38287 +38299 +38303 +38317 +38321 +38327 +38329 +38333 +38351 +38371 +38377 +38393 +38431 +38447 +38449 +38453 +38459 +38461 +38501 +38543 +38557 +38561 +38567 +38569 +38593 +38603 +38609 +38611 +38629 +38639 +38651 +38653 +38669 +38671 +38677 +38693 +38699 +38707 +38711 +38713 +38723 +38729 +38737 +38747 +38749 +38767 +38783 +38791 +38803 +38821 +38833 +38839 +38851 +38861 +38867 +38873 +38891 +38903 +38917 +38921 +38923 +38933 +38953 +38959 +38971 +38977 +38993 +39019 +39023 +39041 +39043 +39047 +39079 +39089 +39097 +39103 +39107 +39113 +39119 +39133 +39139 +39157 +39161 +39163 +39181 +39191 +39199 +39209 +39217 +39227 +39229 +39233 +39239 +39241 +39251 +39293 +39301 +39313 +39317 +39323 +39341 +39343 +39359 +39367 +39371 +39373 +39383 +39397 +39409 +39419 +39439 +39443 +39451 +39461 +39499 +39503 +39509 +39511 +39521 +39541 +39551 +39563 
+39569 +39581 +39607 +39619 +39623 +39631 +39659 +39667 +39671 +39679 +39703 +39709 +39719 +39727 +39733 +39749 +39761 +39769 +39779 +39791 +39799 +39821 +39827 +39829 +39839 +39841 +39847 +39857 +39863 +39869 +39877 +39883 +39887 +39901 +39929 +39937 +39953 +39971 +39979 +39983 +39989 +40009 +40013 +40031 +40037 +40039 +40063 +40087 +40093 +40099 +40111 +40123 +40127 +40129 +40151 +40153 +40163 +40169 +40177 +40189 +40193 +40213 +40231 +40237 +40241 +40253 +40277 +40283 +40289 +40343 +40351 +40357 +40361 +40387 +40423 +40427 +40429 +40433 +40459 +40471 +40483 +40487 +40493 +40499 +40507 +40519 +40529 +40531 +40543 +40559 +40577 +40583 +40591 +40597 +40609 +40627 +40637 +40639 +40693 +40697 +40699 +40709 +40739 +40751 +40759 +40763 +40771 +40787 +40801 +40813 +40819 +40823 +40829 +40841 +40847 +40849 +40853 +40867 +40879 +40883 +40897 +40903 +40927 +40933 +40939 +40949 +40961 +40973 +40993 +41011 +41017 +41023 +41039 +41047 +41051 +41057 +41077 +41081 +41113 +41117 +41131 +41141 +41143 +41149 +41161 +41177 +41179 +41183 +41189 +41201 +41203 +41213 +41221 +41227 +41231 +41233 +41243 +41257 +41263 +41269 +41281 +41299 +41333 +41341 +41351 +41357 +41381 +41387 +41389 +41399 +41411 +41413 +41443 +41453 +41467 +41479 +41491 +41507 +41513 +41519 +41521 +41539 +41543 +41549 +41579 +41593 +41597 +41603 +41609 +41611 +41617 +41621 +41627 +41641 +41647 +41651 +41659 +41669 +41681 +41687 +41719 +41729 +41737 +41759 +41761 +41771 +41777 +41801 +41809 +41813 +41843 +41849 +41851 +41863 +41879 +41887 +41893 +41897 +41903 +41911 +41927 +41941 +41947 +41953 +41957 +41959 +41969 +41981 +41983 +41999 +42013 +42017 +42019 +42023 +42043 +42061 +42071 +42073 +42083 +42089 +42101 +42131 +42139 +42157 +42169 +42179 +42181 +42187 +42193 +42197 +42209 +42221 +42223 +42227 +42239 +42257 +42281 +42283 +42293 +42299 +42307 +42323 +42331 +42337 +42349 +42359 +42373 +42379 +42391 +42397 +42403 +42407 +42409 +42433 +42437 +42443 +42451 +42457 +42461 +42463 +42467 +42473 +42487 +42491 +42499 
+42509 +42533 +42557 +42569 +42571 +42577 +42589 +42611 +42641 +42643 +42649 +42667 +42677 +42683 +42689 +42697 +42701 +42703 +42709 +42719 +42727 +42737 +42743 +42751 +42767 +42773 +42787 +42793 +42797 +42821 +42829 +42839 +42841 +42853 +42859 +42863 +42899 +42901 +42923 +42929 +42937 +42943 +42953 +42961 +42967 +42979 +42989 +43003 +43013 +43019 +43037 +43049 +43051 +43063 +43067 +43093 +43103 +43117 +43133 +43151 +43159 +43177 +43189 +43201 +43207 +43223 +43237 +43261 +43271 +43283 +43291 +43313 +43319 +43321 +43331 +43391 +43397 +43399 +43403 +43411 +43427 +43441 +43451 +43457 +43481 +43487 +43499 +43517 +43541 +43543 +43573 +43577 +43579 +43591 +43597 +43607 +43609 +43613 +43627 +43633 +43649 +43651 +43661 +43669 +43691 +43711 +43717 +43721 +43753 +43759 +43777 +43781 +43783 +43787 +43789 +43793 +43801 +43853 +43867 +43889 +43891 +43913 +43933 +43943 +43951 +43961 +43963 +43969 +43973 +43987 +43991 +43997 +44017 +44021 +44027 +44029 +44041 +44053 +44059 +44071 +44087 +44089 +44101 +44111 +44119 +44123 +44129 +44131 +44159 +44171 +44179 +44189 +44201 +44203 +44207 +44221 +44249 +44257 +44263 +44267 +44269 +44273 +44279 +44281 +44293 +44351 +44357 +44371 +44381 +44383 +44389 +44417 +44449 +44453 +44483 +44491 +44497 +44501 +44507 +44519 +44531 +44533 +44537 +44543 +44549 +44563 +44579 +44587 +44617 +44621 +44623 +44633 +44641 +44647 +44651 +44657 +44683 +44687 +44699 +44701 +44711 +44729 +44741 +44753 +44771 +44773 +44777 +44789 +44797 +44809 +44819 +44839 +44843 +44851 +44867 +44879 +44887 +44893 +44909 +44917 +44927 +44939 +44953 +44959 +44963 +44971 +44983 +44987 +45007 +45013 +45053 +45061 +45077 +45083 +45119 +45121 +45127 +45131 +45137 +45139 +45161 +45179 +45181 +45191 +45197 +45233 +45247 +45259 +45263 +45281 +45289 +45293 +45307 +45317 +45319 +45329 +45337 +45341 +45343 +45361 +45377 +45389 +45403 +45413 +45427 +45433 +45439 +45481 +45491 +45497 +45503 +45523 +45533 +45541 +45553 +45557 +45569 +45587 +45589 +45599 +45613 +45631 +45641 +45659 +45667 
+45673 +45677 +45691 +45697 +45707 +45737 +45751 +45757 +45763 +45767 +45779 +45817 +45821 +45823 +45827 +45833 +45841 +45853 +45863 +45869 +45887 +45893 +45943 +45949 +45953 +45959 +45971 +45979 +45989 +46021 +46027 +46049 +46051 +46061 +46073 +46091 +46093 +46099 +46103 +46133 +46141 +46147 +46153 +46171 +46181 +46183 +46187 +46199 +46219 +46229 +46237 +46261 +46271 +46273 +46279 +46301 +46307 +46309 +46327 +46337 +46349 +46351 +46381 +46399 +46411 +46439 +46441 +46447 +46451 +46457 +46471 +46477 +46489 +46499 +46507 +46511 +46523 +46549 +46559 +46567 +46573 +46589 +46591 +46601 +46619 +46633 +46639 +46643 +46649 +46663 +46679 +46681 +46687 +46691 +46703 +46723 +46727 +46747 +46751 +46757 +46769 +46771 +46807 +46811 +46817 +46819 +46829 +46831 +46853 +46861 +46867 +46877 +46889 +46901 +46919 +46933 +46957 +46993 +46997 +47017 +47041 +47051 +47057 +47059 +47087 +47093 +47111 +47119 +47123 +47129 +47137 +47143 +47147 +47149 +47161 +47189 +47207 +47221 +47237 +47251 +47269 +47279 +47287 +47293 +47297 +47303 +47309 +47317 +47339 +47351 +47353 +47363 +47381 +47387 +47389 +47407 +47417 +47419 +47431 +47441 +47459 +47491 +47497 +47501 +47507 +47513 +47521 +47527 +47533 +47543 +47563 +47569 +47581 +47591 +47599 +47609 +47623 +47629 +47639 +47653 +47657 +47659 +47681 +47699 +47701 +47711 +47713 +47717 +47737 +47741 +47743 +47777 +47779 +47791 +47797 +47807 +47809 +47819 +47837 +47843 +47857 +47869 +47881 +47903 +47911 +47917 +47933 +47939 +47947 +47951 +47963 +47969 +47977 +47981 +48017 +48023 +48029 +48049 +48073 +48079 +48091 +48109 +48119 +48121 +48131 +48157 +48163 +48179 +48187 +48193 +48197 +48221 +48239 +48247 +48259 +48271 +48281 +48299 +48311 +48313 +48337 +48341 +48353 +48371 +48383 +48397 +48407 +48409 +48413 +48437 +48449 +48463 +48473 +48479 +48481 +48487 +48491 +48497 +48523 +48527 +48533 +48539 +48541 +48563 +48571 +48589 +48593 +48611 +48619 +48623 +48647 +48649 +48661 +48673 +48677 +48679 +48731 +48733 +48751 +48757 +48761 +48767 +48779 +48781 +48787 
+48799 +48809 +48817 +48821 +48823 +48847 +48857 +48859 +48869 +48871 +48883 +48889 +48907 +48947 +48953 +48973 +48989 +48991 +49003 +49009 +49019 +49031 +49033 +49037 +49043 +49057 +49069 +49081 +49103 +49109 +49117 +49121 +49123 +49139 +49157 +49169 +49171 +49177 +49193 +49199 +49201 +49207 +49211 +49223 +49253 +49261 +49277 +49279 +49297 +49307 +49331 +49333 +49339 +49363 +49367 +49369 +49391 +49393 +49409 +49411 +49417 +49429 +49433 +49451 +49459 +49463 +49477 +49481 +49499 +49523 +49529 +49531 +49537 +49547 +49549 +49559 +49597 +49603 +49613 +49627 +49633 +49639 +49663 +49667 +49669 +49681 +49697 +49711 +49727 +49739 +49741 +49747 +49757 +49783 +49787 +49789 +49801 +49807 +49811 +49823 +49831 +49843 +49853 +49871 +49877 +49891 +49919 +49921 +49927 +49937 +49939 +49943 +49957 +49991 +49993 +49999 +50021 +50023 +50033 +50047 +50051 +50053 +50069 +50077 +50087 +50093 +50101 +50111 +50119 +50123 +50129 +50131 +50147 +50153 +50159 +50177 +50207 +50221 +50227 +50231 +50261 +50263 +50273 +50287 +50291 +50311 +50321 +50329 +50333 +50341 +50359 +50363 +50377 +50383 +50387 +50411 +50417 +50423 +50441 +50459 +50461 +50497 +50503 +50513 +50527 +50539 +50543 +50549 +50551 +50581 +50587 +50591 +50593 +50599 +50627 +50647 +50651 +50671 +50683 +50707 +50723 +50741 +50753 +50767 +50773 +50777 +50789 +50821 +50833 +50839 +50849 +50857 +50867 +50873 +50891 +50893 +50909 +50923 +50929 +50951 +50957 +50969 +50971 +50989 +50993 +51001 +51031 +51043 +51047 +51059 +51061 +51071 +51109 +51131 +51133 +51137 +51151 +51157 +51169 +51193 +51197 +51199 +51203 +51217 +51229 +51239 +51241 +51257 +51263 +51283 +51287 +51307 +51329 +51341 +51343 +51347 +51349 +51361 +51383 +51407 +51413 +51419 +51421 +51427 +51431 +51437 +51439 +51449 +51461 +51473 +51479 +51481 +51487 +51503 +51511 +51517 +51521 +51539 +51551 +51563 +51577 +51581 +51593 +51599 +51607 +51613 +51631 +51637 +51647 +51659 +51673 +51679 +51683 +51691 +51713 +51719 +51721 +51749 +51767 +51769 +51787 +51797 +51803 +51817 +51827 
+51829 +51839 +51853 +51859 +51869 +51871 +51893 +51899 +51907 +51913 +51929 +51941 +51949 +51971 +51973 +51977 +51991 +52009 +52021 +52027 +52051 +52057 +52067 +52069 +52081 +52103 +52121 +52127 +52147 +52153 +52163 +52177 +52181 +52183 +52189 +52201 +52223 +52237 +52249 +52253 +52259 +52267 +52289 +52291 +52301 +52313 +52321 +52361 +52363 +52369 +52379 +52387 +52391 +52433 +52453 +52457 +52489 +52501 +52511 +52517 +52529 +52541 +52543 +52553 +52561 +52567 +52571 +52579 +52583 +52609 +52627 +52631 +52639 +52667 +52673 +52691 +52697 +52709 +52711 +52721 +52727 +52733 +52747 +52757 +52769 +52783 +52807 +52813 +52817 +52837 +52859 +52861 +52879 +52883 +52889 +52901 +52903 +52919 +52937 +52951 +52957 +52963 +52967 +52973 +52981 +52999 +53003 +53017 +53047 +53051 +53069 +53077 +53087 +53089 +53093 +53101 +53113 +53117 +53129 +53147 +53149 +53161 +53171 +53173 +53189 +53197 +53201 +53231 +53233 +53239 +53267 +53269 +53279 +53281 +53299 +53309 +53323 +53327 +53353 +53359 +53377 +53381 +53401 +53407 +53411 +53419 +53437 +53441 +53453 +53479 +53503 +53507 +53527 +53549 +53551 +53569 +53591 +53593 +53597 +53609 +53611 +53617 +53623 +53629 +53633 +53639 +53653 +53657 +53681 +53693 +53699 +53717 +53719 +53731 +53759 +53773 +53777 +53783 +53791 +53813 +53819 +53831 +53849 +53857 +53861 +53881 +53887 +53891 +53897 +53899 +53917 +53923 +53927 +53939 +53951 +53959 +53987 +53993 +54001 +54011 +54013 +54037 +54049 +54059 +54083 +54091 +54101 +54121 +54133 +54139 +54151 +54163 +54167 +54181 +54193 +54217 +54251 +54269 +54277 +54287 +54293 +54311 +54319 +54323 +54331 +54347 +54361 +54367 +54371 +54377 +54401 +54403 +54409 +54413 +54419 +54421 +54437 +54443 +54449 +54469 +54493 +54497 +54499 +54503 +54517 +54521 +54539 +54541 +54547 +54559 +54563 +54577 +54581 +54583 +54601 +54617 +54623 +54629 +54631 +54647 +54667 +54673 +54679 +54709 +54713 +54721 +54727 +54751 +54767 +54773 +54779 +54787 +54799 +54829 +54833 +54851 +54869 +54877 +54881 +54907 +54917 +54919 +54941 +54949 +54959 
+54973 +54979 +54983 +55001 +55009 +55021 +55049 +55051 +55057 +55061 +55073 +55079 +55103 +55109 +55117 +55127 +55147 +55163 +55171 +55201 +55207 +55213 +55217 +55219 +55229 +55243 +55249 +55259 +55291 +55313 +55331 +55333 +55337 +55339 +55343 +55351 +55373 +55381 +55399 +55411 +55439 +55441 +55457 +55469 +55487 +55501 +55511 +55529 +55541 +55547 +55579 +55589 +55603 +55609 +55619 +55621 +55631 +55633 +55639 +55661 +55663 +55667 +55673 +55681 +55691 +55697 +55711 +55717 +55721 +55733 +55763 +55787 +55793 +55799 +55807 +55813 +55817 +55819 +55823 +55829 +55837 +55843 +55849 +55871 +55889 +55897 +55901 +55903 +55921 +55927 +55931 +55933 +55949 +55967 +55987 +55997 +56003 +56009 +56039 +56041 +56053 +56081 +56087 +56093 +56099 +56101 +56113 +56123 +56131 +56149 +56167 +56171 +56179 +56197 +56207 +56209 +56237 +56239 +56249 +56263 +56267 +56269 +56299 +56311 +56333 +56359 +56369 +56377 +56383 +56393 +56401 +56417 +56431 +56437 +56443 +56453 +56467 +56473 +56477 +56479 +56489 +56501 +56503 +56509 +56519 +56527 +56531 +56533 +56543 +56569 +56591 +56597 +56599 +56611 +56629 +56633 +56659 +56663 +56671 +56681 +56687 +56701 +56711 +56713 +56731 +56737 +56747 +56767 +56773 +56779 +56783 +56807 +56809 +56813 +56821 +56827 +56843 +56857 +56873 +56891 +56893 +56897 +56909 +56911 +56921 +56923 +56929 +56941 +56951 +56957 +56963 +56983 +56989 +56993 +56999 +57037 +57041 +57047 +57059 +57073 +57077 +57089 +57097 +57107 +57119 +57131 +57139 +57143 +57149 +57163 +57173 +57179 +57191 +57193 +57203 +57221 +57223 +57241 +57251 +57259 +57269 +57271 +57283 +57287 +57301 +57329 +57331 +57347 +57349 +57367 +57373 +57383 +57389 +57397 +57413 +57427 +57457 +57467 +57487 +57493 +57503 +57527 +57529 +57557 +57559 +57571 +57587 +57593 +57601 +57637 +57641 +57649 +57653 +57667 +57679 +57689 +57697 +57709 +57713 +57719 +57727 +57731 +57737 +57751 +57773 +57781 +57787 +57791 +57793 +57803 +57809 +57829 +57839 +57847 +57853 +57859 +57881 +57899 +57901 +57917 +57923 +57943 +57947 +57973 +57977 
+57991 +58013 +58027 +58031 +58043 +58049 +58057 +58061 +58067 +58073 +58099 +58109 +58111 +58129 +58147 +58151 +58153 +58169 +58171 +58189 +58193 +58199 +58207 +58211 +58217 +58229 +58231 +58237 +58243 +58271 +58309 +58313 +58321 +58337 +58363 +58367 +58369 +58379 +58391 +58393 +58403 +58411 +58417 +58427 +58439 +58441 +58451 +58453 +58477 +58481 +58511 +58537 +58543 +58549 +58567 +58573 +58579 +58601 +58603 +58613 +58631 +58657 +58661 +58679 +58687 +58693 +58699 +58711 +58727 +58733 +58741 +58757 +58763 +58771 +58787 +58789 +58831 +58889 +58897 +58901 +58907 +58909 +58913 +58921 +58937 +58943 +58963 +58967 +58979 +58991 +58997 +59009 +59011 +59021 +59023 +59029 +59051 +59053 +59063 +59069 +59077 +59083 +59093 +59107 +59113 +59119 +59123 +59141 +59149 +59159 +59167 +59183 +59197 +59207 +59209 +59219 +59221 +59233 +59239 +59243 +59263 +59273 +59281 +59333 +59341 +59351 +59357 +59359 +59369 +59377 +59387 +59393 +59399 +59407 +59417 +59419 +59441 +59443 +59447 +59453 +59467 +59471 +59473 +59497 +59509 +59513 +59539 +59557 +59561 +59567 +59581 +59611 +59617 +59621 +59627 +59629 +59651 +59659 +59663 +59669 +59671 +59693 +59699 +59707 +59723 +59729 +59743 +59747 +59753 +59771 +59779 +59791 +59797 +59809 +59833 +59863 +59879 +59887 +59921 +59929 +59951 +59957 +59971 +59981 +59999 +60013 +60017 +60029 +60037 +60041 +60077 +60083 +60089 +60091 +60101 +60103 +60107 +60127 +60133 +60139 +60149 +60161 +60167 +60169 +60209 +60217 +60223 +60251 +60257 +60259 +60271 +60289 +60293 +60317 +60331 +60337 +60343 +60353 +60373 +60383 +60397 +60413 +60427 +60443 +60449 +60457 +60493 +60497 +60509 +60521 +60527 +60539 +60589 +60601 +60607 +60611 +60617 +60623 +60631 +60637 +60647 +60649 +60659 +60661 +60679 +60689 +60703 +60719 +60727 +60733 +60737 +60757 +60761 +60763 +60773 +60779 +60793 +60811 +60821 +60859 +60869 +60887 +60889 +60899 +60901 +60913 +60917 +60919 +60923 +60937 +60943 +60953 +60961 +61001 +61007 +61027 +61031 +61043 +61051 +61057 +61091 +61099 +61121 +61129 +61141 
+61151 +61153 +61169 +61211 +61223 +61231 +61253 +61261 +61283 +61291 +61297 +61331 +61333 +61339 +61343 +61357 +61363 +61379 +61381 +61403 +61409 +61417 +61441 +61463 +61469 +61471 +61483 +61487 +61493 +61507 +61511 +61519 +61543 +61547 +61553 +61559 +61561 +61583 +61603 +61609 +61613 +61627 +61631 +61637 +61643 +61651 +61657 +61667 +61673 +61681 +61687 +61703 +61717 +61723 +61729 +61751 +61757 +61781 +61813 +61819 +61837 +61843 +61861 +61871 +61879 +61909 +61927 +61933 +61949 +61961 +61967 +61979 +61981 +61987 +61991 +62003 +62011 +62017 +62039 +62047 +62053 +62057 +62071 +62081 +62099 +62119 +62129 +62131 +62137 +62141 +62143 +62171 +62189 +62191 +62201 +62207 +62213 +62219 +62233 +62273 +62297 +62299 +62303 +62311 +62323 +62327 +62347 +62351 +62383 +62401 +62417 +62423 +62459 +62467 +62473 +62477 +62483 +62497 +62501 +62507 +62533 +62539 +62549 +62563 +62581 +62591 +62597 +62603 +62617 +62627 +62633 +62639 +62653 +62659 +62683 +62687 +62701 +62723 +62731 +62743 +62753 +62761 +62773 +62791 +62801 +62819 +62827 +62851 +62861 +62869 +62873 +62897 +62903 +62921 +62927 +62929 +62939 +62969 +62971 +62981 +62983 +62987 +62989 +63029 +63031 +63059 +63067 +63073 +63079 +63097 +63103 +63113 +63127 +63131 +63149 +63179 +63197 +63199 +63211 +63241 +63247 +63277 +63281 +63299 +63311 +63313 +63317 +63331 +63337 +63347 +63353 +63361 +63367 +63377 +63389 +63391 +63397 +63409 +63419 +63421 +63439 +63443 +63463 +63467 +63473 +63487 +63493 +63499 +63521 +63527 +63533 +63541 +63559 +63577 +63587 +63589 +63599 +63601 +63607 +63611 +63617 +63629 +63647 +63649 +63659 +63667 +63671 +63689 +63691 +63697 +63703 +63709 +63719 +63727 +63737 +63743 +63761 +63773 +63781 +63793 +63799 +63803 +63809 +63823 +63839 +63841 +63853 +63857 +63863 +63901 +63907 +63913 +63929 +63949 +63977 +63997 +64007 +64013 +64019 +64033 +64037 +64063 +64067 +64081 +64091 +64109 +64123 +64151 +64153 +64157 +64171 +64187 +64189 +64217 +64223 +64231 +64237 +64271 +64279 +64283 +64301 +64303 +64319 +64327 +64333 
+64373 +64381 +64399 +64403 +64433 +64439 +64451 +64453 +64483 +64489 +64499 +64513 +64553 +64567 +64577 +64579 +64591 +64601 +64609 +64613 +64621 +64627 +64633 +64661 +64663 +64667 +64679 +64693 +64709 +64717 +64747 +64763 +64781 +64783 +64793 +64811 +64817 +64849 +64853 +64871 +64877 +64879 +64891 +64901 +64919 +64921 +64927 +64937 +64951 +64969 +64997 +65003 +65011 +65027 +65029 +65033 +65053 +65063 +65071 +65089 +65099 +65101 +65111 +65119 +65123 +65129 +65141 +65147 +65167 +65171 +65173 +65179 +65183 +65203 +65213 +65239 +65257 +65267 +65269 +65287 +65293 +65309 +65323 +65327 +65353 +65357 +65371 +65381 +65393 +65407 +65413 +65419 +65423 +65437 +65447 +65449 +65479 +65497 +65519 +65521 diff --git a/security/nss/lib/freebl/mpi/doc/prng.pod b/security/nss/lib/freebl/mpi/doc/prng.pod new file mode 100644 index 0000000000..6da4d4a9c4 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/prng.pod @@ -0,0 +1,38 @@ +=head1 NAME + + prng - pseudo-random number generator + +=head1 SYNOPSIS + + prng [count] + +=head1 DESCRIPTION + +B<prng> generates 32-bit pseudo-random integers using the +Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using +the standard C library's rand() function, which is itself seeded from the +system clock and the process ID number. Thus, the values generated +are not particularly useful for cryptographic applications, but they +are in general much better than the typical output of the usual +multiplicative congruential generator used by most runtime libraries. + +You may optionally specify how many random values should be generated +by giving a I<count> argument on the command line. If you do not +specify a count, only one random value will be generated. The results +are output to the standard output in decimal notation, one value per +line. + +=head1 RESTRICTIONS + +As stated above, B<prng> uses the C library's rand() function to seed +the generator, so it is not terribly suitable for cryptographic +applications. 
Also note that each time you run the program, a new +seed is generated, so it is better to run it once with a I<count> +parameter than it is to run it multiple times to generate several +values. + +=head1 AUTHOR + + Michael J. Fromberger + Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved + Thayer School of Engineering, Dartmouth College, Hanover, NH USA diff --git a/security/nss/lib/freebl/mpi/doc/redux.txt b/security/nss/lib/freebl/mpi/doc/redux.txt new file mode 100644 index 0000000000..0df0f0390a --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/redux.txt @@ -0,0 +1,86 @@ +Modular Reduction + +Usually, modular reduction is accomplished by long division, using the +mp_div() or mp_mod() functions. However, when performing modular +exponentiation, you spend a lot of time reducing by the same modulus +again and again. For this purpose, doing a full division for each +multiplication is quite inefficient. + +For this reason, the mp_exptmod() function does not perform modular +reductions in the usual way, but instead takes advantage of an +algorithm due to Barrett, as described by Menezes, van Oorschot and +Vanstone in their book _Handbook of Applied Cryptography_, published +by the CRC Press (see Chapter 14 for details). This method reduces +most of the computation of reduction to efficient shifting and masking +operations, and avoids the multiple-precision division entirely. + +Here is a brief synopsis of Barrett reduction, as it is implemented in +this library. + +Let b denote the radix of the computation (one more than the maximum +value that can be denoted by an mp_digit). Let m be the modulus, and +let k be the number of significant digits of m. Let x be the value to +be reduced modulo m. 
By the Division Theorem, there exist unique +integers Q and R such that: + + x = Qm + R, 0 <= R < m + +Barrett reduction takes advantage of the fact that you can easily +approximate Q to within two, given a value M such that: + + $M = \left\lfloor \frac{b^{2k}}{m} \right\rfloor$ + +Computation of M requires a full-precision division step, so if you +are only doing a single reduction by m, you gain no advantage. +However, when multiple reductions by the same m are required, this +division need only be done once, beforehand. Using this, we can use +the following equation to compute Q', an approximation of Q: + + $Q' = \left\lfloor \frac{\left\lfloor x / b^{k-1} \right\rfloor \cdot M}{b^{k+1}} \right\rfloor$ + +The divisions by b^(k-1) and b^(k+1) and the floor() functions can be +efficiently implemented with shifts and masks, leaving only a single +multiplication to be performed to get this approximation. It can be +shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with +two additional subtractions to bring the value into line with the +actual value of Q. + +Once we've got Q', we basically multiply that by m and subtract from +x, yielding: + + x - Q'm = Qm + R - Q'm + +Since we know the constraint on Q', this is one of: + + R + m + R + 2m + R + +Since R < m by the Division Theorem, we can simply subtract off m +until we get a value in the correct range, which will happen with no +more than 2 subtractions: + + v = x - Q'm + + while(v >= m) + v = v - m + endwhile + + +In random performance trials, modular exponentiation using this method +of reduction gave around a 40% speedup over using the division for +reduction. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/sqrt.txt b/security/nss/lib/freebl/mpi/doc/sqrt.txt new file mode 100644 index 0000000000..4529cbfc46 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/sqrt.txt @@ -0,0 +1,50 @@ +Square Root + +A simple iterative algorithm is used to compute the greatest integer +less than or equal to the square root. Essentially, this is Newton's +linear approximation, computed by finding successive values of the +equation: + + $x_{k+1} = x_k - \frac{x_k^2 - V}{2 x_k}$ + +...where V is the value for which the square root is being sought. In +essence, what is happening here is that we guess a value for the +square root, then figure out how far off we were by squaring our guess +and subtracting the target. Using this value, we compute a linear +approximation for the error, and adjust the "guess". We keep doing +this until the precision gets low enough that the above equation +yields a quotient of zero. At this point, our last guess is one +greater than the square root we're seeking. + +The initial guess is computed by dividing V by 4, which is a heuristic +I have found to be fairly good on average. This also has the +advantage of being very easy to compute efficiently, even for large +values. + +So, the resulting algorithm works as follows: + + x = V / 4 /* compute initial guess */ + + loop + t = (x * x) - V /* Compute absolute error */ + u = 2 * x /* Adjust by tangent slope */ + t = t / u + + /* Loop is done if error is zero */ + if(t == 0) + break + + /* Adjust guess by error term */ + x = x - t + end + + x = x - 1 + +The result of the computation is the value of x. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/square.txt b/security/nss/lib/freebl/mpi/doc/square.txt new file mode 100644 index 0000000000..edbb97882c --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/square.txt @@ -0,0 +1,72 @@ +Squaring Algorithm + +When you are squaring a value, you can take advantage of the fact that +half the multiplications performed by the more general multiplication +algorithm (see 'mul.txt' for a description) are redundant when the +multiplicand equals the multiplier. + +In particular, the modified algorithm is: + +k = 0 +for j <- 0 to (#a - 1) + w = c[2*j] + (a[j] ^ 2); + k = w div R + + for i <- j+1 to (#a - 1) + w = (2 * a[j] * a[i]) + k + c[i+j] + c[i+j] = w mod R + k = w div R + endfor + c[i+j] = k; + k = 0; +endfor + +On the surface, this looks identical to the multiplication algorithm; +however, note the following differences: + + - precomputation of the leading term in the outer loop + + - i runs from j+1 instead of from zero + + - doubling of a[i] * a[j] in the inner product + +Unfortunately, the construction of the inner product is such that we +need more than two digits to represent the inner product, in some +cases. In a C implementation, this means that some gymnastics must be +performed in order to handle overflow, for which C has no direct +abstraction. We do this by observing the following: + +If we have multiplied a[i] and a[j], and the product is more than half +the maximum value expressible in two digits, then doubling this result +will overflow into a third digit. If this occurs, we take note of the +overflow, and double it anyway -- C integer arithmetic ignores +overflow, so the two digits we get back should still be valid, modulo +the overflow. + +Having doubled this value, we now have to add in the remainders and +the digits already computed by earlier steps. If we did not overflow +in the previous step, we might still cause an overflow here. 
That +will happen whenever the maximum value expressible in two digits, less +the amount we have to add, is greater than the result of the previous +step. Thus, the overflow computation is: + + + u = 0 + w = a[i] * a[j] + + if(w > (R - 1)/ 2) + u = 1; + + w = w * 2 + v = c[i + j] + k + + if(u == 0 && (R - 1 - v) < w) + u = 1 + +If there is an overflow, u will be 1, otherwise u will be 0. The rest +of the parameters are the same as they are in the above description. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/doc/timing.txt b/security/nss/lib/freebl/mpi/doc/timing.txt new file mode 100644 index 0000000000..58f37c9dff --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/timing.txt @@ -0,0 +1,213 @@ +MPI Library Timing Tests + +Hardware/OS +(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3 +(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3 +(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20 +(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac +(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1 +(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2 + +Compiler +(1) MIPSpro C 7.2.1 -O3 optimizations +(2) GCC 2.95.1 -O3 optimizations +(3) IBM AIX xlc -O3 optimizations (version unknown) +(4) EGCS 2.91.66 -O3 optimizations +(5) Metrowerks CodeWarrior 5.0 C, all optimizations +(6) MIPSpro C 7.30 -O3 optimizations +(7) same as (6), with optimized libmalloc.so + +Timings are given in seconds, computed using the C library's clock() +function. The first column gives the hardware and compiler +configuration used for the test. The second column indicates the +number of tests that were aggregated to get the statistics for that +size. These were compiled using 16 bit digits. 
+ +Source data were generated randomly using a fixed seed, so they should +be internally consistent, but may vary on different systems depending +on the C library. Also, since the resolution of the timer accessed by +clock() varies, there may be some variance in the precision of these +measurements. + +Prime Generation (primegen) + +128 bits: +A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46 +A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55 +B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29 +C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14 +D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70 +A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48 +A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07 + +192 bits: +A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96 +A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55 +B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97 +C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24 +D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63 +A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84 +A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88 + +256 bits: +A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79 +A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11 +B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35 +C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91 +D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00 +A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46 +A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60 + +320 bits: +A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81 +A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03 +B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80 +C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59 +D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73 +A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01 +A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78 + +384 bits: +A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89 +A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14 +B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78 +C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13 +D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81 +A6 
200 min=0.11, avg=1.73, max=7.36, sum=345.55 +A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02 + +448 bits: +A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63 +A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86 +B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86 +C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36 +D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17 +A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58 +A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16 + +512 bits: +A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35 +A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18 +B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45 +C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22 +D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11 +A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83 +A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02 + +Modular Exponentiation (metime) + +The following results are aggregated from 200 pseudo-randomly +generated tests, based on a fixed seed. + + base, exponent, and modulus size (bits) +P/C 128 192 256 320 384 448 512 640 768 896 1024 +------- ----------------------------------------------------------------- +A1 0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040 +A2 0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668 +B3 0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840 +C4 0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507 +D4 0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899 +E5 0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317 +A6 0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880 +A7 0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855 + +Multiplication and Squaring tests, (mulsqr) + +The following results are aggregated from 500000 pseudo-randomly +generated tests, based on a per-run wall-clock seed. Times are given +in seconds, except where indicated in microseconds (us). 
+ +(A1) + +bits multiply square ad percent time/mult time/square +64 9.33 9.15 > 1.9 18.7us 18.3us +128 10.88 10.44 > 4.0 21.8us 20.9us +192 13.30 11.89 > 10.6 26.7us 23.8us +256 14.88 12.64 > 15.1 29.8us 25.3us +320 18.64 15.01 > 19.5 37.3us 30.0us +384 23.11 17.70 > 23.4 46.2us 35.4us +448 28.28 20.88 > 26.2 56.6us 41.8us +512 34.09 24.51 > 28.1 68.2us 49.0us +640 47.86 33.25 > 30.5 95.7us 66.5us +768 64.91 43.54 > 32.9 129.8us 87.1us +896 84.49 55.48 > 34.3 169.0us 111.0us +1024 107.25 69.21 > 35.5 214.5us 138.4us +1536 227.97 141.91 > 37.8 456.0us 283.8us +2048 394.05 242.15 > 38.5 788.1us 484.3us + +(A2) + +bits multiply square ad percent time/mult time/square +64 7.87 7.95 < 1.0 15.7us 15.9us +128 9.40 9.19 > 2.2 18.8us 18.4us +192 11.15 10.59 > 5.0 22.3us 21.2us +256 12.02 11.16 > 7.2 24.0us 22.3us +320 14.62 13.43 > 8.1 29.2us 26.9us +384 17.72 15.80 > 10.8 35.4us 31.6us +448 21.24 18.51 > 12.9 42.5us 37.0us +512 25.36 21.78 > 14.1 50.7us 43.6us +640 34.57 29.00 > 16.1 69.1us 58.0us +768 46.10 37.60 > 18.4 92.2us 75.2us +896 58.94 47.72 > 19.0 117.9us 95.4us +1024 73.76 59.12 > 19.8 147.5us 118.2us +1536 152.00 118.80 > 21.8 304.0us 237.6us +2048 259.41 199.57 > 23.1 518.8us 399.1us + +(B3) + +bits multiply square ad percent time/mult time/square +64 2.60 2.47 > 5.0 5.20us 4.94us +128 4.43 4.06 > 8.4 8.86us 8.12us +192 7.03 6.10 > 13.2 14.1us 12.2us +256 10.44 8.59 > 17.7 20.9us 17.2us +320 14.44 11.64 > 19.4 28.9us 23.3us +384 19.12 15.08 > 21.1 38.2us 30.2us +448 24.55 19.09 > 22.2 49.1us 38.2us +512 31.03 23.53 > 24.2 62.1us 47.1us +640 45.05 33.80 > 25.0 90.1us 67.6us +768 63.02 46.05 > 26.9 126.0us 92.1us +896 83.74 60.29 > 28.0 167.5us 120.6us +1024 106.73 76.65 > 28.2 213.5us 153.3us +1536 228.94 160.98 > 29.7 457.9us 322.0us +2048 398.08 275.93 > 30.7 796.2us 551.9us + +(C4) + +bits multiply square ad percent time/mult time/square +64 1.34 1.28 > 4.5 2.68us 2.56us +128 2.76 2.59 > 6.2 5.52us 5.18us +192 4.52 4.16 > 8.0 9.04us 8.32us +256 6.64 5.99 > 
9.8 13.3us 12.0us +320 9.20 8.13 > 11.6 18.4us 16.3us +384 12.01 10.58 > 11.9 24.0us 21.2us +448 15.24 13.33 > 12.5 30.5us 26.7us +512 19.02 16.46 > 13.5 38.0us 32.9us +640 27.56 23.54 > 14.6 55.1us 47.1us +768 37.89 31.78 > 16.1 75.8us 63.6us +896 49.24 41.42 > 15.9 98.5us 82.8us +1024 62.59 52.18 > 16.6 125.2us 104.3us +1536 131.66 107.72 > 18.2 263.3us 215.4us +2048 226.45 182.95 > 19.2 453.0us 365.9us + +(A7) + +bits multiply square ad percent time/mult time/square +64 1.74 1.71 > 1.7 3.48us 3.42us +128 3.48 2.96 > 14.9 6.96us 5.92us +192 5.74 4.60 > 19.9 11.5us 9.20us +256 8.75 6.61 > 24.5 17.5us 13.2us +320 12.5 8.99 > 28.1 25.0us 18.0us +384 16.9 11.9 > 29.6 33.8us 23.8us +448 22.2 15.2 > 31.7 44.4us 30.4us +512 28.3 19.0 > 32.7 56.6us 38.0us +640 42.4 28.0 > 34.0 84.8us 56.0us +768 59.4 38.5 > 35.2 118.8us 77.0us +896 79.5 51.2 > 35.6 159.0us 102.4us +1024 102.6 65.5 > 36.2 205.2us 131.0us +1536 224.3 140.6 > 37.3 448.6us 281.2us +2048 393.4 244.3 > 37.9 786.8us 488.6us + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/hpma512.s b/security/nss/lib/freebl/mpi/hpma512.s new file mode 100644 index 0000000000..ae9da630d1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/hpma512.s @@ -0,0 +1,615 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* + * + * This PA-RISC 2.0 function computes the product of two unsigned integers, + * and adds the result to a previously computed integer. The multiplicand + * is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in + * memory in little-double-wordian order. 
The multiplier is an unsigned + * 64-bit integer. The previously computed integer to which the product is + * added is located in the result ("res") area, and is assumed to be a + * 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory + * in little-double-wordian order. This value normally will be the result + * of a previously computed nine doubleword result. It is not necessary + * to pad the multiplicand with an additional 64-bit zero doubleword. + * + * Multiplicand, multiplier, and addend ideally should be aligned at + * 16-byte boundaries for best performance. The code will function + * correctly for alignment at eight-byte boundaries which are not 16-byte + * boundaries, but the execution may be slightly slower due to even/odd + * bank conflicts on PA-RISC 8000 processors. + * + * This function is designed to accept the same calling sequence as Bill + * Ackerman's "maxpy_little" function. The carry from the ninth doubleword + * of the result is written to the tenth word of the result, as is done by + * Bill Ackerman's function. The final carry also is returned as an + * integer, which may be ignored. The function prototype may be either + * of the following: + * + * void multacc512( int l, chunk* m, const chunk* a, chunk* res ); + * or + * int multacc512( int l, chunk* m, const chunk* a, chunk* res ); + * + * where: "l" originally denoted vector lengths. This parameter is + * ignored. This function always assumes a multiplicand length of + * 512 bits (eight doublewords), and addend and result lengths of + * 576 bits (nine doublewords). + * + * "m" is a pointer to the doubleword multiplier, ideally aligned + * on a 16-byte boundary. + * + * "a" is a pointer to the eight-doubleword multiplicand, stored + * in little-double-wordian order, and ideally aligned on a 16-byte + * boundary. + * + * "res" is a pointer to the nine doubleword addend, and to the + * nine-doubleword product computed by this function. 
The result + * also is stored in little-double-wordian order, and ideally is + * aligned on a 16-byte boundary. It is expected that the alignment + * of the "res" area may alternate between even/odd doubleword + * boundaries for successive calls for 512-bit x 512-bit + * multiplications. + * + * The code for this function has been scheduled to use the parallelism + * of the PA-RISC 8000 series microprocessors as well as the author was + * able. Comments and/or suggestions for improvement are welcomed. + * + * The code is "64-bit safe". This means it may be called in either + * the 32ILP context or the 64LP context. All 64-bits of registers are + * saved and restored. + * + * This code is self-contained. It requires no other header files in order + * to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic + * definitions for registers and stack offsets are included within this + * one source file. + * + * This is a leaf routine. As such, minimal use is made of the stack area. + * Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight + * general registers, and 128 bytes are used to move intermediate products + * from the floating-point registers to the general registers. Stack + * protocols assure proper alignment of these areas. 
+ * + */ + + +/* ====================================================================*/ +/* symbolic definitions for PA-RISC registers */ +/* in the MIPS style, avoids lots of case shifts */ +/* assignments (except t4) preserve register number parity */ +/* ====================================================================*/ + +#define zero %r0 /* permanent zero */ +#define t5 %r1 /* temp register, altered by addil */ + +#define rp %r2 /* return pointer */ + +#define s1 %r3 /* callee saves register*/ +#define s0 %r4 /* callee saves register*/ +#define s3 %r5 /* callee saves register*/ +#define s2 %r6 /* callee saves register*/ +#define s5 %r7 /* callee saves register*/ +#define s4 %r8 /* callee saves register*/ +#define s7 %r9 /* callee saves register*/ +#define s6 %r10 /* callee saves register*/ + +#define t1 %r19 /* caller saves register*/ +#define t0 %r20 /* caller saves register*/ +#define t3 %r21 /* caller saves register*/ +#define t2 %r22 /* caller saves register*/ + +#define a3 %r23 /* fourth argument register, high word */ +#define a2 %r24 /* third argument register, low word*/ +#define a1 %r25 /* second argument register, high word*/ +#define a0 %r26 /* first argument register, low word*/ + +#define v0 %r28 /* high order return value*/ +#define v1 %r29 /* low order return value*/ + +#define sp %r30 /* stack pointer*/ +#define t4 %r31 /* temporary register */ + +#define fa0 %fr4 /* first argument register*/ +#define fa1 %fr5 /* second argument register*/ +#define fa2 %fr6 /* third argument register*/ +#define fa3 %fr7 /* fourth argument register*/ + +#define fa0r %fr4R /* first argument register*/ +#define fa1r %fr5R /* second argument register*/ +#define fa2r %fr6R /* third argument register*/ +#define fa3r %fr7R /* fourth argument register*/ + +#define ft0 %fr8 /* caller saves register*/ +#define ft1 %fr9 /* caller saves register*/ +#define ft2 %fr10 /* caller saves register*/ +#define ft3 %fr11 /* caller saves register*/ + +#define ft0r %fr8R /* caller
saves register*/ +#define ft1r %fr9R /* caller saves register*/ +#define ft2r %fr10R /* caller saves register*/ +#define ft3r %fr11R /* caller saves register*/ + +#define ft4 %fr22 /* caller saves register*/ +#define ft5 %fr23 /* caller saves register*/ +#define ft6 %fr24 /* caller saves register*/ +#define ft7 %fr25 /* caller saves register*/ +#define ft8 %fr26 /* caller saves register*/ +#define ft9 %fr27 /* caller saves register*/ +#define ft10 %fr28 /* caller saves register*/ +#define ft11 %fr29 /* caller saves register*/ +#define ft12 %fr30 /* caller saves register*/ +#define ft13 %fr31 /* caller saves register*/ + +#define ft4r %fr22R /* caller saves register*/ +#define ft5r %fr23R /* caller saves register*/ +#define ft6r %fr24R /* caller saves register*/ +#define ft7r %fr25R /* caller saves register*/ +#define ft8r %fr26R /* caller saves register*/ +#define ft9r %fr27R /* caller saves register*/ +#define ft10r %fr28R /* caller saves register*/ +#define ft11r %fr29R /* caller saves register*/ +#define ft12r %fr30R /* caller saves register*/ +#define ft13r %fr31R /* caller saves register*/ + + + +/* ================================================================== */ +/* functional definitions for PA-RISC registers */ +/* ================================================================== */ + +/* general registers */ + +#define T1 a0 /* temp, (length parameter ignored) */ + +#define pM a1 /* -> 64-bit multiplier */ +#define T2 a1 /* temp, (after fetching multiplier) */ + +#define pA a2 /* -> multiplicand vector (8 64-bit words) */ +#define T3 a2 /* temp, (after fetching multiplicand) */ + +#define pR a3 /* -> addend vector (9 64-bit doublewords), + result vector (9 64-bit doublewords) */ + +#define S0 s0 /* callee saves summand registers */ +#define S1 s1 +#define S2 s2 +#define S3 s3 +#define S4 s4 +#define S5 s5 +#define S6 s6 +#define S7 s7 + +#define S8 v0 /* caller saves summand registers */ +#define S9 v1 +#define S10 t0 +#define S11 t1 +#define S12 t2
+#define S13 t3 +#define S14 t4 +#define S15 t5 + + + +/* floating-point registers */ + +#define M fa0 /* multiplier double word */ +#define MR fa0r /* low order half of multiplier double word */ +#define ML fa0 /* high order half of multiplier double word */ + +#define A0 fa2 /* multiplicand double word 0 */ +#define A0R fa2r /* low order half of multiplicand double word */ +#define A0L fa2 /* high order half of multiplicand double word */ + +#define A1 fa3 /* multiplicand double word 1 */ +#define A1R fa3r /* low order half of multiplicand double word */ +#define A1L fa3 /* high order half of multiplicand double word */ + +#define A2 ft0 /* multiplicand double word 2 */ +#define A2R ft0r /* low order half of multiplicand double word */ +#define A2L ft0 /* high order half of multiplicand double word */ + +#define A3 ft1 /* multiplicand double word 3 */ +#define A3R ft1r /* low order half of multiplicand double word */ +#define A3L ft1 /* high order half of multiplicand double word */ + +#define A4 ft2 /* multiplicand double word 4 */ +#define A4R ft2r /* low order half of multiplicand double word */ +#define A4L ft2 /* high order half of multiplicand double word */ + +#define A5 ft3 /* multiplicand double word 5 */ +#define A5R ft3r /* low order half of multiplicand double word */ +#define A5L ft3 /* high order half of multiplicand double word */ + +#define A6 ft4 /* multiplicand double word 6 */ +#define A6R ft4r /* low order half of multiplicand double word */ +#define A6L ft4 /* high order half of multiplicand double word */ + +#define A7 ft5 /* multiplicand double word 7 */ +#define A7R ft5r /* low order half of multiplicand double word */ +#define A7L ft5 /* high order half of multiplicand double word */ + +#define P0 ft6 /* product word 0 */ +#define P1 ft7 /* product word 0 */ +#define P2 ft8 /* product word 0 */ +#define P3 ft9 /* product word 0 */ +#define P4 ft10 /* product word 0 */ +#define P5 ft11 /* product word 0 */ +#define P6 ft12 /* product word 
0 */ +#define P7 ft13 /* product word 0 */ + + + + +/* ====================================================================== */ +/* symbolic definitions for HP-UX stack offsets */ +/* symbolic definitions for memory NOPs */ +/* ====================================================================== */ + +#define ST_SZ 192 /* stack area total size */ + +#define SV0 -192(sp) /* general register save area */ +#define SV1 -184(sp) +#define SV2 -176(sp) +#define SV3 -168(sp) +#define SV4 -160(sp) +#define SV5 -152(sp) +#define SV6 -144(sp) +#define SV7 -136(sp) + +#define XF0 -128(sp) /* data transfer area */ +#define XF1 -120(sp) /* for floating-pt to integer regs */ +#define XF2 -112(sp) +#define XF3 -104(sp) +#define XF4 -96(sp) +#define XF5 -88(sp) +#define XF6 -80(sp) +#define XF7 -72(sp) +#define XF8 -64(sp) +#define XF9 -56(sp) +#define XF10 -48(sp) +#define XF11 -40(sp) +#define XF12 -32(sp) +#define XF13 -24(sp) +#define XF14 -16(sp) +#define XF15 -8(sp) + +#define mnop proberi (sp),3,zero /* memory NOP */ + + + + +/* ====================================================================== */ +/* assembler formalities */ +/* ====================================================================== */ + +#ifdef __LP64__ + .level 2.0W +#else + .level 2.0 +#endif + .space $TEXT$ + .subspa $CODE$ + .align 16 + +/* ====================================================================== */ +/* here to compute 64-bit x 512-bit product + 512-bit addend */ +/* ====================================================================== */ + +multacc512 + .PROC + .CALLINFO + .ENTRY + fldd 0(pM),M ; multiplier double word + ldo ST_SZ(sp),sp ; push stack + + fldd 0(pA),A0 ; multiplicand double word 0 + std S1,SV1 ; save s1 + + fldd 16(pA),A2 ; multiplicand double word 2 + std S3,SV3 ; save s3 + + fldd 32(pA),A4 ; multiplicand double word 4 + std S5,SV5 ; save s5 + + fldd 48(pA),A6 ; multiplicand double word 6 + std S7,SV7 ; save s7 + + + std S0,SV0 ; save s0 + fldd 8(pA),A1 ; 
multiplicand double word 1 + xmpyu MR,A0L,P0 ; A0 cross 32-bit word products + xmpyu ML,A0R,P2 + + std S2,SV2 ; save s2 + fldd 24(pA),A3 ; multiplicand double word 3 + xmpyu MR,A2L,P4 ; A2 cross 32-bit word products + xmpyu ML,A2R,P6 + + std S4,SV4 ; save s4 + fldd 40(pA),A5 ; multiplicand double word 5 + + std S6,SV6 ; save s6 + fldd 56(pA),A7 ; multiplicand double word 7 + + + fstd P0,XF0 ; MR * A0L + xmpyu MR,A0R,P0 ; A0 right 32-bit word product + xmpyu MR,A1L,P1 ; A1 cross 32-bit word product + + fstd P2,XF2 ; ML * A0R + xmpyu ML,A0L,P2 ; A0 left 32-bit word product + xmpyu ML,A1R,P3 ; A1 cross 32-bit word product + + fstd P4,XF4 ; MR * A2L + xmpyu MR,A2R,P4 ; A2 right 32-bit word product + xmpyu MR,A3L,P5 ; A3 cross 32-bit word product + + fstd P6,XF6 ; ML * A2R + xmpyu ML,A2L,P6 ; A2 parallel 32-bit word product + xmpyu ML,A3R,P7 ; A3 cross 32-bit word product + + + ldd XF0,S0 ; MR * A0L + fstd P1,XF1 ; MR * A1L + + ldd XF2,S2 ; ML * A0R + fstd P3,XF3 ; ML * A1R + + ldd XF4,S4 ; MR * A2L + fstd P5,XF5 ; MR * A3L + xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products + xmpyu ML,A1L,P3 + + ldd XF6,S6 ; ML * A2R + fstd P7,XF7 ; ML * A3R + xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products + xmpyu ML,A3L,P7 + + + fstd P0,XF0 ; MR * A0R + ldd XF1,S1 ; MR * A1L + nop + add S0,S2,T1 ; A0 cross product sum + + fstd P2,XF2 ; ML * A0L + ldd XF3,S3 ; ML * A1R + add,dc zero,zero,S0 ; A0 cross product sum carry + depd,z T1,31,32,S2 ; A0 cross product sum << 32 + + fstd P4,XF4 ; MR * A2R + ldd XF5,S5 ; MR * A3L + shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32 + add S4,S6,T3 ; A2 cross product sum + + fstd P6,XF6 ; ML * A2L + ldd XF7,S7 ; ML * A3R + add,dc zero,zero,S4 ; A2 cross product sum carry + depd,z T3,31,32,S6 ; A2 cross product sum << 32 + + + ldd XF0,S8 ; MR * A0R + fstd P1,XF1 ; MR * A1R + xmpyu MR,A4L,P0 ; A4 cross 32-bit word product + xmpyu MR,A5L,P1 ; A5 cross 32-bit word product + + ldd XF2,S10 ; ML * A0L + fstd P3,XF3 ; ML * A1L + xmpyu ML,A4R,P2 ; 
A4 cross 32-bit word product + xmpyu ML,A5R,P3 ; A5 cross 32-bit word product + + ldd XF4,S12 ; MR * A2R + fstd P5,XF5 ; MR * A3R + xmpyu MR,A6L,P4 ; A6 cross 32-bit word product + xmpyu MR,A7L,P5 ; A7 cross 32-bit word product + + ldd XF6,S14 ; ML * A2L + fstd P7,XF7 ; ML * A3L + xmpyu ML,A6R,P6 ; A6 cross 32-bit word product + xmpyu ML,A7R,P7 ; A7 cross 32-bit word product + + + fstd P0,XF0 ; MR * A4L + ldd XF1,S9 ; MR * A1R + shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32 + add S1,S3,T1 ; A1 cross product sum + + fstd P2,XF2 ; ML * A4R + ldd XF3,S11 ; ML * A1L + add,dc zero,zero,S1 ; A1 cross product sum carry + depd,z T1,31,32,S3 ; A1 cross product sum << 32 + + fstd P4,XF4 ; MR * A6L + ldd XF5,S13 ; MR * A3R + shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32 + add S5,S7,T3 ; A3 cross product sum + + fstd P6,XF6 ; ML * A6R + ldd XF7,S15 ; ML * A3L + add,dc zero,zero,S5 ; A3 cross product sum carry + depd,z T3,31,32,S7 ; A3 cross product sum << 32 + + + shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32 + add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword + + add,dc S0,S10,S10 ; M * A0 left doubleword + add S3,S9,S9 ; M * A1 right doubleword + + add,dc S1,S11,S11 ; M * A1 left doubleword + add S6,S12,S12 ; M * A2 right doubleword + + + ldd 24(pR),S3 ; Addend word 3 + fstd P1,XF1 ; MR * A5L + add,dc S4,S14,S14 ; M * A2 left doubleword + xmpyu MR,A5R,P1 ; A5 right 32-bit word product + + ldd 8(pR),S1 ; Addend word 1 + fstd P3,XF3 ; ML * A5R + add S7,S13,S13 ; M * A3 right doubleword + xmpyu ML,A5L,P3 ; A5 left 32-bit word product + + ldd 0(pR),S7 ; Addend word 0 + fstd P5,XF5 ; MR * A7L + add,dc S5,S15,S15 ; M * A3 left doubleword + xmpyu MR,A7R,P5 ; A7 right 32-bit word product + + ldd 16(pR),S5 ; Addend word 2 + fstd P7,XF7 ; ML * A7R + add S10,S9,S9 ; P1 doubleword + xmpyu ML,A7L,P7 ; A7 left 32-bit word products + + + ldd XF0,S0 ; MR * A4L + fstd P1,XF9 ; MR * A5R + add,dc S11,S12,S12 ; P2 doubleword + xmpyu MR,A4R,P0 ; A4 right 32-bit
word product + + ldd XF2,S2 ; ML * A4R + fstd P3,XF11 ; ML * A5L + add,dc S14,S13,S13 ; P3 doubleword + xmpyu ML,A4L,P2 ; A4 left 32-bit word product + + ldd XF6,S6 ; ML * A6R + fstd P5,XF13 ; MR * A7R + add,dc zero,S15,T2 ; P4 partial doubleword + xmpyu MR,A6R,P4 ; A6 right 32-bit word product + + ldd XF4,S4 ; MR * A6L + fstd P7,XF15 ; ML * A7L + add S7,S8,S8 ; R0 + P0, new R0 doubleword + xmpyu ML,A6L,P6 ; A6 left 32-bit word product + + + fstd P0,XF0 ; MR * A4R + ldd XF7,S7 ; ML * A7R + add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword + + fstd P2,XF2 ; ML * A4L + ldd XF1,S1 ; MR * A5L + add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword + + fstd P4,XF4 ; MR * A6R + ldd XF5,S5 ; MR * A7L + add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword + + fstd P6,XF6 ; ML * A6L + ldd XF3,S3 ; ML * A5R + add,dc zero,T2,T2 ; c + partial P4 + add S0,S2,T1 ; A4 cross product sum + + + std S8,0(pR) ; save R0 + add,dc zero,zero,S0 ; A4 cross product sum carry + depd,z T1,31,32,S2 ; A4 cross product sum << 32 + + std S9,8(pR) ; save R1 + shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32 + add S4,S6,T3 ; A6 cross product sum + + std S12,16(pR) ; save R2 + add,dc zero,zero,S4 ; A6 cross product sum carry + depd,z T3,31,32,S6 ; A6 cross product sum << 32 + + + std S13,24(pR) ; save R3 + shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32 + add S1,S3,T1 ; A5 cross product sum + + ldd XF0,S8 ; MR * A4R + add,dc zero,zero,S1 ; A5 cross product sum carry + depd,z T1,31,32,S3 ; A5 cross product sum << 32 + + ldd XF2,S10 ; ML * A4L + ldd XF9,S9 ; MR * A5R + shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32 + add S5,S7,T3 ; A7 cross product sum + + ldd XF4,S12 ; MR * A6R + ldd XF11,S11 ; ML * A5L + add,dc zero,zero,S5 ; A7 cross product sum carry + depd,z T3,31,32,S7 ; A7 cross product sum << 32 + + ldd XF6,S14 ; ML * A6L + ldd XF13,S13 ; MR * A7R + shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32 + add S2,S8,S8 ; M * A4 right doubleword + + + ldd XF15,S15 ; ML * A7L + 
add,dc S0,S10,S10 ; M * A4 left doubleword + add S3,S9,S9 ; M * A5 right doubleword + + add,dc S1,S11,S11 ; M * A5 left doubleword + add S6,S12,S12 ; M * A6 right doubleword + + ldd 32(pR),S0 ; Addend word 4 + ldd 40(pR),S1 ; Addend word 5 + add,dc S4,S14,S14 ; M * A6 left doubleword + add S7,S13,S13 ; M * A7 right doubleword + + ldd 48(pR),S2 ; Addend word 6 + ldd 56(pR),S3 ; Addend word 7 + add,dc S5,S15,S15 ; M * A7 left doubleword + add S8,T2,S8 ; P4 doubleword + + ldd 64(pR),S4 ; Addend word 8 + ldd SV5,s5 ; restore s5 + add,dc S10,S9,S9 ; P5 doubleword + add,dc S11,S12,S12 ; P6 doubleword + + + ldd SV6,s6 ; restore s6 + ldd SV7,s7 ; restore s7 + add,dc S14,S13,S13 ; P7 doubleword + add,dc zero,S15,S15 ; P8 doubleword + + add S0,S8,S8 ; new R4 doubleword + + ldd SV0,s0 ; restore s0 + std S8,32(pR) ; save R4 + add,dc S1,S9,S9 ; new R5 doubleword + + ldd SV1,s1 ; restore s1 + std S9,40(pR) ; save R5 + add,dc S2,S12,S12 ; new R6 doubleword + + ldd SV2,s2 ; restore s2 + std S12,48(pR) ; save R6 + add,dc S3,S13,S13 ; new R7 doubleword + + ldd SV3,s3 ; restore s3 + std S13,56(pR) ; save R7 + add,dc S4,S15,S15 ; new R8 doubleword + + ldd SV4,s4 ; restore s4 + std S15,64(pR) ; save result[8] + add,dc zero,zero,v0 ; return carry from R8 + + CMPIB,*= 0,v0,$L0 ; if no overflow, exit + LDO 8(pR),pR + +$FINAL1 ; Final carry propagation + LDD 64(pR),v0 + LDO 8(pR),pR + ADDI 1,v0,v0 + CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry. 
+ STD v0,56(pR) +$L0 + bv zero(rp) ; -> caller + ldo -ST_SZ(sp),sp ; pop stack + +/* ====================================================================== */ +/* end of module */ +/* ====================================================================== */ + + + bve (rp) + .EXIT + nop + .PROCEND + .SPACE $TEXT$ + .SUBSPA $CODE$ + .EXPORT multacc512,ENTRY + + .end diff --git a/security/nss/lib/freebl/mpi/hppa20.s b/security/nss/lib/freebl/mpi/hppa20.s new file mode 100644 index 0000000000..c72de8a12b --- /dev/null +++ b/security/nss/lib/freebl/mpi/hppa20.s @@ -0,0 +1,904 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifdef __LP64__ + .LEVEL 2.0W +#else +; .LEVEL 1.1 +; .ALLOW 2.0N + .LEVEL 2.0 +#endif + .SPACE $TEXT$,SORT=8 + .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24 + +; *************************************************************** +; +; maxpy_[little/big] +; +; *************************************************************** + +; There is no default -- you must specify one or the other. +#define LITTLE_WORDIAN 1 + +#ifdef LITTLE_WORDIAN +#define EIGHT 8 +#define SIXTEEN 16 +#define THIRTY_TWO 32 +#define UN_EIGHT -8 +#define UN_SIXTEEN -16 +#define UN_TWENTY_FOUR -24 +#endif + +#ifdef BIG_WORDIAN +#define EIGHT -8 +#define SIXTEEN -16 +#define THIRTY_TWO -32 +#define UN_EIGHT 8 +#define UN_SIXTEEN 16 +#define UN_TWENTY_FOUR 24 +#endif + +; This performs a multiple-precision integer version of "daxpy", +; Using the selected addressing direction. "Little-wordian" means that +; the least significant word of a number is stored at the lowest address. +; "Big-wordian" means that the most significant word is at the lowest +; address. Either way, the incoming address of the vector is that +; of the least significant word. 
That means that, for little-wordian +; addressing, we move the address upward as we propagate carries +; from the least significant word to the most significant. For +; big-wordian we move the address downward. + +; We use the following registers: +; +; r2 return PC, of course +; r26 = arg1 = length +; r25 = arg2 = address of scalar +; r24 = arg3 = multiplicand vector +; r23 = arg4 = result vector +; +; fr9 = scalar loaded once only from r25 + +; The cycle counts shown in the bodies below are simply the result of a +; scheduling by hand. The actual PCX-U hardware does it differently. +; The intention is that the overall speed is the same. + +; The pipeline startup and shutdown code is constructed in the usual way, +; by taking the loop bodies and removing unnecessary instructions. +; We have left the comments describing cycle numbers in the code. +; These are intended for reference when comparing with the main loop, +; and have no particular relationship to actual cycle numbers. + +#ifdef LITTLE_WORDIAN +maxpy_little +#else +maxpy_big +#endif + .PROC + .CALLINFO FRAME=120,ENTRY_GR=4 + .ENTRY + STW,MA %r3,128(%sp) + STW %r4,-124(%sp) + + ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately. 
+ FLDD 0(%r25),%fr9 ; fr9 = scalar + +; First startup + + FLDD 0(%r24),%fr24 ; Cycle 1 + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3 + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + FSTD %fr24,-96(%sp) + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + FSTD %fr25,-80(%sp) + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + FSTD %fr27,-48(%sp) + +; Second startup + + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + FSTD %fr30,-56(%sp) + FLDD 0(%r24),%fr24 + + FSTD %fr26,-88(%sp) ; Cycle 2 + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + LDD -56(%sp),%r20 + ADD %r21,%r3,%r3 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + LDD -104(%sp),%r31 + ADD,DC %r0,%r0,%r20 + SHRPD %r19,%r3,32,%r3 + + LDD -72(%sp),%r29 ; Cycle 9 + SHRPD %r20,%r19,32,%r20 + ADD %r21,%r1,%r1 + + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + ADD,DC %r3,%r4,%r4 + FSTD %fr24,-96(%sp) + + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + ADD,DC %r0,%r20,%r20 + LDD 0(%r23),%r3 + FSTD %fr25,-80(%sp) + + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + ADD %r0,%r0,%r0 ; clear the carry bit + ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12 + FSTD %fr27,-48(%sp) +; MFCTL %cr16,%r21 ; for timing +; STD %r21,-112(%sp) + +; Here is the loop. 
+ +$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + ADD,DC %r29,%r4,%r4 + FSTD %fr30,-56(%sp) + FLDD 0(%r24),%fr24 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + ADD %r3,%r1,%r1 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + ADD,DC %r21,%r4,%r28 + FSTD %fr29,-72(%sp) + LDD -96(%sp),%r3 + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + ADD,DC %r20,%r31,%r22 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + ADD %r21,%r3,%r3 + LDD -56(%sp),%r20 + STD %r1,UN_SIXTEEN(%r23) + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + SHRPD %r3,%r0,32,%r21 + LDD -88(%sp),%r4 + LDD -48(%sp),%r1 + + ADD,DC %r0,%r0,%r20 ; Cycle 8 + SHRPD %r19,%r3,32,%r3 + FLDD EIGHT(%r24),%fr28 + LDD -104(%sp),%r31 + + SHRPD %r20,%r19,32,%r20 ; Cycle 9 + ADD %r21,%r1,%r1 + STD %r28,UN_EIGHT(%r23) + LDD -72(%sp),%r29 + + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + ADD,DC %r3,%r4,%r4 + FSTD %fr24,-96(%sp) + + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + ADD,DC %r0,%r20,%r20 + FSTD %fr25,-80(%sp) + LDD 0(%r23),%r3 + + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + ADD %r22,%r1,%r1 + ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12 + FSTD %fr27,-48(%sp) + +$ENDLOOP + +; Shutdown code, first stage. 
+ +; MFCTL %cr16,%r21 ; for timing +; STD %r21,UN_SIXTEEN(%r23) +; LDD -112(%sp),%r21 +; STD %r21,UN_EIGHT(%r23) + + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + ADD,DC %r29,%r4,%r4 + CMPIB,= 0,%r26,$ONEMORE + FSTD %fr30,-56(%sp) + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + ADD %r3,%r1,%r1 ; Cycle 3 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + FSTD %fr29,-72(%sp) + STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9 + LDD -96(%sp),%r3 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + STD %r1,UN_SIXTEEN(%r23) +$JOIN4 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + ADD %r21,%r3,%r3 ; Cycle 6 + LDD -56(%sp),%r20 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + SHRPD %r3,%r0,32,%r21 + LDD -88(%sp),%r4 + LDD -48(%sp),%r1 + + ADD,DC %r0,%r0,%r20 ; Cycle 8 + SHRPD %r19,%r3,32,%r3 + LDD -104(%sp),%r31 + + SHRPD %r20,%r19,32,%r20 ; Cycle 9 + ADD %r21,%r1,%r1 + LDD -72(%sp),%r29 + + ADD,DC %r3,%r4,%r4 ; Cycle 10 + + ADD,DC %r0,%r20,%r20 ; Cycle 11 + LDD 0(%r23),%r3 + + ADD %r22,%r1,%r1 ; Cycle 13 + +; Shutdown code, second stage. + + ADD,DC %r29,%r4,%r4 ; Cycle 1 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + + STD %r1,UN_SIXTEEN(%r23); Cycle 6 + + STD %r28,UN_EIGHT(%r23) ; Cycle 9 + + LDD 0(%r23),%r3 ; Cycle 11 + +; Shutdown code, third stage. + + LDO SIXTEEN(%r23),%r23 + ADD %r3,%r22,%r1 +$JOIN1 ADD,DC %r0,%r0,%r21 + CMPIB,*= 0,%r21,$L0 ; if no overflow, exit + STD %r1,UN_SIXTEEN(%r23) + +; Final carry propagation + +$FINAL1 LDO EIGHT(%r23),%r23 + LDD UN_SIXTEEN(%r23),%r21 + ADDI 1,%r21,%r21 + CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry. + STD %r21,UN_SIXTEEN(%r23) + B $L0 + NOP + +; Here is the code that handles the difficult cases N=1, N=2, and N=3. +; We do the usual trick -- branch out of the startup code at appropriate +; points, and branch into the shutdown code. 
+ +$N_IS_SMALL + CMPIB,= 0,%r26,$N_IS_ONE + FSTD %fr24,-96(%sp) ; Cycle 10 + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + FSTD %fr25,-80(%sp) + FSTD %fr31,-64(%sp) ; Cycle 12 + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + FSTD %fr27,-48(%sp) + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + CMPIB,= 2,%r26,$N_IS_THREE + FSTD %fr30,-56(%sp) + +; N = 2 + FSTD %fr26,-88(%sp) ; Cycle 2 + FSTD %fr28,-104(%sp) ; Cycle 3 + LDD -96(%sp),%r3 ; Cycle 4 + FSTD %fr29,-72(%sp) + B $JOIN4 + ADD %r0,%r0,%r22 + +$N_IS_THREE + FLDD SIXTEEN(%r24),%fr24 + FSTD %fr26,-88(%sp) ; Cycle 2 + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + B $JOIN3 + ADD %r0,%r0,%r22 + +$N_IS_ONE + FSTD %fr25,-80(%sp) + FSTD %fr27,-48(%sp) + FSTD %fr26,-88(%sp) ; Cycle 2 + B $JOIN5 + ADD %r0,%r0,%r22 + +; We came out of the unrolled loop with wrong parity. Do one more +; single cycle. This is quite tricky, because of the way the +; carry chains and SHRPD chains have been chopped up. 
+ +$ONEMORE + + FLDD 0(%r24),%fr24 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + ADD %r3,%r1,%r1 + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + ADD,DC %r21,%r4,%r28 + STD %r28,UN_EIGHT(%r23) ; moved from cycle 9 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + ADD,DC %r20,%r31,%r22 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + STD %r1,UN_SIXTEEN(%r23); Cycle 6 +$JOIN3 + XMPYU %fr9L,%fr24R,%fr24 + LDD -56(%sp),%r20 + ADD %r21,%r3,%r3 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + + LDD -104(%sp),%r31 ; Cycle 8 + ADD,DC %r0,%r0,%r20 + SHRPD %r19,%r3,32,%r3 + + LDD -72(%sp),%r29 ; Cycle 9 + SHRPD %r20,%r19,32,%r20 + ADD %r21,%r1,%r1 + + ADD,DC %r3,%r4,%r4 ; Cycle 10 + FSTD %fr24,-96(%sp) + + ADD,DC %r0,%r20,%r20 ; Cycle 11 + LDD 0(%r23),%r3 + FSTD %fr25,-80(%sp) + + ADD %r22,%r1,%r1 ; Cycle 13 + FSTD %fr27,-48(%sp) + +; Shutdown code, stage 1-1/2. + + ADD,DC %r29,%r4,%r4 ; Cycle 1 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + STD %r28,UN_EIGHT(%r23) ; moved from cycle 9 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + STD %r1,UN_SIXTEEN(%r23) +$JOIN5 + LDD -96(%sp),%r3 ; moved from cycle 4 + LDD -80(%sp),%r21 + ADD %r21,%r3,%r3 ; Cycle 6 + ADD,DC %r0,%r0,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + SHRPD %r19,%r3,32,%r3 ; Cycle 8 + ADD %r21,%r1,%r1 ; Cycle 9 + ADD,DC %r3,%r4,%r4 ; Cycle 10 + LDD 0(%r23),%r3 ; Cycle 11 + ADD %r22,%r1,%r1 ; Cycle 13 + +; Shutdown code, stage 2-1/2. 
+ + ADD,DC %r0,%r4,%r4 ; Cycle 1 + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + STD %r1,UN_SIXTEEN(%r23) + ADD,DC %r21,%r4,%r1 + B $JOIN1 + LDO EIGHT(%r23),%r23 + +; exit + +$L0 + LDW -124(%sp),%r4 + BVE (%r2) + .EXIT + LDW,MB -128(%sp),%r3 + + .PROCEND + +; *************************************************************** +; +; add_diag_[little/big] +; +; *************************************************************** + +; The arguments are as follows: +; r2 return PC, of course +; r26 = arg1 = length +; r25 = arg2 = vector to square +; r24 = arg3 = result vector + +#ifdef LITTLE_WORDIAN +add_diag_little +#else +add_diag_big +#endif + .PROC + .CALLINFO FRAME=120,ENTRY_GR=4 + .ENTRY + STW,MA %r3,128(%sp) + STW %r4,-124(%sp) + + ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately. + NOP + +; Startup code + + FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body) + XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4 + XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr30 + LDO SIXTEEN(%r25),%r25 ; Cycle 6 + FSTD %fr29,-88(%sp) + FSTD %fr27,-72(%sp) ; Cycle 7 + CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body) + FSTD %fr30,-96(%sp) + FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2 + LDD -88(%sp),%r22 ; Cycle 3 + LDD -72(%sp),%r31 ; Cycle 4 + XMPYU %fr7R,%fr7R,%fr28 + XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr31 + LDD -96(%sp),%r20 ; Cycle 6 + FSTD %fr28,-80(%sp) + ADD %r0,%r0,%r0 ; clear the carry bit + ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7 + FSTD %fr24,-64(%sp) + +; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body". 
+ +$DIAGLOOP + SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body) + LDO SIXTEEN(%r25),%r25 + LDD 0(%r24),%r1 + FSTD %fr31,-104(%sp) + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD,DC %r22,%r3,%r3 + FLDD UN_SIXTEEN(%r25),%fr7 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + ADD %r1,%r3,%r3 + XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4 + LDD -80(%sp),%r21 + STD %r3,0(%r24) + XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr30 + LDD -64(%sp),%r29 + LDD EIGHT(%r24),%r1 + ADD,DC %r4,%r20,%r20 ; Cycle 6 + LDD -104(%sp),%r19 + FSTD %fr29,-88(%sp) + ADD %r20,%r1,%r1 ; Cycle 7 + FSTD %fr27,-72(%sp) + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + LDD UN_SIXTEEN(%r24),%r28 + FSTD %fr30,-96(%sp) + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD,DC %r21,%r4,%r4 + FLDD UN_EIGHT(%r25),%fr7 + STD %r1,UN_TWENTY_FOUR(%r24) + ADD,DC %r0,%r19,%r19 ; Cycle 3 + ADD %r28,%r4,%r4 + XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4 + LDD -88(%sp),%r22 + STD %r4,UN_SIXTEEN(%r24) + XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr31 + LDD -72(%sp),%r31 + LDD UN_EIGHT(%r24),%r28 + ADD,DC %r3,%r19,%r19 ; Cycle 6 + LDD -96(%sp),%r20 + FSTD %fr28,-80(%sp) + ADD %r19,%r28,%r28 ; Cycle 7 + FSTD %fr24,-64(%sp) + ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8 + STD %r28,UN_EIGHT(%r24) + +$ENDDIAGLOOP + + ADD,DC %r0,%r22,%r22 + CMPIB,= 0,%r26,$ONEMOREDIAG + SHRPD %r31,%r0,31,%r3 + +; Shutdown code, first stage. + + FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + LDD -80(%sp),%r21 + ADD %r3,%r28,%r3 + LDD -64(%sp),%r29 ; Cycle 4 + STD %r3,0(%r24) + LDD EIGHT(%r24),%r1 ; Cycle 5 + LDO SIXTEEN(%r25),%r25 ; Cycle 6 + LDD -104(%sp),%r19 + ADD,DC %r4,%r20,%r20 + ADD %r20,%r1,%r1 ; Cycle 7 + ADD,DC %r0,%r21,%r21 ; Cycle 8 + STD %r1,EIGHT(%r24) + +; Shutdown code, second stage. 
+ + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + LDD UN_SIXTEEN(%r24),%r1 + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD %r4,%r21,%r4 + ADD,DC %r0,%r19,%r19 ; Cycle 3 + ADD %r4,%r1,%r4 + STD %r4,UN_SIXTEEN(%r24); Cycle 4 + LDD UN_EIGHT(%r24),%r28 ; Cycle 5 + ADD,DC %r3,%r19,%r19 ; Cycle 6 + ADD %r19,%r28,%r28 ; Cycle 7 + ADD,DC %r0,%r0,%r22 ; Cycle 8 + CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit + STD %r28,UN_EIGHT(%r24) + +; Final carry propagation + +$FDIAG2 + LDO EIGHT(%r24),%r24 + LDD UN_EIGHT(%r24),%r26 + ADDI 1,%r26,%r26 + CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry. + STD %r26,UN_EIGHT(%r24) + + B $Z0 + NOP + +; Here is the code that handles the difficult case N=1. +; We do the usual trick -- branch out of the startup code at appropriate +; points, and branch into the shutdown code. + +$DIAG_N_IS_ONE + + LDD -88(%sp),%r22 + LDD -72(%sp),%r31 + B $JOINDIAG + LDD -96(%sp),%r20 + +; We came out of the unrolled loop with wrong parity. Do one more +; single cycle. This is the "alternate body". It will, of course, +; give us opposite registers from the other case, so we need +; completely different shutdown code. + +$ONEMOREDIAG + FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + FLDD 0(%r25),%fr7 ; Cycle 2 + SHRPD %r0,%r31,31,%r4 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + LDD -80(%sp),%r21 + ADD %r3,%r28,%r3 + LDD -64(%sp),%r29 ; Cycle 4 + STD %r3,0(%r24) + XMPYU %fr7R,%fr7R,%fr29 + LDD EIGHT(%r24),%r1 ; Cycle 5 + XMPYU %fr7L,%fr7R,%fr27 + XMPYU %fr7L,%fr7L,%fr30 + LDD -104(%sp),%r19 ; Cycle 6 + FSTD %fr29,-88(%sp) + ADD,DC %r4,%r20,%r20 + FSTD %fr27,-72(%sp) ; Cycle 7 + ADD %r20,%r1,%r1 + ADD,DC %r0,%r21,%r21 ; Cycle 8 + STD %r1,EIGHT(%r24) + +; Shutdown code, first stage. 
+ + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + FSTD %fr30,-96(%sp) + LDD UN_SIXTEEN(%r24),%r1 + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD %r4,%r21,%r4 + ADD,DC %r0,%r19,%r19 ; Cycle 3 + LDD -88(%sp),%r22 + ADD %r4,%r1,%r4 + LDD -72(%sp),%r31 ; Cycle 4 + STD %r4,UN_SIXTEEN(%r24) + LDD UN_EIGHT(%r24),%r28 ; Cycle 5 + LDD -96(%sp),%r20 ; Cycle 6 + ADD,DC %r3,%r19,%r19 + ADD %r19,%r28,%r28 ; Cycle 7 + ADD,DC %r0,%r22,%r22 ; Cycle 8 + STD %r28,UN_EIGHT(%r24) + +; Shutdown code, second stage. + +$JOINDIAG + SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + ADD %r3,%r28,%r3 + STD %r3,0(%r24) ; Cycle 4 + LDD EIGHT(%r24),%r1 ; Cycle 5 + ADD,DC %r4,%r20,%r20 + ADD %r20,%r1,%r1 ; Cycle 7 + ADD,DC %r0,%r0,%r21 ; Cycle 8 + CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit + STD %r1,EIGHT(%r24) + +; Final carry propagation + +$FDIAG1 + LDO EIGHT(%r24),%r24 + LDD EIGHT(%r24),%r26 + ADDI 1,%r26,%r26 + CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry. 
+ STD %r26,EIGHT(%r24) + +$Z0 + LDW -124(%sp),%r4 + BVE (%r2) + .EXIT + LDW,MB -128(%sp),%r3 + .PROCEND +; .ALLOW + + .SPACE $TEXT$ + .SUBSPA $CODE$ +#ifdef LITTLE_WORDIAN +#ifdef __GNUC__ +; GNU-as (as of 2.19) does not support LONG_RETURN + .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR + .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR +#else + .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN + .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN +#endif +#else + .EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN + .EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN +#endif + .END + + +; How to use "maxpy_PA20_little" and "maxpy_PA20_big" +; +; The routine "maxpy_PA20_little" or "maxpy_PA20_big" +; performs a 64-bit x any-size multiply, and adds the +; result to an area of memory. That is, it performs +; something like +; +; A B C D +; * Z +; __________ +; P Q R S T +; +; and then adds the "PQRST" vector into an area of memory, +; handling all carries. +; +; Digression on nomenclature and endian-ness: +; +; Each of the capital letters in the above represents a 64-bit +; quantity. That is, you could think of the discussion as +; being in terms of radix-16-quintillion arithmetic. The data +; type being manipulated is "unsigned long long int". This +; requires the 64-bit extension of the HP-UX C compiler, +; available at release 10. You need these compiler flags to +; enable these extensions: +; +; -Aa +e +DA2.0 +DS2.0 +; +; (The first specifies ANSI C, the second enables the +; extensions, which are beyond ANSI C, and the third and +; fourth tell the compiler to use whatever features of the +; PA2.0 architecture it wishes, in order to make the code more +; efficient.
Since the presence of the assembly code will +; make the program unable to run on anything less than PA2.0, +; you might as well gain the performance enhancements in the C +; code as well.) +; +; Questions of "endian-ness" often come up, usually in the +; context of byte ordering in a word. These routines have a +; similar issue, that could be called "wordian-ness". +; Independent of byte ordering (PA is always big-endian), one +; can make two choices when representing extremely large +; numbers as arrays of 64-bit doublewords in memory. +; +; "Little-wordian" layout means that the least significant +; word of a number is stored at the lowest address. +; +; MSW LSW +; | | +; V V +; +; A B C D E +; +; ^ ^ ^ +; | | |____ address 0 +; | | +; | |_______address 8 +; | +; address 32 +; +; "Big-wordian" means that the most significant word is at the +; lowest address. +; +; MSW LSW +; | | +; V V +; +; A B C D E +; +; ^ ^ ^ +; | | |____ address 32 +; | | +; | |_______address 24 +; | +; address 0 +; +; When you compile the file, you must specify one or the other, with +; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN". +; +; Incidentally, you assemble this file as part of your +; project with the same C compiler as the rest of the program. 
+; My "makefile" for a superprecision arithmetic package has +; the following stuff: +; +; # definitions: +; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1 +; CFLAGS = +O3 +; LDFLAGS = -L /usr/lib -Wl,-aarchive +; +; # general build rule for ".s" files: +; .s.o: +; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN +; +; # Now any bind step that calls for pa20.o will assemble pa20.s +; +; End of digression, back to arithmetic: +; +; The way we multiply two huge numbers is, of course, to multiply +; the "ABCD" vector by each of the "WXYZ" doublewords, adding +; the result vectors with increasing offsets, the way we learned +; in school, back before we all used calculators: +; +; A B C D +; * W X Y Z +; __________ +; P Q R S T +; E F G H I +; M N O P Q +; + R S T U V +; _______________ +; F I N A L S U M +; +; So we call maxpy_PA20_big (in my case; my package is +; big-wordian) repeatedly, giving the W, X, Y, and Z arguments +; in turn as the "scalar", and giving the "ABCD" vector each +; time. We direct it to add its result into an area of memory +; that we have cleared at the start. We skew the exact +; location into that area with each call. +; +; The prototype for the function is +; +; extern void maxpy_PA20_big( +; int length, /* Number of doublewords in the multiplicand vector. */ +; const long long int *scalaraddr, /* Address to fetch the scalar. */ +; const long long int *multiplicand, /* The multiplicand vector. */ +; long long int *result); /* Where to accumulate the result. */ +; +; (You should place a copy of this prototype in an include file +; or in your C file.) +; +; Now, IN ALL CASES, the given address for the multiplicand or +; the result is that of the LEAST SIGNIFICANT DOUBLEWORD. +; That word is, of course, the word at which the routine +; starts processing. "maxpy_PA20_little" then increases the +; addresses as it computes. "maxpy_PA20_big" decreases them. +; +; In our example above, "length" would be 4 in each case. +; "multiplicand" would be the "ABCD" vector. 
Specifically, +; the address of the element "D". "scalaraddr" would be the +; address of "W", "X", "Y", or "Z" on the four calls that we +; would make. (The order doesn't matter, of course.) +; "result" would be the appropriate address in the result +; area. When multiplying by "Z", that would be the least +; significant word. When multiplying by "Y", it would be the +; next higher word (8 bytes higher if little-wordian; 8 bytes +; lower if big-wordian), and so on. The size of the result +; area must be the sum of the sizes of the multiplicand +; and multiplier vectors, and must be initialized to zero +; before we start. +; +; Whenever the routine adds its partial product into the result +; vector, it follows carry chains as far as they need to go. +; +; Here is the super-precision multiply routine that I use for +; my package. The package is big-wordian. I have taken out +; handling of exponents (it's a floating point package): +; +; static void mul_PA20( +; int size, +; const long long int *arg1, +; const long long int *arg2, +; long long int *result) +; { +; int i; +; +; for (i=0 ; i<2*size ; i++) result[i] = 0ULL; +; +; for (i=0 ; i +#else +#define floor(d) ((double)((unsigned long long)(d))) +#endif + +static double +upper32(double x) +{ + return floor(x * TwoToMinus32); +} + +static double +lower32(double x, double y) +{ + return x - TwoTo32 * floor(x * TwoToMinus32); +} + +static double +mod(double x, double oneoverm, double m) +{ + return x - m * floor(x * oneoverm); +} + +#endif + +static void +cleanup(double *dt, int from, int tlen) +{ + int i; + double tmp, tmp1, x, x1; + + tmp = tmp1 = Zero; + /* original code ** + for(i=2*from;i<2*tlen-2;i++) + { + x=dt[i]; + dt[i]=lower32(x,Zero)+tmp1; + tmp1=tmp; + tmp=upper32(x); + } + dt[tlen-2]+=tmp1; + dt[tlen-1]+=tmp; + **end original code ***/ + /* new code ***/ + for (i = 2 * from; i < 2 * tlen; i += 2) { + x = dt[i]; + x1 = dt[i + 1]; + dt[i] = lower32(x, Zero) + tmp; + dt[i + 1] = lower32(x1, Zero) +
/*
 * Pack a number stored as pairs of doubles back into 32-bit words.
 *
 * For output word k, d16[2*k] is added at bit 0 and d16[2*k + 1] at
 * bit 16.  The entries may be wider than 16/32 bits; whatever does not fit
 * in the current word is carried into the next one.
 *
 *   i32  - receives ilen words
 *   d16  - 2*ilen doubles holding non-negative integral values
 *   tmp  - unused; kept so existing callers keep compiling
 *   ilen - number of words to produce; nothing is written when ilen <= 0
 */
void
conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
{
    int i;
    long long acc, carry, lo, hi, next_lo, next_hi;

    (void)tmp;

    /* Without this guard the final store below would write i32[0]. */
    if (ilen <= 0)
        return;

    acc = 0;
    lo = (long long)d16[0];
    hi = (long long)d16[1];
    for (i = 0; i < ilen - 1; i++) {
        next_lo = (long long)d16[2 * i + 2];
        next_hi = (long long)d16[2 * i + 3];

        acc += (unsigned int)lo; /* low 32 bits of the even entry */
        carry = (lo >> 32);
        acc += (hi & 0xffff) << 16; /* low 16 bits of the odd entry */
        carry += (hi >> 16) + (acc >> 32);
        i32[i] = (unsigned int)acc;

        acc = carry;
        lo = next_lo;
        hi = next_hi;
    }
    /* Last word: whatever would carry out of it is discarded. */
    acc += (unsigned int)lo;
    acc += (hi & 0xffff) << 16;
    i32[i] = (unsigned int)acc;
}

/*
 * Store each of the len words of i32 as a double in d32.
 */
void
conv_i32_to_d32(double *d32, unsigned int *i32, int len)
{
    int i;

#pragma pipeloop(0)
    for (i = 0; i < len; i++)
        d32[i] = (double)(i32[i]);
}

/*
 * Split each of the len words of i32 into two doubles: d16[2*i] gets the
 * low 16 bits and d16[2*i + 1] the remaining high bits.
 */
void
conv_i32_to_d16(double *d16, unsigned int *i32, int len)
{
    int i;
    unsigned int a;

#pragma pipeloop(0)
    for (i = 0; i < len; i++) {
        a = i32[i];
        d16[2 * i] = (double)(a & 0xffff);
        d16[2 * i + 1] = (double)(a >> 16);
    }
}

/*
 * Perform conv_i32_to_d32 and conv_i32_to_d16 in a single pass.
 * When RF_INLINE_MACROS is defined, groups of four words are handed to the
 * i16_to_d16_and_d32x4 inline template; the scalar loop handles the rest.
 */
void
conv_i32_to_d32_and_d16(double *d32, double *d16,
                        unsigned int *i32, int len)
{
    int i = 0;
    unsigned int a;

#pragma pipeloop(0)
#ifdef RF_INLINE_MACROS
    for (; i < len - 3; i += 4) {
        i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
                             &(d16[2 * i]), &(d32[i]), (float *)(&(i32[i])));
    }
#endif
    for (; i < len; i++) {
        a = i32[i];
        d32[i] = (double)a;
        d16[2 * i] = (double)(a & 0xffff);
        d16[2 * i + 1] = (double)(a >> 16);
    }
}

/*
 * Final conditional subtraction: if the (len+1)-word value in i32 is not
 * smaller than the len-word value in nint, subtract nint from the low len
 * words of i32 in place.  i32[len] itself is only read, never rewritten.
 */
void
adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
{
    unsigned long long diff;
    unsigned int borrow;
    int i;

    if (i32[len] > 0)
        i = -1; /* extra top word set: certainly larger than nint */
    else {
        /* Find the most significant word in which the two differ. */
        for (i = len - 1; i >= 0; i--) {
            if (i32[i] != nint[i])
                break;
        }
    }
    if ((i < 0) || (i32[i] > nint[i])) {
        /*
         * Word-wise subtraction with an explicit borrow kept in unsigned
         * arithmetic, so no negative value is ever right-shifted.
         */
        borrow = 0;
        for (i = 0; i < len; i++) {
            diff = (unsigned long long)(i32[i]) - (unsigned long long)(nint[i]) - borrow;
            i32[i] = (unsigned int)diff;
            borrow = (unsigned int)(diff >> 63); /* 1 when the difference wrapped */
        }
    }
}
at least the following: +** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +** all of them should be different from one another +** +*/ +void +mont_mulf_noconv(unsigned int *result, + double *dm1, double *dm2, double *dt, + double *dn, unsigned int *nint, + int nlen, double dn0) +{ + int i, j, jj; + int tmp; + double digit, m2j, nextm2j, a, b; + double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; + + pdm1 = &(dm1[0]); + pdm2 = &(dm2[0]); + pdn = &(dn[0]); + pdm2[2 * nlen] = Zero; + + if (nlen != 16) { + for (i = 0; i < 4 * nlen + 2; i++) + dt[i] = Zero; + + a = dt[0] = pdm1[0] * pdm2[0]; + digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); + + pdtj = &(dt[0]); + for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) { + m2j = pdm2[j]; + a = pdtj[0] + pdn[0] * digit; + b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16; + pdtj[1] = b; + +#pragma pipeloop(0) + for (i = 1; i < nlen; i++) { + pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit; + } + if ((jj == 30)) { + cleanup(dt, j / 2 + 1, 2 * nlen + 1); + jj = 0; + } + + digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16); + } + } else { + a = dt[0] = pdm1[0] * pdm2[0]; + + dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] = + dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = dt[54] = + dt[53] = dt[52] = dt[51] = dt[50] = dt[49] = dt[48] = + dt[47] = dt[46] = dt[45] = dt[44] = dt[43] = dt[42] = + dt[41] = dt[40] = dt[39] = dt[38] = dt[37] = dt[36] = + dt[35] = dt[34] = dt[33] = dt[32] = dt[31] = dt[30] = + dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = dt[24] = + dt[23] = dt[22] = dt[21] = dt[20] = dt[19] = dt[18] = + dt[17] = dt[16] = dt[15] = dt[14] = dt[13] = dt[12] = + dt[11] = dt[10] = dt[9] = dt[8] = dt[7] = dt[6] = + dt[5] = dt[4] = dt[3] = dt[2] = dt[1] = Zero; + + pdn_0 = pdn[0]; + pdm1_0 = pdm1[0]; + + digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); + pdtj = &(dt[0]); + + for (j = 0; j < 32; j++, pdtj++) { + + m2j = pdm2[j]; + a = pdtj[0] + pdn_0 * digit; 
+ b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16; + pdtj[1] = b; + + /**** this loop will be fully unrolled: + for(i=1;i<16;i++) + { + pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit; + } + *************************************/ + pdtj[2] += pdm1[1] * m2j + pdn[1] * digit; + pdtj[4] += pdm1[2] * m2j + pdn[2] * digit; + pdtj[6] += pdm1[3] * m2j + pdn[3] * digit; + pdtj[8] += pdm1[4] * m2j + pdn[4] * digit; + pdtj[10] += pdm1[5] * m2j + pdn[5] * digit; + pdtj[12] += pdm1[6] * m2j + pdn[6] * digit; + pdtj[14] += pdm1[7] * m2j + pdn[7] * digit; + pdtj[16] += pdm1[8] * m2j + pdn[8] * digit; + pdtj[18] += pdm1[9] * m2j + pdn[9] * digit; + pdtj[20] += pdm1[10] * m2j + pdn[10] * digit; + pdtj[22] += pdm1[11] * m2j + pdn[11] * digit; + pdtj[24] += pdm1[12] * m2j + pdn[12] * digit; + pdtj[26] += pdm1[13] * m2j + pdn[13] * digit; + pdtj[28] += pdm1[14] * m2j + pdn[14] * digit; + pdtj[30] += pdm1[15] * m2j + pdn[15] * digit; + /* no need for cleenup, cannot overflow */ + digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16); + } + } + + conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1); + + adjust_montf_result(result, nint, nlen); +} diff --git a/security/nss/lib/freebl/mpi/montmulf.h b/security/nss/lib/freebl/mpi/montmulf.h new file mode 100644 index 0000000000..69bed4acb1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.h @@ -0,0 +1,65 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* The functions that are to be called from outside of the .s file have the + * following interfaces and array size requirements: + */ + +void conv_i32_to_d32(double *d32, unsigned int *i32, int len); + +/* Converts an array of int's to an array of doubles, so that each double + * corresponds to an int. len is the number of items converted. + * Does not allocate the output array. 
+ * The pointers d32 and i32 should point to arrays of size at least len + * (doubles and unsigned ints, respectively) + */ + +void conv_i32_to_d16(double *d16, unsigned int *i32, int len); + +/* Converts an array of int's to an array of doubles so that each element + * of the int array is converted to a pair of doubles, the first one + * corresponding to the lower (least significant) 16 bits of the int and + * the second one corresponding to the upper (most significant) 16 bits of + * the 32-bit int. len is the number of ints converted. + * Does not allocate the output array. + * The pointer d16 should point to an array of doubles of size at least + * 2*len and i32 should point to an array of ints of size at least len + */ + +void conv_i32_to_d32_and_d16(double *d32, double *d16, + unsigned int *i32, int len); + +/* Does the above two conversions together; it is much faster than doing + * both of those in succession + */ + +void mont_mulf_noconv(unsigned int *result, + double *dm1, double *dm2, double *dt, + double *dn, unsigned int *nint, + int nlen, double dn0); + +/* Does the Montgomery multiplication of the numbers stored in the arrays + * pointed to by dm1 and dm2, writing the result to the array pointed to by + * result. It uses the array pointed to by dt as a temporary work area. + * nint should point to the modulus in the array-of-integers representation, + * dn should point to its array-of-doubles as obtained as a result of the + * function call conv_i32_to_d32(dn, nint, nlen); + * nlen is the length of the array containing the modulus. + * The representation used for dm1 is the one that is a result of the function + * call conv_i32_to_d32(dm1, m1, nlen), the representation for dm2 is the + * result of the function call conv_i32_to_d16(dm2, m2, nlen). + * Note that m1 and m2 should both be of length nlen, so they should be + * padded with 0's if necessary before the conversion. The result comes in + * this form (int representation, padded with 0's).
+ * dn0 is the value of the 16 least significant bits of n0'. + * The function does not allocate memory for any of the arrays, so the + * pointers should point to arrays with the following minimal sizes: + * result - nlen+1 + * dm1 - nlen + * dm2 - 2*nlen+1 ( the +1 is necessary for technical reasons ) + * dt - 4*nlen+2 + * dn - nlen + * nint - nlen + * No two arrays should point to overlapping areas of memory. + */ diff --git a/security/nss/lib/freebl/mpi/montmulf.il b/security/nss/lib/freebl/mpi/montmulf.il new file mode 100644 index 0000000000..4952d0fb82 --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.il @@ -0,0 +1,108 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! +! double upper32(double /*frs1*/); +! + .inline upper32,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f10 + + fdtox %f10,%f10 + fitod %f10,%f0 + .end + +! +! double lower32(double /*frs1*/, double /* Zero */); +! + .inline lower32,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f10 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f12 + + fdtox %f10,%f10 + fmovs %f12,%f10 + fxtod %f10,%f0 + .end + +! +! double mod(double /*x*/, double /*1/m*/, double /*m*/); +! + .inline mod,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f2 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o4,[%sp+0x48] + ldd [%sp+0x48],%f6 + + fmuld %f2,%f4,%f4 + fdtox %f4,%f4 + fxtod %f4,%f4 + fmuld %f4,%f6,%f4 + fsubd %f2,%f4,%f0 + .end + + +! +! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/, +! double * /* 0 */, +! double * /*result16*/, double * /* result32 */ +! float * /*source - should be unsigned int* +! converted to float* */); +! + .inline i16_to_d16_and_d32x4,24 + ldd [%o0],%f2 ! 1/(2^16) + ldd [%o1],%f4 ! 
2^16 + ldd [%o2],%f22 + + fmovd %f22,%f6 + ld [%o5],%f7 + fmovd %f22,%f10 + ld [%o5+4],%f11 + fmovd %f22,%f14 + ld [%o5+8],%f15 + fmovd %f22,%f18 + ld [%o5+12],%f19 + fxtod %f6,%f6 + std %f6,[%o4] + fxtod %f10,%f10 + std %f10,[%o4+8] + fxtod %f14,%f14 + std %f14,[%o4+16] + fxtod %f18,%f18 + std %f18,[%o4+24] + fmuld %f2,%f6,%f8 + fmuld %f2,%f10,%f12 + fmuld %f2,%f14,%f16 + fmuld %f2,%f18,%f20 + fdtox %f8,%f8 + fdtox %f12,%f12 + fdtox %f16,%f16 + fdtox %f20,%f20 + fxtod %f8,%f8 + std %f8,[%o3+8] + fxtod %f12,%f12 + std %f12,[%o3+24] + fxtod %f16,%f16 + std %f16,[%o3+40] + fxtod %f20,%f20 + std %f20,[%o3+56] + fmuld %f8,%f4,%f8 + fmuld %f12,%f4,%f12 + fmuld %f16,%f4,%f16 + fmuld %f20,%f4,%f20 + fsubd %f6,%f8,%f8 + std %f8,[%o3] + fsubd %f10,%f12,%f12 + std %f12,[%o3+16] + fsubd %f14,%f16,%f16 + std %f16,[%o3+32] + fsubd %f18,%f20,%f20 + std %f20,[%o3+48] + .end + + diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s new file mode 100644 index 0000000000..69d2a3c51b --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.s @@ -0,0 +1,1938 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + .section ".text",#alloc,#execinstr + .file "montmulf.c" + + .section ".data",#alloc,#write + .align 8 +TwoTo16: /* frequency 1.0 confidence 0.0 */ + .word 1089470464 + .word 0 + .type TwoTo16,#object + .size TwoTo16,8 +TwoToMinus16: /* frequency 1.0 confidence 0.0 */ + .word 1055916032 + .word 0 + .type TwoToMinus16,#object + .size TwoToMinus16,8 +Zero: /* frequency 1.0 confidence 0.0 */ + .word 0 + .word 0 + .type Zero,#object + .size Zero,8 +TwoTo32: /* frequency 1.0 confidence 0.0 */ + .word 1106247680 + .word 0 + .type TwoTo32,#object + .size TwoTo32,8 +TwoToMinus32: /* frequency 1.0 confidence 0.0 */ + .word 1039138816 + .word 0 + .type TwoToMinus32,#object + .size TwoToMinus32,8 + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE cleanup +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global cleanup + cleanup: /* frequency 1.0 confidence 0.0 */ +! FILE montmulf.c + +! 1 !#define RF_INLINE_MACROS +! 3 !static double TwoTo16=65536.0; +! 4 !static double TwoToMinus16=1.0/65536.0; +! 5 !static double Zero=0.0; +! 6 !static double TwoTo32=65536.0*65536.0; +! 7 !static double TwoToMinus32=1.0/(65536.0*65536.0); +! 9 !#ifdef RF_INLINE_MACROS +! 11 !double upper32(double); +! 12 !double lower32(double, double); +! 13 !double mod(double, double, double); +! 15 !#else +! 17 !static double upper32(double x) +! 18 !{ +! 19 ! return floor(x*TwoToMinus32); +! 20 !} +! 22 !static double lower32(double x, double y) +! 23 !{ +! 24 ! return x-TwoTo32*floor(x*TwoToMinus32); +! 25 !} +! 27 !static double mod(double x, double oneoverm, double m) +! 28 !{ +! 29 ! return x-m*floor(x*oneoverm); +! 30 !} +! 32 !#endif +! 35 !void cleanup(double *dt, int from, int tlen) +! 36 !{ +! 37 ! int i; +! 38 ! double tmp,tmp1,x,x1; +! 40 ! tmp=tmp1=Zero; + +/* 000000 40 ( 0 1) */ sethi %hi(Zero),%g2 + +! 41 ! /* original code ** +! 42 ! for(i=2*from;i<2*tlen-2;i++) +! 43 ! { +! 44 ! x=dt[i]; +! 45 ! 
dt[i]=lower32(x,Zero)+tmp1; +! 46 ! tmp1=tmp; +! 47 ! tmp=upper32(x); +! 48 ! } +! 49 ! dt[tlen-2]+=tmp1; +! 50 ! dt[tlen-1]+=tmp; +! 51 ! **end original code ***/ +! 52 ! /* new code ***/ +! 53 ! for(i=2*from;i<2*tlen;i+=2) + +/* 0x0004 53 ( 1 2) */ sll %o2,1,%g3 +/* 0x0008 40 ( 1 4) */ ldd [%g2+%lo(Zero)],%f0 +/* 0x000c ( 1 2) */ add %g2,%lo(Zero),%g2 +/* 0x0010 53 ( 2 3) */ sll %o1,1,%g4 +/* 0x0014 36 ( 3 4) */ sll %o1,4,%g1 +/* 0x0018 40 ( 3 4) */ fmovd %f0,%f4 +/* 0x001c 53 ( 3 4) */ cmp %g4,%g3 +/* 0x0020 ( 3 4) */ bge,pt %icc,.L77000116 ! tprob=0.56 +/* 0x0024 ( 4 5) */ fmovd %f0,%f2 +/* 0x0028 36 ( 4 5) */ add %o0,%g1,%g1 +/* 0x002c ( 4 5) */ sub %g3,1,%g3 + +! 54 ! { +! 55 ! x=dt[i]; + +/* 0x0030 55 ( 5 8) */ ldd [%g1],%f8 + .L900000114: /* frequency 6.4 confidence 0.0 */ +/* 0x0034 ( 0 3) */ fdtox %f8,%f6 + +! 56 ! x1=dt[i+1]; + +/* 0x0038 56 ( 0 3) */ ldd [%g1+8],%f10 + +! 57 ! dt[i]=lower32(x,Zero)+tmp; +! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1; +! 59 ! tmp=upper32(x); +! 60 ! tmp1=upper32(x1); + +/* 0x003c 60 ( 0 1) */ add %g4,2,%g4 +/* 0x0040 ( 1 4) */ fdtox %f8,%f8 +/* 0x0044 ( 1 2) */ cmp %g4,%g3 +/* 0x0048 ( 5 6) */ fmovs %f0,%f6 +/* 0x004c ( 7 10) */ fxtod %f6,%f6 +/* 0x0050 ( 8 11) */ fdtox %f10,%f0 +/* 0x0054 57 (10 13) */ faddd %f6,%f2,%f2 +/* 0x0058 (10 11) */ std %f2,[%g1] +/* 0x005c (12 15) */ ldd [%g2],%f2 +/* 0x0060 (14 15) */ fmovs %f2,%f0 +/* 0x0064 (16 19) */ fxtod %f0,%f6 +/* 0x0068 (17 20) */ fdtox %f10,%f0 +/* 0x006c (18 21) */ fitod %f8,%f2 +/* 0x0070 58 (19 22) */ faddd %f6,%f4,%f4 +/* 0x0074 (19 20) */ std %f4,[%g1+8] +/* 0x0078 60 (19 20) */ add %g1,16,%g1 +/* 0x007c (20 23) */ fitod %f0,%f4 +/* 0x0080 (20 23) */ ldd [%g2],%f0 +/* 0x0084 (20 21) */ ble,a,pt %icc,.L900000114 ! tprob=0.86 +/* 0x0088 (21 24) */ ldd [%g1],%f8 + .L77000116: /* frequency 1.0 confidence 0.0 */ +/* 0x008c ( 0 2) */ retl ! 
Result = +/* 0x0090 ( 1 2) */ nop +/* 0x0094 0 ( 0 0) */ .type cleanup,2 +/* 0x0094 ( 0 0) */ .size cleanup,(.-cleanup) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_d16_to_i32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_d16_to_i32 + conv_d16_to_i32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-136,%sp + +! 61 ! } +! 62 ! /** end new code **/ +! 63 !} +! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) +! 67 !{ +! 68 !int i; +! 69 !long long t, t1, a, b, c, d; +! 71 ! t1=0; +! 72 ! a=(long long)d16[0]; + +/* 0x0004 72 ( 1 4) */ ldd [%i1],%f0 + +! 73 ! b=(long long)d16[1]; +! 74 ! for(i=0; i>32); +! 79 ! d=(long long)d16[2*i+3]; +! 80 ! t1+=(b&0xffff)<<16; + +/* 0x0070 80 (15 16) */ and %g1,%o1,%o0 + +! 81 ! t+=(b>>16)+(t1>>32); +! 82 ! i32[i]=t1&0xffffffff; +! 83 ! t1=t; +! 84 ! a=c; +! 85 ! b=d; + +/* 0x0074 85 (15 16) */ add %g2,16,%g2 +/* 0x0078 80 (16 17) */ sllx %o0,16,%g3 +/* 0x007c 77 (16 17) */ and %g4,%o3,%o0 +/* 0x0080 76 (17 20) */ fdtox %f0,%f0 +/* 0x0084 (17 18) */ std %f0,[%sp+104] +/* 0x0088 74 (17 18) */ add %o0,%g3,%o4 +/* 0x008c 79 (18 21) */ ldd [%g2+8],%f2 +/* 0x0090 81 (18 19) */ srax %g1,16,%o0 +/* 0x0094 82 (18 19) */ and %o4,%o3,%o7 +/* 0x0098 81 (19 20) */ stx %o0,[%sp+112] +/* 0x009c (19 20) */ srax %o4,32,%o0 +/* 0x00a0 85 (19 20) */ add %g5,4,%o5 +/* 0x00a4 81 (20 21) */ stx %o0,[%sp+120] +/* 0x00a8 78 (20 21) */ srax %g4,32,%o4 +/* 0x00ac 79 (20 23) */ fdtox %f2,%f0 +/* 0x00b0 (21 22) */ std %f0,[%sp+96] +/* 0x00b4 81 (22 24) */ ldx [%sp+112],%o0 +/* 0x00b8 (23 25) */ ldx [%sp+120],%g4 +/* 0x00bc 76 (25 27) */ ldx [%sp+104],%g3 +/* 0x00c0 81 (25 26) */ add %o0,%g4,%g4 +/* 0x00c4 79 (26 28) */ ldx [%sp+96],%g1 +/* 0x00c8 81 (26 27) */ add %o4,%g4,%o4 +/* 0x00cc 82 (27 28) */ st %o7,[%g5] +/* 0x00d0 (27 28) */ or %g0,1,%o7 +/* 0x00d4 84 (27 28) */ or %g0,%g3,%g4 + .L900000209: /* frequency 
64.0 confidence 0.0 */ +/* 0x00d8 76 (17 19) */ ldd [%g2+16],%f0 +/* 0x00dc 85 (17 18) */ add %o7,1,%o7 +/* 0x00e0 (17 18) */ add %o5,4,%o5 +/* 0x00e4 (18 18) */ cmp %o7,%o2 +/* 0x00e8 (18 19) */ add %g2,16,%g2 +/* 0x00ec 76 (19 22) */ fdtox %f0,%f0 +/* 0x00f0 (20 21) */ std %f0,[%sp+104] +/* 0x00f4 79 (21 23) */ ldd [%g2+8],%f0 +/* 0x00f8 (23 26) */ fdtox %f0,%f0 +/* 0x00fc (24 25) */ std %f0,[%sp+96] +/* 0x0100 80 (25 26) */ and %g1,%o1,%g3 +/* 0x0104 (26 27) */ sllx %g3,16,%g3 +/* 0x0108 ( 0 0) */ stx %g3,[%sp+120] +/* 0x010c 77 (26 27) */ and %g4,%o3,%g3 +/* 0x0110 74 ( 0 0) */ stx %o7,[%sp+128] +/* 0x0114 ( 0 0) */ ldx [%sp+120],%o7 +/* 0x0118 (27 27) */ add %g3,%o7,%g3 +/* 0x011c ( 0 0) */ ldx [%sp+128],%o7 +/* 0x0120 81 (28 29) */ srax %g1,16,%g1 +/* 0x0124 74 (28 28) */ add %g3,%o4,%g3 +/* 0x0128 81 (29 30) */ srax %g3,32,%o4 +/* 0x012c ( 0 0) */ stx %o4,[%sp+112] +/* 0x0130 78 (30 31) */ srax %g4,32,%o4 +/* 0x0134 81 ( 0 0) */ ldx [%sp+112],%g4 +/* 0x0138 (30 31) */ add %g1,%g4,%g4 +/* 0x013c 79 (31 33) */ ldx [%sp+96],%g1 +/* 0x0140 81 (31 32) */ add %o4,%g4,%o4 +/* 0x0144 82 (32 33) */ and %g3,%o3,%g3 +/* 0x0148 84 ( 0 0) */ ldx [%sp+104],%g4 +/* 0x014c 85 (33 34) */ ble,pt %icc,.L900000209 ! tprob=0.50 +/* 0x0150 (33 34) */ st %g3,[%o5-4] + .L900000212: /* frequency 8.0 confidence 0.0 */ +/* 0x0154 85 ( 0 1) */ ba .L900000214 ! 
tprob=1.00 +/* 0x0158 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L77000134: /* frequency 0.7 confidence 0.0 */ + .L900000213: /* frequency 6.4 confidence 0.0 */ +/* 0x015c 77 ( 0 1) */ and %g4,%o3,%o0 +/* 0x0160 80 ( 0 1) */ and %g1,%o1,%g3 +/* 0x0164 76 ( 0 3) */ fdtox %f0,%f0 +/* 0x0168 77 ( 1 2) */ add %o4,%o0,%o0 +/* 0x016c 76 ( 1 2) */ std %f0,[%sp+104] +/* 0x0170 85 ( 1 2) */ add %o7,1,%o7 +/* 0x0174 80 ( 2 3) */ sllx %g3,16,%o4 +/* 0x0178 79 ( 2 5) */ ldd [%g2+24],%f2 +/* 0x017c 85 ( 2 3) */ add %g2,16,%g2 +/* 0x0180 80 ( 3 4) */ add %o0,%o4,%o4 +/* 0x0184 81 ( 3 4) */ stx %o7,[%sp+128] +/* 0x0188 ( 4 5) */ srax %g1,16,%o0 +/* 0x018c ( 4 5) */ stx %o0,[%sp+112] +/* 0x0190 82 ( 4 5) */ and %o4,%o3,%g3 +/* 0x0194 81 ( 5 6) */ srax %o4,32,%o0 +/* 0x0198 ( 5 6) */ stx %o0,[%sp+120] +/* 0x019c 79 ( 5 8) */ fdtox %f2,%f0 +/* 0x01a0 ( 6 7) */ std %f0,[%sp+96] +/* 0x01a4 78 ( 6 7) */ srax %g4,32,%o4 +/* 0x01a8 81 ( 7 9) */ ldx [%sp+120],%o7 +/* 0x01ac ( 8 10) */ ldx [%sp+112],%g4 +/* 0x01b0 76 (10 12) */ ldx [%sp+104],%g1 +/* 0x01b4 81 (10 11) */ add %g4,%o7,%g4 +/* 0x01b8 (11 13) */ ldx [%sp+128],%o7 +/* 0x01bc (11 12) */ add %o4,%g4,%o4 +/* 0x01c0 79 (12 14) */ ldx [%sp+96],%o0 +/* 0x01c4 84 (12 13) */ or %g0,%g1,%g4 +/* 0x01c8 82 (13 14) */ st %g3,[%o5] +/* 0x01cc 85 (13 14) */ add %o5,4,%o5 +/* 0x01d0 (13 14) */ cmp %o7,%o2 +/* 0x01d4 (14 15) */ or %g0,%o0,%g1 +/* 0x01d8 (14 15) */ ble,a,pt %icc,.L900000213 ! tprob=0.86 +/* 0x01dc (14 17) */ ldd [%g2+16],%f0 + .L77000127: /* frequency 1.0 confidence 0.0 */ + +! 86 ! } +! 87 ! t1+=a&0xffffffff; +! 88 ! t=(a>>32); +! 89 ! t1+=(b&0xffff)<<16; +! 90 ! 
i32[i]=t1&0xffffffff; + +/* 0x01e0 90 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L900000214: /* frequency 1.0 confidence 0.0 */ +/* 0x01e4 90 ( 0 1) */ or %g0,-1,%g3 +/* 0x01e8 ( 0 1) */ add %g2,1023,%g2 +/* 0x01ec ( 1 2) */ srl %g3,0,%g3 +/* 0x01f0 ( 1 2) */ and %g1,%g2,%g2 +/* 0x01f4 ( 2 3) */ and %g4,%g3,%g4 +/* 0x01f8 ( 3 4) */ sllx %g2,16,%g2 +/* 0x01fc ( 3 4) */ add %o4,%g4,%g4 +/* 0x0200 ( 4 5) */ add %g4,%g2,%g2 +/* 0x0204 ( 5 6) */ sll %o7,2,%g4 +/* 0x0208 ( 5 6) */ and %g2,%g3,%g2 +/* 0x020c ( 6 7) */ st %g2,[%g5+%g4] +/* 0x0210 ( 7 9) */ ret ! Result = +/* 0x0214 ( 9 10) */ restore %g0,%g0,%g0 +/* 0x0218 0 ( 0 0) */ .type conv_d16_to_i32,2 +/* 0x0218 ( 0 0) */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000301: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32 + conv_i32_to_d32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ orcc %g0,%o2,%g1 + +! 92 !} +! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 95 !{ +! 96 !int i; +! 98 !#pragma pipeloop(0) +! 99 ! for(i=0;i>16); + +/* 0x0018 113 ( 3 4) */ sethi %hi(.L_const_seg_900000401),%o0 +/* 0x001c ( 3 4) */ add %o5,1,%g3 +/* 0x0020 ( 4 5) */ add %g2,1023,%o4 +/* 0x0024 109 ( 4 5) */ or %g0,0,%g1 +/* 0x0028 ( 5 6) */ cmp %g3,3 +/* 0x002c ( 5 6) */ or %g0,%i1,%o7 +/* 0x0030 ( 6 7) */ add %o0,%lo(.L_const_seg_900000401),%o3 +/* 0x0034 ( 6 7) */ or %g0,%i0,%g2 +/* 0x0038 ( 6 7) */ bl,pn %icc,.L77000154 ! 
tprob=0.44 +/* 0x003c ( 7 8) */ add %o7,4,%o0 +/* 0x0040 112 ( 7 10) */ ldd [%o3],%f0 +/* 0x0044 113 ( 7 8) */ or %g0,1,%g1 +/* 0x0048 111 ( 8 11) */ ld [%o0-4],%o1 +/* 0x004c 0 ( 8 9) */ or %g0,%o0,%o7 +/* 0x0050 112 (10 11) */ and %o1,%o4,%o0 + .L900000406: /* frequency 64.0 confidence 0.0 */ +/* 0x0054 112 (22 23) */ st %o0,[%sp+96] +/* 0x0058 113 (22 23) */ add %g1,1,%g1 +/* 0x005c (22 23) */ add %g2,16,%g2 +/* 0x0060 (23 23) */ cmp %g1,%o5 +/* 0x0064 (23 24) */ add %o7,4,%o7 +/* 0x0068 112 (29 31) */ ld [%sp+96],%f3 +/* 0x006c ( 0 0) */ fmovs %f0,%f2 +/* 0x0070 (31 34) */ fsubd %f2,%f0,%f2 +/* 0x0074 113 (32 33) */ srl %o1,16,%o0 +/* 0x0078 112 (32 33) */ std %f2,[%g2-16] +/* 0x007c 113 (33 34) */ st %o0,[%sp+92] +/* 0x0080 (40 42) */ ld [%sp+92],%f3 +/* 0x0084 111 (41 43) */ ld [%o7-4],%o1 +/* 0x0088 113 ( 0 0) */ fmovs %f0,%f2 +/* 0x008c (42 45) */ fsubd %f2,%f0,%f2 +/* 0x0090 112 (43 44) */ and %o1,%o4,%o0 +/* 0x0094 113 (43 44) */ ble,pt %icc,.L900000406 ! tprob=0.50 +/* 0x0098 (43 44) */ std %f2,[%g2-8] + .L900000409: /* frequency 8.0 confidence 0.0 */ +/* 0x009c 112 ( 0 1) */ st %o0,[%sp+96] +/* 0x00a0 ( 0 1) */ fmovs %f0,%f2 +/* 0x00a4 113 ( 0 1) */ add %g2,16,%g2 +/* 0x00a8 ( 1 2) */ srl %o1,16,%o0 +/* 0x00ac 112 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00b0 ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x00b4 ( 6 7) */ std %f2,[%g2-16] +/* 0x00b8 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x00bc (10 11) */ fmovs %f0,%f2 +/* 0x00c0 (11 14) */ ld [%sp+92],%f3 +/* 0x00c4 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x00c8 (13 14) */ std %f0,[%g2-8] +/* 0x00cc (14 16) */ ret ! 
Result = +/* 0x00d0 (16 17) */ restore %g0,%g0,%g0 + .L77000154: /* frequency 0.7 confidence 0.0 */ +/* 0x00d4 111 ( 0 3) */ ld [%o7],%o0 + .L900000410: /* frequency 6.4 confidence 0.0 */ +/* 0x00d8 112 ( 0 1) */ and %o0,%o4,%o1 +/* 0x00dc ( 0 1) */ st %o1,[%sp+96] +/* 0x00e0 113 ( 0 1) */ add %g1,1,%g1 +/* 0x00e4 112 ( 1 4) */ ldd [%o3],%f0 +/* 0x00e8 113 ( 1 2) */ srl %o0,16,%o0 +/* 0x00ec ( 1 2) */ add %o7,4,%o7 +/* 0x00f0 ( 2 3) */ cmp %g1,%o5 +/* 0x00f4 112 ( 3 4) */ fmovs %f0,%f2 +/* 0x00f8 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00fc ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x0100 ( 6 7) */ std %f2,[%g2] +/* 0x0104 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x0108 (10 11) */ fmovs %f0,%f2 +/* 0x010c (11 14) */ ld [%sp+92],%f3 +/* 0x0110 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x0114 (13 14) */ std %f0,[%g2+8] +/* 0x0118 (13 14) */ add %g2,16,%g2 +/* 0x011c (13 14) */ ble,a,pt %icc,.L900000410 ! tprob=0.86 +/* 0x0120 (14 17) */ ld [%o7],%o0 + .L77000150: /* frequency 1.0 confidence 0.0 */ +/* 0x0124 ( 0 2) */ ret ! Result = +/* 0x0128 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x012c 0 ( 0 0) */ .type conv_i32_to_d16,2 +/* 0x012c ( 0 0) */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000501: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-104,%sp +/* 0x0004 ( 1 2) */ or %g0,%i3,%i4 +/* 0x0008 ( 1 2) */ or %g0,%i2,%g1 + +! 114 ! } +! 115 !} +! 118 !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/, +! 119 ! double * /* 0 */, +! 120 ! double * /*result16*/, double * /* result32 */, +! 121 ! float * /*source - should be unsigned int* +! 122 ! converted to float* */); +! 
126 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 127 ! unsigned int *i32, int len) +! 128 !{ +! 129 !int i; +! 130 !unsigned int a; +! 132 !#pragma pipeloop(0) +! 133 ! for(i=0;i>16); + +/* 0x0128 143 ( 0 1) */ sethi %hi(.L_const_seg_900000501),%o1 +/* 0x012c 138 ( 1 2) */ sethi %hi(0xfc00),%o0 +/* 0x0130 141 ( 1 4) */ ldd [%o1+%lo(.L_const_seg_900000501)],%f0 +/* 0x0134 138 ( 1 2) */ sub %i4,%o7,%g3 +/* 0x0138 ( 2 3) */ sll %o7,2,%g2 +/* 0x013c ( 2 3) */ add %o0,1023,%o3 +/* 0x0140 ( 3 4) */ sll %o7,3,%g4 +/* 0x0144 ( 3 4) */ cmp %g3,3 +/* 0x0148 ( 4 5) */ add %g1,%g2,%o0 +/* 0x014c ( 4 5) */ add %o1,%lo(.L_const_seg_900000501),%o2 +/* 0x0150 ( 5 6) */ add %i3,%g4,%o4 +/* 0x0154 ( 5 6) */ sub %i4,1,%o1 +/* 0x0158 ( 6 7) */ sll %o7,4,%g5 +/* 0x015c ( 6 7) */ bl,pn %icc,.L77000161 ! tprob=0.44 +/* 0x0160 ( 7 8) */ add %i1,%g5,%o5 +/* 0x0164 141 ( 7 10) */ ld [%g1+%g2],%f3 +/* 0x0168 143 ( 7 8) */ add %o4,8,%o4 +/* 0x016c 140 ( 8 11) */ ld [%g1+%g2],%g1 +/* 0x0170 143 ( 8 9) */ add %o5,16,%o5 +/* 0x0174 ( 8 9) */ add %o7,1,%o7 +/* 0x0178 141 ( 9 10) */ fmovs %f0,%f2 +/* 0x017c 143 ( 9 10) */ add %o0,4,%o0 +/* 0x0180 142 (10 11) */ and %g1,%o3,%g2 +/* 0x0184 141 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x0188 (11 12) */ std %f2,[%o4-8] +/* 0x018c 143 (11 12) */ srl %g1,16,%g1 +/* 0x0190 142 (12 13) */ st %g2,[%sp+96] +/* 0x0194 (15 16) */ fmovs %f0,%f2 +/* 0x0198 (16 19) */ ld [%sp+96],%f3 +/* 0x019c (18 21) */ fsubd %f2,%f0,%f2 +/* 0x01a0 (18 19) */ std %f2,[%o5-16] +/* 0x01a4 143 (19 20) */ st %g1,[%sp+92] +/* 0x01a8 (22 23) */ fmovs %f0,%f2 +/* 0x01ac (23 26) */ ld [%sp+92],%f3 +/* 0x01b0 (25 28) */ fsubd %f2,%f0,%f2 +/* 0x01b4 (25 26) */ std %f2,[%o5-8] + .L900000509: /* frequency 64.0 confidence 0.0 */ +/* 0x01b8 141 (26 28) */ ld [%o0],%f3 +/* 0x01bc 143 (26 27) */ add %o7,2,%o7 +/* 0x01c0 (26 27) */ add %o5,32,%o5 +/* 0x01c4 140 (27 29) */ ld [%o0],%g1 +/* 0x01c8 143 (27 27) */ cmp %o7,%o1 +/* 0x01cc (27 28) */ add %o4,16,%o4 +/* 0x01d0 141 ( 0 0) */ 
fmovs %f0,%f2 +/* 0x01d4 (28 31) */ fsubd %f2,%f0,%f2 +/* 0x01d8 (29 30) */ std %f2,[%o4-16] +/* 0x01dc 142 (29 30) */ and %g1,%o3,%g2 +/* 0x01e0 (30 31) */ st %g2,[%sp+96] +/* 0x01e4 (37 39) */ ld [%sp+96],%f3 +/* 0x01e8 ( 0 0) */ fmovs %f0,%f2 +/* 0x01ec (39 42) */ fsubd %f2,%f0,%f2 +/* 0x01f0 143 (40 41) */ srl %g1,16,%g1 +/* 0x01f4 142 (40 41) */ std %f2,[%o5-32] +/* 0x01f8 143 (41 42) */ st %g1,[%sp+92] +/* 0x01fc (48 50) */ ld [%sp+92],%f3 +/* 0x0200 ( 0 0) */ fmovs %f0,%f2 +/* 0x0204 (50 53) */ fsubd %f2,%f0,%f2 +/* 0x0208 (51 52) */ std %f2,[%o5-24] +/* 0x020c (51 52) */ add %o0,4,%o0 +/* 0x0210 141 (52 54) */ ld [%o0],%f3 +/* 0x0214 140 (53 55) */ ld [%o0],%g1 +/* 0x0218 141 ( 0 0) */ fmovs %f0,%f2 +/* 0x021c (54 57) */ fsubd %f2,%f0,%f2 +/* 0x0220 (55 56) */ std %f2,[%o4-8] +/* 0x0224 142 (55 56) */ and %g1,%o3,%g2 +/* 0x0228 (56 57) */ st %g2,[%sp+96] +/* 0x022c (63 65) */ ld [%sp+96],%f3 +/* 0x0230 ( 0 0) */ fmovs %f0,%f2 +/* 0x0234 (65 68) */ fsubd %f2,%f0,%f2 +/* 0x0238 143 (66 67) */ srl %g1,16,%g1 +/* 0x023c 142 (66 67) */ std %f2,[%o5-16] +/* 0x0240 143 (67 68) */ st %g1,[%sp+92] +/* 0x0244 (74 76) */ ld [%sp+92],%f3 +/* 0x0248 ( 0 0) */ fmovs %f0,%f2 +/* 0x024c (76 79) */ fsubd %f2,%f0,%f2 +/* 0x0250 (77 78) */ std %f2,[%o5-8] +/* 0x0254 (77 78) */ bl,pt %icc,.L900000509 ! tprob=0.50 +/* 0x0258 (77 78) */ add %o0,4,%o0 + .L900000512: /* frequency 8.0 confidence 0.0 */ +/* 0x025c 143 ( 0 1) */ cmp %o7,%i4 +/* 0x0260 ( 0 1) */ bge,pn %icc,.L77000164 ! 
tprob=0.14 +/* 0x0264 ( 0 1) */ nop + .L77000161: /* frequency 0.7 confidence 0.0 */ +/* 0x0268 141 ( 0 3) */ ld [%o0],%f3 + .L900000513: /* frequency 6.4 confidence 0.0 */ +/* 0x026c 141 ( 0 3) */ ldd [%o2],%f0 +/* 0x0270 143 ( 0 1) */ add %o7,1,%o7 +/* 0x0274 140 ( 1 4) */ ld [%o0],%o1 +/* 0x0278 143 ( 1 2) */ add %o0,4,%o0 +/* 0x027c ( 1 2) */ cmp %o7,%i4 +/* 0x0280 141 ( 2 3) */ fmovs %f0,%f2 +/* 0x0284 142 ( 3 4) */ and %o1,%o3,%g1 +/* 0x0288 141 ( 4 7) */ fsubd %f2,%f0,%f2 +/* 0x028c ( 4 5) */ std %f2,[%o4] +/* 0x0290 143 ( 4 5) */ srl %o1,16,%o1 +/* 0x0294 142 ( 5 6) */ st %g1,[%sp+96] +/* 0x0298 143 ( 5 6) */ add %o4,8,%o4 +/* 0x029c 142 ( 8 9) */ fmovs %f0,%f2 +/* 0x02a0 ( 9 12) */ ld [%sp+96],%f3 +/* 0x02a4 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x02a8 (11 12) */ std %f2,[%o5] +/* 0x02ac 143 (12 13) */ st %o1,[%sp+92] +/* 0x02b0 (15 16) */ fmovs %f0,%f2 +/* 0x02b4 (16 19) */ ld [%sp+92],%f3 +/* 0x02b8 (18 21) */ fsubd %f2,%f0,%f0 +/* 0x02bc (18 19) */ std %f0,[%o5+8] +/* 0x02c0 (18 19) */ add %o5,16,%o5 +/* 0x02c4 (18 19) */ bl,a,pt %icc,.L900000513 ! tprob=0.86 +/* 0x02c8 (19 22) */ ld [%o0],%f3 + .L77000164: /* frequency 1.0 confidence 0.0 */ +/* 0x02cc ( 0 2) */ ret ! Result = +/* 0x02d0 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x02d4 0 ( 0 0) */ .type conv_i32_to_d32_and_d16,2 +/* 0x02d4 ( 0 0) */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global adjust_montf_result + adjust_montf_result: /* frequency 1.0 confidence 0.0 */ + +! 144 ! } +! 145 !} +! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 149 !{ +! 150 !long long acc; +! 151 !int i; +! 153 ! 
if(i32[len]>0) i=-1; +/* adjust_montf_result, compare step: i (%g3) is -1 when the extra top word i32[len] is nonzero, otherwise the highest index at which i32[] (%o0) and nint[] (%o1) differ, or -1 if none; the test at 0x0064-0x0078 then selects .L77000198 when i<0 or i32[i]>nint[i] */ +/* 000000 153 ( 0 1) */ sll %o2,2,%g1 +/* 0x0004 ( 0 1) */ or %g0,-1,%g3 +/* 0x0008 ( 1 4) */ ld [%o0+%g1],%g1 +/* 0x000c ( 3 4) */ cmp %g1,0 +/* 0x0010 ( 3 4) */ bleu,pn %icc,.L77000175 ! tprob=0.50 +/* 0x0014 ( 3 4) */ or %g0,%o1,%o3 +/* 0x0018 ( 4 5) */ ba .L900000611 ! tprob=1.00 +/* 0x001c ( 4 5) */ cmp %g3,0 + .L77000175: /* frequency 0.8 confidence 0.0 */ + +! 154 ! else +! 155 ! { +! 156 ! for(i=len-1; i>=0; i--) + +/* 0x0020 156 ( 0 1) */ subcc %o2,1,%g3 +/* 0x0024 ( 0 1) */ bneg,pt %icc,.L900000611 ! tprob=0.60 +/* 0x0028 ( 1 2) */ cmp %g3,0 +/* 0x002c ( 1 2) */ sll %g3,2,%g1 +/* 0x0030 ( 2 3) */ add %o0,%g1,%g2 +/* 0x0034 ( 2 3) */ add %o1,%g1,%g1 + +! 157 ! { +! 158 ! if(i32[i]!=nint[i]) break; + +/* 0x0038 158 ( 3 6) */ ld [%g1],%g5 + .L900000610: /* frequency 5.3 confidence 0.0 */ +/* 0x003c 158 ( 0 3) */ ld [%g2],%o5 +/* 0x0040 ( 0 1) */ sub %g1,4,%g1 /* nint pointer moves down one word with i-- (was add, which ran past nint[len-1]) */ +/* 0x0044 ( 0 1) */ sub %g2,4,%g2 /* i32 pointer moves down one word with i-- (was add) */ +/* 0x0048 ( 2 3) */ cmp %o5,%g5 +/* 0x004c ( 2 3) */ bne,pn %icc,.L77000182 ! tprob=0.16 +/* 0x0050 ( 2 3) */ nop +/* 0x0054 ( 3 4) */ subcc %g3,1,%g3 /* i-- (was addcc i+1); N is set once i drops below 0, so bpos falls through and ends the scan */ +/* 0x0058 ( 3 4) */ bpos,a,pt %icc,.L900000610 ! tprob=0.84 +/* 0x005c ( 3 6) */ ld [%g1],%g5 + .L77000182: /* frequency 1.0 confidence 0.0 */ + +! 159 ! } +! 160 ! } +! 161 ! if((i<0)||(i32[i]>nint[i])) + +/* 0x0060 161 ( 0 1) */ cmp %g3,0 + .L900000611: /* frequency 1.0 confidence 0.0 */ +/* 0x0064 161 ( 0 1) */ bl,pn %icc,.L77000198 ! tprob=0.50 +/* 0x0068 ( 0 1) */ sll %g3,2,%g2 +/* 0x006c ( 1 4) */ ld [%o1+%g2],%g1 +/* 0x0070 ( 2 5) */ ld [%o0+%g2],%g2 +/* 0x0074 ( 4 5) */ cmp %g2,%g1 +/* 0x0078 ( 4 5) */ bleu,pt %icc,.L77000191 ! tprob=0.56 +/* 0x007c ( 4 5) */ nop + .L77000198: /* frequency 0.8 confidence 0.0 */ + +! 162 ! { +! 163 ! acc=0; +! 164 ! 
for(i=0;i>32; + +/* 0x00c4 168 ( 6 7) */ or %g0,2,%o5 +/* 0x00c8 166 ( 7 10) */ ld [%o0+4],%g1 +/* 0x00cc 164 ( 8 9) */ sub %o2,%o1,%o2 +/* 0x00d0 ( 9 10) */ or %g0,%o2,%g5 +/* 0x00d4 167 ( 9 10) */ and %o2,%g3,%o2 +/* 0x00d8 ( 9 10) */ st %o2,[%o0] +/* 0x00dc 168 (10 11) */ srax %g5,32,%g5 + .L900000605: /* frequency 64.0 confidence 0.0 */ +/* 0x00e0 166 (12 20) */ ld [%o3],%o2 +/* 0x00e4 168 (12 13) */ add %o5,1,%o5 +/* 0x00e8 (12 13) */ add %o3,4,%o3 +/* 0x00ec (13 13) */ cmp %o5,%g4 +/* 0x00f0 (13 14) */ add %o4,4,%o4 +/* 0x00f4 164 (14 14) */ sub %g1,%o2,%g1 +/* 0x00f8 (15 15) */ add %g1,%g5,%g5 +/* 0x00fc 167 (16 17) */ and %g5,%g3,%o2 +/* 0x0100 166 (16 24) */ ld [%o4-4],%g1 +/* 0x0104 167 (17 18) */ st %o2,[%o4-8] +/* 0x0108 168 (17 18) */ ble,pt %icc,.L900000605 ! tprob=0.50 +/* 0x010c (17 18) */ srax %g5,32,%g5 + .L900000608: /* frequency 8.0 confidence 0.0 */ +/* 0x0110 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0114 164 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0118 ( 3 4) */ add %g1,%g5,%g1 +/* 0x011c 167 ( 4 5) */ and %g1,%g3,%g2 +/* 0x0120 ( 5 7) */ retl ! Result = +/* 0x0124 ( 6 7) */ st %g2,[%o4-4] + .L77000199: /* frequency 0.6 confidence 0.0 */ +/* 0x0128 166 ( 0 3) */ ld [%o4],%g1 + .L900000609: /* frequency 5.3 confidence 0.0 */ +/* 0x012c 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0130 ( 0 1) */ add %g5,%g1,%g1 +/* 0x0134 168 ( 0 1) */ add %o5,1,%o5 +/* 0x0138 ( 1 2) */ add %o3,4,%o3 +/* 0x013c ( 1 2) */ cmp %o5,%g4 +/* 0x0140 166 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0144 167 ( 3 4) */ and %g1,%g3,%g2 +/* 0x0148 ( 3 4) */ st %g2,[%o4] +/* 0x014c 168 ( 3 4) */ add %o4,4,%o4 +/* 0x0150 ( 4 5) */ srax %g1,32,%g5 +/* 0x0154 ( 4 5) */ ble,a,pt %icc,.L900000609 ! tprob=0.84 +/* 0x0158 ( 4 7) */ ld [%o4],%g1 + .L77000191: /* frequency 1.0 confidence 0.0 */ +/* 0x015c ( 0 2) */ retl ! 
Result = +/* 0x0160 ( 1 2) */ nop +/* 0x0164 0 ( 0 0) */ .type adjust_montf_result,2 +/* 0x0164 ( 0 0) */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 32 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global mont_mulf_noconv + mont_mulf_noconv: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-144,%sp +/* 0x0004 ( 1 2) */ st %i0,[%fp+68] + +! 169 ! } +! 170 ! } +! 171 !} +! 175 !void cleanup(double *dt, int from, int tlen); +! 177 !/* +! 178 !** the lengths of the input arrays should be at least the following: +! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 180 !** all of them should be different from one another +! 181 !** +! 182 !*/ +! 183 !void mont_mulf_noconv(unsigned int *result, +! 184 ! double *dm1, double *dm2, double *dt, +! 185 ! double *dn, unsigned int *nint, +! 186 ! int nlen, double dn0) +! 187 !{ +! 188 ! int i, j, jj; +! 189 ! int tmp; +! 190 ! double digit, m2j, nextm2j, a, b; +! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 193 ! pdm1=&(dm1[0]); +! 194 ! pdm2=&(dm2[0]); +! 195 ! pdn=&(dn[0]); +! 196 ! pdm2[2*nlen]=Zero; + +/* 0x0008 196 ( 1 2) */ sethi %hi(Zero),%g2 +/* 0x000c 187 ( 1 2) */ or %g0,%i2,%o1 +/* 0x0010 ( 2 3) */ st %i5,[%fp+88] +/* 0x0014 ( 2 3) */ or %g0,%i3,%o2 +/* 0x0018 196 ( 2 3) */ add %g2,%lo(Zero),%g4 +/* 0x001c ( 3 6) */ ldd [%g2+%lo(Zero)],%f2 +/* 0x0020 187 ( 3 4) */ or %g0,%o2,%g5 +/* 0x0024 196 ( 3 4) */ or %g0,%o1,%i0 +/* 0x0028 187 ( 4 5) */ or %g0,%i4,%i2 + +! 198 ! if (nlen!=16) +! 199 ! { +! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 202 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); +! 205 ! pdtj=&(dt[0]); +! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 207 ! { +! 208 ! m2j=pdm2[j]; +! 209 ! a=pdtj[0]+pdn[0]*digit; +! 210 ! 
b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 211 ! pdtj[1]=b; +! 213 !#pragma pipeloop(0) +! 214 ! for(i=1;i>32); +! 122 ! d=(long long)d16[2*i+3]; +! 123 ! t1+=(b&0xffff)<<16; +! 124 ! t+=(b>>16)+(t1>>32); +! 125 ! i32[i]=t1&0xffffffff; +! 126 ! t1=t; +! 127 ! a=c; +! 128 ! b=d; + +/* 0x0070 128 */ add %o0,16,%g2 +/* 0x0074 123 */ and %g1,%o1,%o0 +/* 0x0078 */ sllx %o0,16,%g3 +/* 0x007c 120 */ and %g4,%o3,%o0 +/* 0x0080 117 */ add %o0,%g3,%o4 +/* 0x0084 119 */ fdtox %f0,%f0 +/* 0x0088 */ std %f0,[%sp+104] +/* 0x008c 125 */ and %o4,%o3,%g5 +/* 0x0090 122 */ ldd [%g2+8],%f2 +/* 0x0094 128 */ add %o5,4,%o5 +/* 0x0098 124 */ srax %o4,32,%o4 +/* 0x009c */ stx %o4,[%sp+112] +/* 0x00a0 122 */ fdtox %f2,%f0 +/* 0x00a4 */ std %f0,[%sp+96] +/* 0x00a8 124 */ srax %g1,16,%o0 +/* 0x00ac */ ldx [%sp+112],%o7 +/* 0x00b0 121 */ srax %g4,32,%o4 +/* 0x00b4 124 */ add %o0,%o7,%g4 +/* 0x00b8 128 */ or %g0,1,%o7 +/* 0x00bc 119 */ ldx [%sp+104],%g3 +/* 0x00c0 124 */ add %o4,%g4,%o4 +/* 0x00c4 122 */ ldx [%sp+96],%g1 +/* 0x00c8 125 */ st %g5,[%o5-4] +/* 0x00cc 127 */ or %g0,%g3,%g4 + .L900000112: +/* 0x00d0 119 */ ldd [%g2+16],%f0 +/* 0x00d4 128 */ add %o7,1,%o7 +/* 0x00d8 */ add %o5,4,%o5 +/* 0x00dc */ cmp %o7,%o2 +/* 0x00e0 */ add %g2,16,%g2 +/* 0x00e4 119 */ fdtox %f0,%f0 +/* 0x00e8 */ std %f0,[%sp+104] +/* 0x00ec 122 */ ldd [%g2+8],%f0 +/* 0x00f0 */ fdtox %f0,%f0 +/* 0x00f4 */ std %f0,[%sp+96] +/* 0x00f8 123 */ and %g1,%o1,%g3 +/* 0x00fc */ sllx %g3,16,%g5 +/* 0x0100 120 */ and %g4,%o3,%g3 +/* 0x0104 117 */ add %g3,%g5,%g3 +/* 0x0108 124 */ srax %g1,16,%g1 +/* 0x010c 117 */ add %g3,%o4,%g3 +/* 0x0110 124 */ srax %g3,32,%o4 +/* 0x0114 */ stx %o4,[%sp+112] +/* 0x0118 119 */ ldx [%sp+104],%g5 +/* 0x011c 121 */ srax %g4,32,%o4 +/* 0x0120 124 */ ldx [%sp+112],%g4 +/* 0x0124 */ add %g1,%g4,%g4 +/* 0x0128 122 */ ldx [%sp+96],%g1 +/* 0x012c 124 */ add %o4,%g4,%o4 +/* 0x0130 125 */ and %g3,%o3,%g3 +/* 0x0134 127 */ or %g0,%g5,%g4 +/* 0x0138 128 */ ble,pt %icc,.L900000112 +/* 0x013c */ st 
%g3,[%o5-4] + .L900000115: +/* 0x0140 128 */ ba .L900000117 +/* 0x0144 */ sethi %hi(0xfc00),%g2 + .L77000134: +/* 0x0148 119 */ ldd [%g2+16],%f0 + .L900000116: +/* 0x014c 120 */ and %g4,%o3,%o0 +/* 0x0150 123 */ and %g1,%o1,%g3 +/* 0x0154 119 */ fdtox %f0,%f0 +/* 0x0158 120 */ add %o4,%o0,%o0 +/* 0x015c 119 */ std %f0,[%sp+104] +/* 0x0160 128 */ add %o7,1,%o7 +/* 0x0164 123 */ sllx %g3,16,%o4 +/* 0x0168 122 */ ldd [%g2+24],%f2 +/* 0x016c 128 */ add %g2,16,%g2 +/* 0x0170 123 */ add %o0,%o4,%o0 +/* 0x0174 128 */ cmp %o7,%o2 +/* 0x0178 125 */ and %o0,%o3,%g3 +/* 0x017c 122 */ fdtox %f2,%f0 +/* 0x0180 */ std %f0,[%sp+96] +/* 0x0184 124 */ srax %o0,32,%o0 +/* 0x0188 */ stx %o0,[%sp+112] +/* 0x018c 121 */ srax %g4,32,%o4 +/* 0x0190 122 */ ldx [%sp+96],%o0 +/* 0x0194 124 */ srax %g1,16,%g5 +/* 0x0198 */ ldx [%sp+112],%g4 +/* 0x019c 119 */ ldx [%sp+104],%g1 +/* 0x01a0 125 */ st %g3,[%o5] +/* 0x01a4 124 */ add %g5,%g4,%g4 +/* 0x01a8 128 */ add %o5,4,%o5 +/* 0x01ac 124 */ add %o4,%g4,%o4 +/* 0x01b0 127 */ or %g0,%g1,%g4 +/* 0x01b4 128 */ or %g0,%o0,%g1 +/* 0x01b8 */ ble,a,pt %icc,.L900000116 +/* 0x01bc */ ldd [%g2+16],%f0 + .L77000127: + +! 129 ! } +! 130 ! t1+=a&0xffffffff; +! 131 ! t=(a>>32); +! 132 ! t1+=(b&0xffff)<<16; +! 133 ! i32[i]=t1&0xffffffff; + +/* 0x01c0 133 */ sethi %hi(0xfc00),%g2 + .L900000117: +/* 0x01c4 133 */ or %g0,-1,%g3 +/* 0x01c8 */ add %g2,1023,%g2 +/* 0x01cc */ srl %g3,0,%g3 +/* 0x01d0 */ and %g1,%g2,%g2 +/* 0x01d4 */ and %g4,%g3,%g4 +/* 0x01d8 */ sllx %g2,16,%g2 +/* 0x01dc */ add %o4,%g4,%g4 +/* 0x01e0 */ add %g4,%g2,%g2 +/* 0x01e4 */ sll %o7,2,%g4 +/* 0x01e8 */ and %g2,%g3,%g2 +/* 0x01ec */ st %g2,[%i0+%g4] +/* 0x01f0 */ ret ! Result = +/* 0x01f4 */ restore %g0,%g0,%g0 +/* 0x01f8 0 */ .type conv_d16_to_i32,2 +/* 0x01f8 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! 
+ .L_const_seg_900000201: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 4 +/* 0x0008 */ .skip 16 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32 + conv_i32_to_d32: +/* 000000 */ or %g0,%o7,%g2 + +! 135 !} +! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 138 !{ +! 139 !int i; +! 141 !#pragma pipeloop(0) +! 142 ! for(i=0;i>16); + +/* 0x0008 156 */ sethi %hi(.L_const_seg_900000301),%g2 + .L900000310: +/* 0x000c */ call .+8 +/* 0x0010 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3 +/* 0x0014 152 */ cmp %o0,0 +/* 0x0018 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3 +/* 0x001c 152 */ ble,pt %icc,.L77000150 +/* 0x0020 */ add %g3,%o7,%o2 +/* 0x0024 */ sub %i2,1,%o5 +/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%o1 +/* 0x002c 152 */ sethi %hi(0xfc00),%o0 +/* 0x0030 */ ld [%o2+%o1],%o3 +/* 0x0034 */ add %o5,1,%g2 +/* 0x0038 */ or %g0,0,%g1 +/* 0x003c */ cmp %g2,3 +/* 0x0040 */ or %g0,%i1,%o7 +/* 0x0044 */ add %o0,1023,%o4 +/* 0x0048 */ or %g0,%i0,%g3 +/* 0x004c */ bl,pn %icc,.L77000154 +/* 0x0050 */ add %o7,4,%o0 +/* 0x0054 155 */ ldd [%o3],%f0 +/* 0x0058 156 */ or %g0,1,%g1 +/* 0x005c 154 */ ld [%o0-4],%o1 +/* 0x0060 0 */ or %g0,%o0,%o7 +/* 0x0064 155 */ and %o1,%o4,%o0 + .L900000306: +/* 0x0068 155 */ st %o0,[%sp+96] +/* 0x006c 156 */ add %g1,1,%g1 +/* 0x0070 */ add %g3,16,%g3 +/* 0x0074 */ cmp %g1,%o5 +/* 0x0078 */ add %o7,4,%o7 +/* 0x007c 155 */ ld [%sp+96],%f3 +/* 0x0080 */ fmovs %f0,%f2 +/* 0x0084 */ fsubd %f2,%f0,%f2 +/* 0x0088 156 */ srl %o1,16,%o0 +/* 0x008c 155 */ std %f2,[%g3-16] +/* 0x0090 156 */ st %o0,[%sp+92] +/* 0x0094 */ ld [%sp+92],%f3 +/* 0x0098 154 */ ld [%o7-4],%o1 +/* 0x009c 156 */ fmovs %f0,%f2 +/* 0x00a0 */ fsubd %f2,%f0,%f2 +/* 0x00a4 155 */ and %o1,%o4,%o0 +/* 0x00a8 156 */ ble,pt %icc,.L900000306 +/* 0x00ac */ std %f2,[%g3-8] + .L900000309: +/* 0x00b0 155 */ st %o0,[%sp+96] +/* 0x00b4 */ fmovs %f0,%f2 +/* 0x00b8 156 
*/ add %g3,16,%g3 +/* 0x00bc */ srl %o1,16,%o0 +/* 0x00c0 155 */ ld [%sp+96],%f3 +/* 0x00c4 */ fsubd %f2,%f0,%f2 +/* 0x00c8 */ std %f2,[%g3-16] +/* 0x00cc 156 */ st %o0,[%sp+92] +/* 0x00d0 */ fmovs %f0,%f2 +/* 0x00d4 */ ld [%sp+92],%f3 +/* 0x00d8 */ fsubd %f2,%f0,%f0 +/* 0x00dc */ std %f0,[%g3-8] +/* 0x00e0 */ ret ! Result = +/* 0x00e4 */ restore %g0,%g0,%g0 + .L77000154: +/* 0x00e8 154 */ ld [%o7],%o0 + .L900000311: +/* 0x00ec 155 */ and %o0,%o4,%o1 +/* 0x00f0 */ st %o1,[%sp+96] +/* 0x00f4 156 */ add %g1,1,%g1 +/* 0x00f8 155 */ ldd [%o3],%f0 +/* 0x00fc 156 */ srl %o0,16,%o0 +/* 0x0100 */ add %o7,4,%o7 +/* 0x0104 */ cmp %g1,%o5 +/* 0x0108 155 */ fmovs %f0,%f2 +/* 0x010c */ ld [%sp+96],%f3 +/* 0x0110 */ fsubd %f2,%f0,%f2 +/* 0x0114 */ std %f2,[%g3] +/* 0x0118 156 */ st %o0,[%sp+92] +/* 0x011c */ fmovs %f0,%f2 +/* 0x0120 */ ld [%sp+92],%f3 +/* 0x0124 */ fsubd %f2,%f0,%f0 +/* 0x0128 */ std %f0,[%g3+8] +/* 0x012c */ add %g3,16,%g3 +/* 0x0130 */ ble,a,pt %icc,.L900000311 +/* 0x0134 */ ld [%o7],%o0 + .L77000150: +/* 0x0138 */ ret ! Result = +/* 0x013c */ restore %g0,%g0,%g0 +/* 0x0140 0 */ .type conv_i32_to_d16,2 +/* 0x0140 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000401: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 4 +/* 0x0008 */ .skip 16 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: +/* 000000 */ save %sp,-120,%sp + .L900000415: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4 + +! 157 ! } +! 158 !} +! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 162 ! unsigned int *i32, int len) +! 163 !{ +! 164 !int i = 0; +! 165 !unsigned int a; +! 167 !#pragma pipeloop(0) +! 168 !#ifdef RF_INLINE_MACROS +! 169 ! 
for(;i>16); + +/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2 +/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%o1 +/* 0x0160 175 */ sethi %hi(0xfc00),%o0 +/* 0x0164 */ ld [%o4+%o1],%o2 +/* 0x0168 */ sll %g5,2,%o3 +/* 0x016c */ sub %i3,%g5,%g3 +/* 0x0170 */ sll %g5,3,%g2 +/* 0x0174 */ add %o0,1023,%o4 +/* 0x0178 178 */ ldd [%o2],%f0 +/* 0x017c */ add %i2,%o3,%o0 +/* 0x0180 175 */ cmp %g3,3 +/* 0x0184 */ add %i4,%g2,%o3 +/* 0x0188 */ sub %i3,1,%o1 +/* 0x018c */ sll %g5,4,%g4 +/* 0x0190 */ bl,pn %icc,.L77000161 +/* 0x0194 */ add %i1,%g4,%o5 +/* 0x0198 178 */ ld [%o0],%f3 +/* 0x019c 180 */ add %o3,8,%o3 +/* 0x01a0 177 */ ld [%o0],%o7 +/* 0x01a4 180 */ add %o5,16,%o5 +/* 0x01a8 */ add %g5,1,%g5 +/* 0x01ac 178 */ fmovs %f0,%f2 +/* 0x01b0 180 */ add %o0,4,%o0 +/* 0x01b4 179 */ and %o7,%o4,%g1 +/* 0x01b8 178 */ fsubd %f2,%f0,%f2 +/* 0x01bc */ std %f2,[%o3-8] +/* 0x01c0 180 */ srl %o7,16,%o7 +/* 0x01c4 179 */ st %g1,[%sp+96] +/* 0x01c8 */ fmovs %f0,%f2 +/* 0x01cc */ ld [%sp+96],%f3 +/* 0x01d0 */ fsubd %f2,%f0,%f2 +/* 0x01d4 */ std %f2,[%o5-16] +/* 0x01d8 180 */ st %o7,[%sp+92] +/* 0x01dc */ fmovs %f0,%f2 +/* 0x01e0 */ ld [%sp+92],%f3 +/* 0x01e4 */ fsubd %f2,%f0,%f2 +/* 0x01e8 */ std %f2,[%o5-8] + .L900000411: +/* 0x01ec 178 */ ld [%o0],%f3 +/* 0x01f0 180 */ add %g5,2,%g5 +/* 0x01f4 */ add %o5,32,%o5 +/* 0x01f8 177 */ ld [%o0],%o7 +/* 0x01fc 180 */ cmp %g5,%o1 +/* 0x0200 */ add %o3,16,%o3 +/* 0x0204 178 */ fmovs %f0,%f2 +/* 0x0208 */ fsubd %f2,%f0,%f2 +/* 0x020c */ std %f2,[%o3-16] +/* 0x0210 179 */ and %o7,%o4,%g1 +/* 0x0214 */ st %g1,[%sp+96] +/* 0x0218 */ ld [%sp+96],%f3 +/* 0x021c */ fmovs %f0,%f2 +/* 0x0220 */ fsubd %f2,%f0,%f2 +/* 0x0224 180 */ srl %o7,16,%o7 +/* 0x0228 179 */ std %f2,[%o5-32] +/* 0x022c 180 */ st %o7,[%sp+92] +/* 0x0230 */ ld [%sp+92],%f3 +/* 0x0234 */ fmovs %f0,%f2 +/* 0x0238 */ fsubd %f2,%f0,%f2 +/* 0x023c */ std %f2,[%o5-24] +/* 0x0240 */ add %o0,4,%o0 +/* 0x0244 178 */ ld [%o0],%f3 +/* 0x0248 177 */ ld [%o0],%o7 +/* 0x024c 178 */ fmovs 
%f0,%f2 +/* 0x0250 */ fsubd %f2,%f0,%f2 +/* 0x0254 */ std %f2,[%o3-8] +/* 0x0258 179 */ and %o7,%o4,%g1 +/* 0x025c */ st %g1,[%sp+96] +/* 0x0260 */ ld [%sp+96],%f3 +/* 0x0264 */ fmovs %f0,%f2 +/* 0x0268 */ fsubd %f2,%f0,%f2 +/* 0x026c 180 */ srl %o7,16,%o7 +/* 0x0270 179 */ std %f2,[%o5-16] +/* 0x0274 180 */ st %o7,[%sp+92] +/* 0x0278 */ ld [%sp+92],%f3 +/* 0x027c */ fmovs %f0,%f2 +/* 0x0280 */ fsubd %f2,%f0,%f2 +/* 0x0284 */ std %f2,[%o5-8] +/* 0x0288 */ bl,pt %icc,.L900000411 +/* 0x028c */ add %o0,4,%o0 + .L900000414: +/* 0x0290 180 */ cmp %g5,%i3 +/* 0x0294 */ bge,pn %icc,.L77000164 +/* 0x0298 */ nop + .L77000161: +/* 0x029c 178 */ ld [%o0],%f3 + .L900000416: +/* 0x02a0 178 */ ldd [%o2],%f0 +/* 0x02a4 180 */ add %g5,1,%g5 +/* 0x02a8 177 */ ld [%o0],%o1 +/* 0x02ac 180 */ add %o0,4,%o0 +/* 0x02b0 */ cmp %g5,%i3 +/* 0x02b4 178 */ fmovs %f0,%f2 +/* 0x02b8 179 */ and %o1,%o4,%o7 +/* 0x02bc 178 */ fsubd %f2,%f0,%f2 +/* 0x02c0 */ std %f2,[%o3] +/* 0x02c4 180 */ srl %o1,16,%o1 +/* 0x02c8 179 */ st %o7,[%sp+96] +/* 0x02cc 180 */ add %o3,8,%o3 +/* 0x02d0 179 */ fmovs %f0,%f2 +/* 0x02d4 */ ld [%sp+96],%f3 +/* 0x02d8 */ fsubd %f2,%f0,%f2 +/* 0x02dc */ std %f2,[%o5] +/* 0x02e0 180 */ st %o1,[%sp+92] +/* 0x02e4 */ fmovs %f0,%f2 +/* 0x02e8 */ ld [%sp+92],%f3 +/* 0x02ec */ fsubd %f2,%f0,%f0 +/* 0x02f0 */ std %f0,[%o5+8] +/* 0x02f4 */ add %o5,16,%o5 +/* 0x02f8 */ bl,a,pt %icc,.L900000416 +/* 0x02fc */ ld [%o0],%f3 + .L77000164: +/* 0x0300 */ ret ! Result = +/* 0x0304 */ restore %g0,%g0,%g0 +/* 0x0308 0 */ .type conv_i32_to_d32_and_d16,2 +/* 0x0308 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 4 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global adjust_montf_result + adjust_montf_result: +/* 000000 */ or %g0,%o2,%g5 + +! 181 ! } +! 182 !} +! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 186 !{ +! 187 !long long acc; +! 
188 !int i; +! 190 ! if(i32[len]>0) i=-1; + +/* 0x0004 190 */ or %g0,-1,%g4 +/* 0x0008 */ sll %o2,2,%g1 +/* 0x000c */ ld [%o0+%g1],%g1 +/* 0x0010 */ cmp %g1,0 +/* 0x0014 */ bleu,pn %icc,.L77000175 +/* 0x0018 */ or %g0,%o1,%o3 +/* 0x001c */ ba .L900000511 +/* 0x0020 */ cmp %g4,0 + .L77000175: + +! 191 ! else +! 192 ! { +! 193 ! for(i=len-1; i>=0; i--) + +/* 0x0024 193 */ sub %o2,1,%g4 +/* 0x0028 */ sll %g4,2,%g1 +/* 0x002c */ cmp %g4,0 +/* 0x0030 */ bl,pt %icc,.L900000511 +/* 0x0034 */ cmp %g4,0 +/* 0x0038 */ add %o1,%g1,%g2 + +! 194 ! { +! 195 ! if(i32[i]!=nint[i]) break; + +/* 0x003c 195 */ ld [%g2],%o5 +/* 0x0040 193 */ add %o0,%g1,%g3 + .L900000510: +/* 0x0044 195 */ ld [%g3],%o2 +/* 0x0048 */ sub %g4,1,%g1 +/* 0x004c */ sub %g2,4,%g2 +/* 0x0050 */ sub %g3,4,%g3 +/* 0x0054 */ cmp %o2,%o5 +/* 0x0058 */ bne,pn %icc,.L77000182 +/* 0x005c */ nop +/* 0x0060 0 */ or %g0,%g1,%g4 +/* 0x0064 195 */ cmp %g1,0 +/* 0x0068 */ bge,a,pt %icc,.L900000510 +/* 0x006c */ ld [%g2],%o5 + .L77000182: + +! 196 ! } +! 197 ! } +! 198 ! if((i<0)||(i32[i]>nint[i])) + +/* 0x0070 198 */ cmp %g4,0 + .L900000511: +/* 0x0074 198 */ bl,pn %icc,.L77000198 +/* 0x0078 */ sll %g4,2,%g2 +/* 0x007c */ ld [%o1+%g2],%g1 +/* 0x0080 */ ld [%o0+%g2],%g2 +/* 0x0084 */ cmp %g2,%g1 +/* 0x0088 */ bleu,pt %icc,.L77000191 +/* 0x008c */ nop + .L77000198: + +! 199 ! { +! 200 ! acc=0; +! 201 ! 
for(i=0;i>32; + +/* 0x00dc 205 */ or %g0,2,%o5 +/* 0x00e0 201 */ sub %o2,%o1,%o2 +/* 0x00e4 */ or %g0,%o2,%g5 +/* 0x00e8 204 */ and %o2,%g3,%o2 +/* 0x00ec */ st %o2,[%o0] +/* 0x00f0 205 */ srax %g5,32,%g5 + .L900000505: +/* 0x00f4 203 */ ld [%o3],%o2 +/* 0x00f8 205 */ add %o5,1,%o5 +/* 0x00fc */ add %o3,4,%o3 +/* 0x0100 */ cmp %o5,%g4 +/* 0x0104 */ add %o4,4,%o4 +/* 0x0108 201 */ sub %g1,%o2,%g1 +/* 0x010c */ add %g1,%g5,%g5 +/* 0x0110 204 */ and %g5,%g3,%o2 +/* 0x0114 203 */ ld [%o4-4],%g1 +/* 0x0118 204 */ st %o2,[%o4-8] +/* 0x011c 205 */ ble,pt %icc,.L900000505 +/* 0x0120 */ srax %g5,32,%g5 + .L900000508: +/* 0x0124 203 */ ld [%o3],%g2 +/* 0x0128 201 */ sub %g1,%g2,%g1 +/* 0x012c */ add %g1,%g5,%g1 +/* 0x0130 204 */ and %g1,%g3,%g2 +/* 0x0134 */ retl ! Result = +/* 0x0138 */ st %g2,[%o4-4] + .L77000199: +/* 0x013c 203 */ ld [%o4],%g1 + .L900000509: +/* 0x0140 203 */ ld [%o3],%g2 +/* 0x0144 */ add %g5,%g1,%g1 +/* 0x0148 205 */ add %o5,1,%o5 +/* 0x014c */ add %o3,4,%o3 +/* 0x0150 */ cmp %o5,%g4 +/* 0x0154 203 */ sub %g1,%g2,%g1 +/* 0x0158 204 */ and %g1,%g3,%g2 +/* 0x015c */ st %g2,[%o4] +/* 0x0160 205 */ add %o4,4,%o4 +/* 0x0164 */ srax %g1,32,%g5 +/* 0x0168 */ ble,a,pt %icc,.L900000509 +/* 0x016c */ ld [%o4],%g1 + .L77000191: +/* 0x0170 */ retl ! Result = +/* 0x0174 */ nop +/* 0x0178 0 */ .type adjust_montf_result,2 +/* 0x0178 */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 4 +/* 000000 */ .skip 16 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global mont_mulf_noconv + mont_mulf_noconv: +/* 000000 */ save %sp,-144,%sp + .L900000646: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5 + +! 206 ! } +! 207 ! } +! 208 !} +! 213 !/* +! 214 !** the lengths of the input arrays should be at least the following: +! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 
216 !** all of them should be different from one another +! 217 !** +! 218 !*/ +! 219 !void mont_mulf_noconv(unsigned int *result, +! 220 ! double *dm1, double *dm2, double *dt, +! 221 ! double *dn, unsigned int *nint, +! 222 ! int nlen, double dn0) +! 223 !{ +! 224 ! int i, j, jj; +! 225 ! int tmp; +! 226 ! double digit, m2j, nextm2j, a, b; +! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 229 ! pdm1=&(dm1[0]); +! 230 ! pdm2=&(dm2[0]); +! 231 ! pdn=&(dn[0]); +! 232 ! pdm2[2*nlen]=Zero; + +/* 0x000c 232 */ ld [%fp+92],%o1 +/* 0x0010 */ sethi %hi(Zero),%g2 +/* 0x0014 223 */ ldd [%fp+96],%f2 +/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5 +/* 0x001c 232 */ add %g2,%lo(Zero),%g2 +/* 0x0020 223 */ st %i0,[%fp+68] +/* 0x0024 */ add %g5,%o7,%o3 + +! 234 ! if (nlen!=16) +! 235 ! { +! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 238 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); + +/* 0x0028 239 */ sethi %hi(TwoToMinus16),%g3 +/* 0x002c 232 */ ld [%o3+%g2],%l0 +/* 0x0030 239 */ sethi %hi(TwoTo16),%g4 +/* 0x0034 223 */ or %g0,%i2,%o2 +/* 0x0038 */ fmovd %f2,%f16 +/* 0x003c */ st %i5,[%fp+88] +/* 0x0040 239 */ add %g3,%lo(TwoToMinus16),%g2 +/* 0x0044 223 */ or %g0,%i1,%i2 +/* 0x0048 232 */ ldd [%l0],%f0 +/* 0x004c 239 */ add %g4,%lo(TwoTo16),%g3 +/* 0x0050 223 */ or %g0,%i3,%o0 +/* 0x0054 232 */ sll %o1,4,%g4 +/* 0x0058 239 */ ld [%o3+%g2],%g5 +/* 0x005c 223 */ or %g0,%i3,%i1 +/* 0x0060 239 */ ld [%o3+%g3],%g1 +/* 0x0064 232 */ or %g0,%o1,%i0 +/* 0x0068 */ or %g0,%o2,%i3 +/* 0x006c 234 */ cmp %o1,16 +/* 0x0070 */ be,pn %icc,.L77000279 +/* 0x0074 */ std %f0,[%o2+%g4] +/* 0x0078 236 */ sll %o1,2,%g2 +/* 0x007c */ or %g0,%o0,%o3 +/* 0x0080 232 */ sll %o1,1,%o1 +/* 0x0084 236 */ add %g2,2,%o2 +/* 0x0088 */ cmp %o2,0 +/* 0x008c */ ble,a,pt %icc,.L900000660 +/* 0x0090 */ ldd [%i2],%f0 + +! 241 ! pdtj=&(dt[0]); +! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 243 ! { +! 244 ! m2j=pdm2[j]; +! 
245 ! a=pdtj[0]+pdn[0]*digit; +! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 247 ! pdtj[1]=b; +! 249 !#pragma pipeloop(0) +! 250 ! for(i=1;i>32); +! 122 ! d=(long long)d16[2*i+3]; +! 123 ! t1+=(b&0xffff)<<16; + +/* 0x0068 123 */ and %i2,%o5,%i4 +/* 0x006c */ sllx %i4,16,%o1 +/* 0x0070 117 */ cmp %g2,6 +/* 0x0074 */ bl,pn %icc,.L77000134 +/* 0x0078 */ or %g0,3,%i0 +/* 0x007c 119 */ ldd [%o4+16],%f0 +/* 0x0080 120 */ and %i3,%g3,%o3 + +! 124 ! t+=(b>>16)+(t1>>32); + +/* 0x0084 124 */ srax %i2,16,%i5 +/* 0x0088 117 */ add %o3,%o1,%i4 +/* 0x008c 121 */ srax %i3,32,%i3 +/* 0x0090 119 */ fdtox %f0,%f0 +/* 0x0094 */ std %f0,[%sp+2231] + +! 125 ! i32[i]=t1&0xffffffff; + +/* 0x0098 125 */ and %i4,%g3,%l0 +/* 0x009c 117 */ or %g0,72,%o3 +/* 0x00a0 122 */ ldd [%g4+24],%f0 +/* 0x00a4 117 */ or %g0,64,%o4 +/* 0x00a8 */ or %g0,4,%o1 + +! 126 ! t1=t; +! 127 ! a=c; +! 128 ! b=d; + +/* 0x00ac 128 */ or %g0,5,%i0 +/* 0x00b0 */ or %g0,4,%i1 +/* 0x00b4 119 */ ldx [%sp+2231],%g2 +/* 0x00b8 122 */ fdtox %f0,%f0 +/* 0x00bc 128 */ or %g0,4,%o0 +/* 0x00c0 122 */ std %f0,[%sp+2223] +/* 0x00c4 */ ldd [%g4+40],%f2 +/* 0x00c8 120 */ and %g2,%g3,%i2 +/* 0x00cc 119 */ ldd [%g4+32],%f0 +/* 0x00d0 121 */ srax %g2,32,%g2 +/* 0x00d4 122 */ ldd [%g4+56],%f4 +/* 0x00d8 */ fdtox %f2,%f2 +/* 0x00dc */ ldx [%sp+2223],%g5 +/* 0x00e0 119 */ fdtox %f0,%f0 +/* 0x00e4 125 */ st %l0,[%g1] +/* 0x00e8 124 */ srax %i4,32,%l0 +/* 0x00ec 122 */ fdtox %f4,%f4 +/* 0x00f0 */ std %f2,[%sp+2223] +/* 0x00f4 123 */ and %g5,%o5,%i4 +/* 0x00f8 124 */ add %i5,%l0,%i5 +/* 0x00fc 119 */ std %f0,[%sp+2231] +/* 0x0100 123 */ sllx %i4,16,%i4 +/* 0x0104 124 */ add %i3,%i5,%i3 +/* 0x0108 119 */ ldd [%g4+48],%f2 +/* 0x010c 124 */ srax %g5,16,%g5 +/* 0x0110 117 */ add %i2,%i4,%i2 +/* 0x0114 122 */ ldd [%g4+72],%f0 +/* 0x0118 117 */ add %i2,%i3,%i4 +/* 0x011c 124 */ srax %i4,32,%i5 +/* 0x0120 119 */ fdtox %f2,%f2 +/* 0x0124 125 */ and %i4,%g3,%i4 +/* 0x0128 122 */ ldx [%sp+2223],%i2 +/* 0x012c 124 */ add %g5,%i5,%g5 +/* 
0x0130 119 */ ldx [%sp+2231],%i3 +/* 0x0134 124 */ add %g2,%g5,%g5 +/* 0x0138 119 */ std %f2,[%sp+2231] +/* 0x013c 122 */ std %f4,[%sp+2223] +/* 0x0140 119 */ ldd [%g4+64],%f2 +/* 0x0144 125 */ st %i4,[%g1+4] + .L900000108: +/* 0x0148 122 */ ldx [%sp+2223],%i4 +/* 0x014c 128 */ add %o0,2,%o0 +/* 0x0150 */ add %i0,4,%i0 +/* 0x0154 119 */ ldx [%sp+2231],%l0 +/* 0x0158 117 */ add %o3,16,%o3 +/* 0x015c 123 */ and %i2,%o5,%g2 +/* 0x0160 */ sllx %g2,16,%i5 +/* 0x0164 120 */ and %i3,%g3,%g2 +/* 0x0168 122 */ ldd [%g4+%o3],%f4 +/* 0x016c */ fdtox %f0,%f0 +/* 0x0170 */ std %f0,[%sp+2223] +/* 0x0174 124 */ srax %i2,16,%i2 +/* 0x0178 117 */ add %g2,%i5,%g2 +/* 0x017c 119 */ fdtox %f2,%f0 +/* 0x0180 117 */ add %o4,16,%o4 +/* 0x0184 119 */ std %f0,[%sp+2231] +/* 0x0188 117 */ add %g2,%g5,%g2 +/* 0x018c 119 */ ldd [%g4+%o4],%f2 +/* 0x0190 124 */ srax %g2,32,%i5 +/* 0x0194 128 */ cmp %o0,%o2 +/* 0x0198 121 */ srax %i3,32,%g5 +/* 0x019c 124 */ add %i2,%i5,%i2 +/* 0x01a0 */ add %g5,%i2,%i5 +/* 0x01a4 117 */ add %o1,4,%o1 +/* 0x01a8 125 */ and %g2,%g3,%g2 +/* 0x01ac 127 */ or %g0,%l0,%g5 +/* 0x01b0 125 */ st %g2,[%g1+%o1] +/* 0x01b4 128 */ add %i1,4,%i1 +/* 0x01b8 122 */ ldx [%sp+2223],%i2 +/* 0x01bc 119 */ ldx [%sp+2231],%i3 +/* 0x01c0 117 */ add %o3,16,%o3 +/* 0x01c4 123 */ and %i4,%o5,%g2 +/* 0x01c8 */ sllx %g2,16,%l0 +/* 0x01cc 120 */ and %g5,%g3,%g2 +/* 0x01d0 122 */ ldd [%g4+%o3],%f0 +/* 0x01d4 */ fdtox %f4,%f4 +/* 0x01d8 */ std %f4,[%sp+2223] +/* 0x01dc 124 */ srax %i4,16,%i4 +/* 0x01e0 117 */ add %g2,%l0,%g2 +/* 0x01e4 119 */ fdtox %f2,%f2 +/* 0x01e8 117 */ add %o4,16,%o4 +/* 0x01ec 119 */ std %f2,[%sp+2231] +/* 0x01f0 117 */ add %g2,%i5,%g2 +/* 0x01f4 119 */ ldd [%g4+%o4],%f2 +/* 0x01f8 124 */ srax %g2,32,%i5 +/* 0x01fc 121 */ srax %g5,32,%g5 +/* 0x0200 124 */ add %i4,%i5,%i4 +/* 0x0204 */ add %g5,%i4,%g5 +/* 0x0208 117 */ add %o1,4,%o1 +/* 0x020c 125 */ and %g2,%g3,%g2 +/* 0x0210 128 */ ble,pt %icc,.L900000108 +/* 0x0214 */ st %g2,[%g1+%o1] + .L900000111: +/* 0x0218 122 */ 
ldx [%sp+2223],%o2 +/* 0x021c 123 */ and %i2,%o5,%i4 +/* 0x0220 120 */ and %i3,%g3,%g2 +/* 0x0224 123 */ sllx %i4,16,%i4 +/* 0x0228 119 */ ldx [%sp+2231],%i5 +/* 0x022c 128 */ cmp %o0,%o7 +/* 0x0230 124 */ srax %i2,16,%i2 +/* 0x0234 117 */ add %g2,%i4,%g2 +/* 0x0238 122 */ fdtox %f0,%f4 +/* 0x023c */ std %f4,[%sp+2223] +/* 0x0240 117 */ add %g2,%g5,%g5 +/* 0x0244 123 */ and %o2,%o5,%l0 +/* 0x0248 124 */ srax %g5,32,%l1 +/* 0x024c 120 */ and %i5,%g3,%i4 +/* 0x0250 119 */ fdtox %f2,%f0 +/* 0x0254 121 */ srax %i3,32,%g2 +/* 0x0258 119 */ std %f0,[%sp+2231] +/* 0x025c 124 */ add %i2,%l1,%i2 +/* 0x0260 123 */ sllx %l0,16,%i3 +/* 0x0264 124 */ add %g2,%i2,%i2 +/* 0x0268 */ srax %o2,16,%o2 +/* 0x026c 117 */ add %o1,4,%g2 +/* 0x0270 */ add %i4,%i3,%o1 +/* 0x0274 125 */ and %g5,%g3,%g5 +/* 0x0278 */ st %g5,[%g1+%g2] +/* 0x027c 119 */ ldx [%sp+2231],%i3 +/* 0x0280 117 */ add %o1,%i2,%o1 +/* 0x0284 */ add %g2,4,%g2 +/* 0x0288 124 */ srax %o1,32,%i4 +/* 0x028c 122 */ ldx [%sp+2223],%i2 +/* 0x0290 125 */ and %o1,%g3,%g5 +/* 0x0294 121 */ srax %i5,32,%o1 +/* 0x0298 124 */ add %o2,%i4,%o2 +/* 0x029c 125 */ st %g5,[%g1+%g2] +/* 0x02a0 128 */ bg,pn %icc,.L77000127 +/* 0x02a4 */ add %o1,%o2,%g5 +/* 0x02a8 */ add %i0,6,%i0 +/* 0x02ac */ add %i1,6,%i1 + .L77000134: +/* 0x02b0 119 */ sra %i1,0,%o2 + .L900000112: +/* 0x02b4 119 */ sllx %o2,3,%o3 +/* 0x02b8 120 */ and %i3,%g3,%o1 +/* 0x02bc 119 */ ldd [%g4+%o3],%f0 +/* 0x02c0 122 */ sra %i0,0,%o3 +/* 0x02c4 123 */ and %i2,%o5,%o2 +/* 0x02c8 122 */ sllx %o3,3,%o3 +/* 0x02cc 120 */ add %g5,%o1,%o1 +/* 0x02d0 119 */ fdtox %f0,%f0 +/* 0x02d4 */ std %f0,[%sp+2231] +/* 0x02d8 123 */ sllx %o2,16,%o2 +/* 0x02dc */ add %o1,%o2,%o2 +/* 0x02e0 128 */ add %i1,2,%i1 +/* 0x02e4 122 */ ldd [%g4+%o3],%f0 +/* 0x02e8 124 */ srax %o2,32,%g2 +/* 0x02ec 125 */ and %o2,%g3,%o3 +/* 0x02f0 124 */ srax %i2,16,%o1 +/* 0x02f4 128 */ add %i0,2,%i0 +/* 0x02f8 122 */ fdtox %f0,%f0 +/* 0x02fc */ std %f0,[%sp+2223] +/* 0x0300 125 */ sra %o0,0,%o2 +/* 0x0304 */ sllx 
%o2,2,%o2 +/* 0x0308 124 */ add %o1,%g2,%g5 +/* 0x030c 121 */ srax %i3,32,%g2 +/* 0x0310 128 */ add %o0,1,%o0 +/* 0x0314 124 */ add %g2,%g5,%g5 +/* 0x0318 128 */ cmp %o0,%o7 +/* 0x031c 119 */ ldx [%sp+2231],%o4 +/* 0x0320 122 */ ldx [%sp+2223],%i2 +/* 0x0324 125 */ st %o3,[%g1+%o2] +/* 0x0328 127 */ or %g0,%o4,%i3 +/* 0x032c 128 */ ble,pt %icc,.L900000112 +/* 0x0330 */ sra %i1,0,%o2 + .L77000127: + +! 129 ! } +! 130 ! t1+=a&0xffffffff; +! 131 ! t=(a>>32); +! 132 ! t1+=(b&0xffff)<<16; +! 133 ! i32[i]=t1&0xffffffff; + +/* 0x0334 133 */ sethi %hi(0xfc00),%g2 + .L900000113: +/* 0x0338 133 */ or %g0,-1,%g3 +/* 0x033c */ add %g2,1023,%g2 +/* 0x0340 */ srl %g3,0,%g3 +/* 0x0344 */ and %i2,%g2,%g2 +/* 0x0348 */ and %i3,%g3,%g4 +/* 0x034c */ sllx %g2,16,%g2 +/* 0x0350 */ add %g5,%g4,%g4 +/* 0x0354 */ sra %o0,0,%g5 +/* 0x0358 */ add %g4,%g2,%g4 +/* 0x035c */ sllx %g5,2,%g2 +/* 0x0360 */ and %g4,%g3,%g3 +/* 0x0364 */ st %g3,[%g1+%g2] +/* 0x0368 */ ret ! Result = +/* 0x036c */ restore %g0,%g0,%g0 +/* 0x0370 0 */ .type conv_d16_to_i32,2 +/* 0x0370 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000201: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 8 +/* 0x0008 */ .skip 24 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32 + conv_i32_to_d32: +/* 000000 */ or %g0,%o7,%g3 + +! 135 !} +! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 138 !{ +! 139 !int i; +! 141 !#pragma pipeloop(0) +! 142 ! 
for(i=0;i>16); + +/* 0x001c 156 */ sethi %hi(.L_const_seg_900000301),%g2 +/* 0x0020 147 */ or %g0,%i2,%o1 +/* 0x0024 152 */ sethi %hi(0xfc00),%g3 +/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%g2 +/* 0x002c 152 */ or %g0,%o1,%g4 +/* 0x0030 156 */ ldx [%o0+%g2],%o5 +/* 0x0034 152 */ add %g3,1023,%g1 +/* 0x0038 147 */ or %g0,%i1,%o7 +/* 0x003c 152 */ or %g0,0,%i2 +/* 0x0040 */ sub %o1,1,%g5 +/* 0x0044 */ or %g0,0,%g3 +/* 0x0048 */ or %g0,1,%g2 +/* 0x004c 154 */ or %g0,0,%o2 +/* 0x0050 */ cmp %g4,6 +/* 0x0054 152 */ bl,pn %icc,.L77000154 +/* 0x0058 */ ldd [%o5],%f0 +/* 0x005c */ sub %o1,2,%o3 +/* 0x0060 */ or %g0,16,%o2 +/* 0x0064 154 */ ld [%i1],%o4 +/* 0x0068 156 */ or %g0,3,%g2 +/* 0x006c */ or %g0,2,%g3 +/* 0x0070 155 */ fmovs %f0,%f2 +/* 0x0074 156 */ or %g0,4,%i2 +/* 0x0078 155 */ and %o4,%g1,%o0 +/* 0x007c */ st %o0,[%sp+2227] +/* 0x0080 */ fmovs %f0,%f4 +/* 0x0084 156 */ srl %o4,16,%i4 +/* 0x0088 152 */ or %g0,12,%o4 +/* 0x008c */ or %g0,24,%o0 +/* 0x0090 155 */ ld [%sp+2227],%f3 +/* 0x0094 */ fsubd %f2,%f0,%f2 +/* 0x0098 */ std %f2,[%i0] +/* 0x009c 156 */ st %i4,[%sp+2223] +/* 0x00a0 154 */ ld [%o7+4],%o1 +/* 0x00a4 156 */ fmovs %f0,%f2 +/* 0x00a8 155 */ and %o1,%g1,%i1 +/* 0x00ac 156 */ ld [%sp+2223],%f3 +/* 0x00b0 */ srl %o1,16,%o1 +/* 0x00b4 */ fsubd %f2,%f0,%f2 +/* 0x00b8 */ std %f2,[%i0+8] +/* 0x00bc */ st %o1,[%sp+2223] +/* 0x00c0 155 */ st %i1,[%sp+2227] +/* 0x00c4 154 */ ld [%o7+8],%o1 +/* 0x00c8 156 */ fmovs %f0,%f2 +/* 0x00cc 155 */ and %o1,%g1,%g4 +/* 0x00d0 */ ld [%sp+2227],%f5 +/* 0x00d4 156 */ srl %o1,16,%o1 +/* 0x00d8 */ ld [%sp+2223],%f3 +/* 0x00dc */ st %o1,[%sp+2223] +/* 0x00e0 155 */ fsubd %f4,%f0,%f4 +/* 0x00e4 */ st %g4,[%sp+2227] +/* 0x00e8 156 */ fsubd %f2,%f0,%f2 +/* 0x00ec 154 */ ld [%o7+12],%o1 +/* 0x00f0 155 */ std %f4,[%i0+16] +/* 0x00f4 156 */ std %f2,[%i0+24] + .L900000306: +/* 0x00f8 155 */ ld [%sp+2227],%f5 +/* 0x00fc 156 */ add %i2,2,%i2 +/* 0x0100 */ add %g2,4,%g2 +/* 0x0104 */ ld [%sp+2223],%f3 +/* 0x0108 */ cmp 
%i2,%o3 +/* 0x010c */ add %g3,4,%g3 +/* 0x0110 155 */ and %o1,%g1,%g4 +/* 0x0114 156 */ srl %o1,16,%o1 +/* 0x0118 155 */ st %g4,[%sp+2227] +/* 0x011c 156 */ st %o1,[%sp+2223] +/* 0x0120 152 */ add %o4,4,%o1 +/* 0x0124 154 */ ld [%o7+%o1],%o4 +/* 0x0128 156 */ fmovs %f0,%f2 +/* 0x012c 155 */ fmovs %f0,%f4 +/* 0x0130 */ fsubd %f4,%f0,%f4 +/* 0x0134 152 */ add %o2,16,%o2 +/* 0x0138 156 */ fsubd %f2,%f0,%f2 +/* 0x013c 155 */ std %f4,[%i0+%o2] +/* 0x0140 152 */ add %o0,16,%o0 +/* 0x0144 156 */ std %f2,[%i0+%o0] +/* 0x0148 155 */ ld [%sp+2227],%f5 +/* 0x014c 156 */ ld [%sp+2223],%f3 +/* 0x0150 155 */ and %o4,%g1,%g4 +/* 0x0154 156 */ srl %o4,16,%o4 +/* 0x0158 155 */ st %g4,[%sp+2227] +/* 0x015c 156 */ st %o4,[%sp+2223] +/* 0x0160 152 */ add %o1,4,%o4 +/* 0x0164 154 */ ld [%o7+%o4],%o1 +/* 0x0168 156 */ fmovs %f0,%f2 +/* 0x016c 155 */ fmovs %f0,%f4 +/* 0x0170 */ fsubd %f4,%f0,%f4 +/* 0x0174 152 */ add %o2,16,%o2 +/* 0x0178 156 */ fsubd %f2,%f0,%f2 +/* 0x017c 155 */ std %f4,[%i0+%o2] +/* 0x0180 152 */ add %o0,16,%o0 +/* 0x0184 156 */ ble,pt %icc,.L900000306 +/* 0x0188 */ std %f2,[%i0+%o0] + .L900000309: +/* 0x018c 155 */ ld [%sp+2227],%f5 +/* 0x0190 156 */ fmovs %f0,%f2 +/* 0x0194 */ srl %o1,16,%o3 +/* 0x0198 */ ld [%sp+2223],%f3 +/* 0x019c 155 */ and %o1,%g1,%i1 +/* 0x01a0 152 */ add %o2,16,%g4 +/* 0x01a4 155 */ fmovs %f0,%f4 +/* 0x01a8 */ st %i1,[%sp+2227] +/* 0x01ac 152 */ add %o0,16,%o2 +/* 0x01b0 156 */ st %o3,[%sp+2223] +/* 0x01b4 154 */ sra %i2,0,%o3 +/* 0x01b8 152 */ add %g4,16,%o1 +/* 0x01bc 155 */ fsubd %f4,%f0,%f4 +/* 0x01c0 */ std %f4,[%i0+%g4] +/* 0x01c4 152 */ add %o0,32,%o0 +/* 0x01c8 156 */ fsubd %f2,%f0,%f2 +/* 0x01cc */ std %f2,[%i0+%o2] +/* 0x01d0 */ sllx %o3,2,%o2 +/* 0x01d4 155 */ ld [%sp+2227],%f5 +/* 0x01d8 156 */ cmp %i2,%g5 +/* 0x01dc */ add %g2,6,%g2 +/* 0x01e0 */ ld [%sp+2223],%f3 +/* 0x01e4 */ add %g3,6,%g3 +/* 0x01e8 155 */ fmovs %f0,%f4 +/* 0x01ec 156 */ fmovs %f0,%f2 +/* 0x01f0 155 */ fsubd %f4,%f0,%f4 +/* 0x01f4 */ std %f4,[%i0+%o1] +/* 
0x01f8 156 */ fsubd %f2,%f0,%f0 +/* 0x01fc */ bg,pn %icc,.L77000150 +/* 0x0200 */ std %f0,[%i0+%o0] + .L77000154: +/* 0x0204 155 */ ldd [%o5],%f0 + .L900000311: +/* 0x0208 154 */ ld [%o7+%o2],%o0 +/* 0x020c 155 */ sra %g3,0,%o1 +/* 0x0210 */ fmovs %f0,%f2 +/* 0x0214 */ sllx %o1,3,%o2 +/* 0x0218 156 */ add %i2,1,%i2 +/* 0x021c 155 */ and %o0,%g1,%o1 +/* 0x0220 */ st %o1,[%sp+2227] +/* 0x0224 156 */ add %g3,2,%g3 +/* 0x0228 */ srl %o0,16,%o1 +/* 0x022c */ cmp %i2,%g5 +/* 0x0230 */ sra %g2,0,%o0 +/* 0x0234 */ add %g2,2,%g2 +/* 0x0238 */ sllx %o0,3,%o0 +/* 0x023c 155 */ ld [%sp+2227],%f3 +/* 0x0240 154 */ sra %i2,0,%o3 +/* 0x0244 155 */ fsubd %f2,%f0,%f2 +/* 0x0248 */ std %f2,[%i0+%o2] +/* 0x024c */ sllx %o3,2,%o2 +/* 0x0250 156 */ st %o1,[%sp+2223] +/* 0x0254 */ fmovs %f0,%f2 +/* 0x0258 */ ld [%sp+2223],%f3 +/* 0x025c */ fsubd %f2,%f0,%f0 +/* 0x0260 */ std %f0,[%i0+%o0] +/* 0x0264 */ ble,a,pt %icc,.L900000311 +/* 0x0268 */ ldd [%o5],%f0 + .L77000150: +/* 0x026c */ ret ! Result = +/* 0x0270 */ restore %g0,%g0,%g0 +/* 0x0274 0 */ .type conv_i32_to_d16,2 +/* 0x0274 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000401: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 8 +/* 0x0008 */ .skip 24 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: +/* 000000 */ save %sp,-192,%sp + .L900000415: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3 + +! 157 ! } +! 158 !} +! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 162 ! unsigned int *i32, int len) +! 163 !{ +! 164 !int i = 0; +! 165 !unsigned int a; +! 167 !#pragma pipeloop(0) +! 168 !#ifdef RF_INLINE_MACROS +! 169 ! 
for(;i>16); + +/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2 +/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%g2 +/* 0x0160 175 */ sethi %hi(0xfc00),%g3 +/* 0x0164 180 */ ldx [%o3+%g2],%g1 +/* 0x0168 175 */ sll %i2,1,%i4 +/* 0x016c */ sub %g5,%i2,%g4 +/* 0x0170 177 */ sra %i2,0,%o3 +/* 0x0174 175 */ add %g3,1023,%g3 +/* 0x0178 178 */ ldd [%g1],%f2 +/* 0x017c */ sllx %o3,2,%o2 +/* 0x0180 175 */ add %i4,1,%g2 +/* 0x0184 177 */ or %g0,%o3,%o1 +/* 0x0188 */ cmp %g4,6 +/* 0x018c 175 */ bl,pn %icc,.L77000161 +/* 0x0190 */ sra %i2,0,%o3 +/* 0x0194 177 */ or %g0,%o2,%o0 +/* 0x0198 178 */ ld [%i0+%o2],%f5 +/* 0x019c 179 */ fmovs %f2,%f8 +/* 0x01a0 175 */ add %o0,4,%o3 +/* 0x01a4 177 */ ld [%i0+%o0],%o7 +/* 0x01a8 180 */ fmovs %f2,%f6 +/* 0x01ac 178 */ fmovs %f2,%f4 +/* 0x01b0 */ sllx %o1,3,%o2 +/* 0x01b4 175 */ add %o3,4,%o5 +/* 0x01b8 179 */ sra %i4,0,%o0 +/* 0x01bc 175 */ add %o3,8,%o4 +/* 0x01c0 178 */ fsubd %f4,%f2,%f4 +/* 0x01c4 */ std %f4,[%i3+%o2] +/* 0x01c8 179 */ sllx %o0,3,%i5 +/* 0x01cc */ and %o7,%g3,%o0 +/* 0x01d0 */ st %o0,[%sp+2227] +/* 0x01d4 175 */ add %i5,16,%o1 +/* 0x01d8 180 */ srl %o7,16,%g4 +/* 0x01dc */ add %i2,1,%i2 +/* 0x01e0 */ sra %g2,0,%o0 +/* 0x01e4 175 */ add %o2,8,%o2 +/* 0x01e8 179 */ fmovs %f2,%f4 +/* 0x01ec 180 */ sllx %o0,3,%l0 +/* 0x01f0 */ add %i4,3,%g2 +/* 0x01f4 179 */ ld [%sp+2227],%f5 +/* 0x01f8 175 */ add %l0,16,%o0 +/* 0x01fc 180 */ add %i4,2,%i4 +/* 0x0200 175 */ sub %g5,1,%o7 +/* 0x0204 180 */ add %i2,3,%i2 +/* 0x0208 179 */ fsubd %f4,%f2,%f4 +/* 0x020c */ std %f4,[%i1+%i5] +/* 0x0210 180 */ st %g4,[%sp+2223] +/* 0x0214 177 */ ld [%i0+%o3],%i5 +/* 0x0218 180 */ fmovs %f2,%f4 +/* 0x021c */ srl %i5,16,%g4 +/* 0x0220 179 */ and %i5,%g3,%i5 +/* 0x0224 180 */ ld [%sp+2223],%f5 +/* 0x0228 */ fsubd %f4,%f2,%f4 +/* 0x022c */ std %f4,[%i1+%l0] +/* 0x0230 */ st %g4,[%sp+2223] +/* 0x0234 177 */ ld [%i0+%o5],%g4 +/* 0x0238 179 */ st %i5,[%sp+2227] +/* 0x023c 178 */ fmovs %f2,%f4 +/* 0x0240 180 */ srl %g4,16,%i5 +/* 0x0244 179 */ 
and %g4,%g3,%g4 +/* 0x0248 180 */ ld [%sp+2223],%f7 +/* 0x024c */ st %i5,[%sp+2223] +/* 0x0250 178 */ ld [%i0+%o3],%f5 +/* 0x0254 180 */ fsubd %f6,%f2,%f6 +/* 0x0258 177 */ ld [%i0+%o4],%o3 +/* 0x025c 178 */ fsubd %f4,%f2,%f4 +/* 0x0260 179 */ ld [%sp+2227],%f9 +/* 0x0264 180 */ ld [%sp+2223],%f1 +/* 0x0268 179 */ st %g4,[%sp+2227] +/* 0x026c */ fsubd %f8,%f2,%f8 +/* 0x0270 */ std %f8,[%i1+%o1] +/* 0x0274 180 */ std %f6,[%i1+%o0] +/* 0x0278 178 */ std %f4,[%i3+%o2] + .L900000411: +/* 0x027c 179 */ ld [%sp+2227],%f13 +/* 0x0280 180 */ srl %o3,16,%g4 +/* 0x0284 */ add %i2,2,%i2 +/* 0x0288 */ st %g4,[%sp+2223] +/* 0x028c */ cmp %i2,%o7 +/* 0x0290 */ add %g2,4,%g2 +/* 0x0294 178 */ ld [%i0+%o5],%f11 +/* 0x0298 180 */ add %i4,4,%i4 +/* 0x029c 175 */ add %o4,4,%o5 +/* 0x02a0 177 */ ld [%i0+%o5],%g4 +/* 0x02a4 179 */ and %o3,%g3,%o3 +/* 0x02a8 */ st %o3,[%sp+2227] +/* 0x02ac 180 */ fmovs %f2,%f0 +/* 0x02b0 179 */ fmovs %f2,%f12 +/* 0x02b4 180 */ fsubd %f0,%f2,%f8 +/* 0x02b8 179 */ fsubd %f12,%f2,%f4 +/* 0x02bc 175 */ add %o1,16,%o1 +/* 0x02c0 180 */ ld [%sp+2223],%f7 +/* 0x02c4 178 */ fmovs %f2,%f10 +/* 0x02c8 179 */ std %f4,[%i1+%o1] +/* 0x02cc 175 */ add %o0,16,%o0 +/* 0x02d0 178 */ fsubd %f10,%f2,%f4 +/* 0x02d4 175 */ add %o2,8,%o2 +/* 0x02d8 180 */ std %f8,[%i1+%o0] +/* 0x02dc 178 */ std %f4,[%i3+%o2] +/* 0x02e0 179 */ ld [%sp+2227],%f9 +/* 0x02e4 180 */ srl %g4,16,%o3 +/* 0x02e8 */ st %o3,[%sp+2223] +/* 0x02ec 178 */ ld [%i0+%o4],%f5 +/* 0x02f0 175 */ add %o4,8,%o4 +/* 0x02f4 177 */ ld [%i0+%o4],%o3 +/* 0x02f8 179 */ and %g4,%g3,%g4 +/* 0x02fc */ st %g4,[%sp+2227] +/* 0x0300 180 */ fmovs %f2,%f6 +/* 0x0304 179 */ fmovs %f2,%f8 +/* 0x0308 180 */ fsubd %f6,%f2,%f6 +/* 0x030c 179 */ fsubd %f8,%f2,%f8 +/* 0x0310 175 */ add %o1,16,%o1 +/* 0x0314 180 */ ld [%sp+2223],%f1 +/* 0x0318 178 */ fmovs %f2,%f4 +/* 0x031c 179 */ std %f8,[%i1+%o1] +/* 0x0320 175 */ add %o0,16,%o0 +/* 0x0324 178 */ fsubd %f4,%f2,%f4 +/* 0x0328 175 */ add %o2,8,%o2 +/* 0x032c 180 */ std %f6,[%i1+%o0] 
+/* 0x0330 */ bl,pt %icc,.L900000411 +/* 0x0334 */ std %f4,[%i3+%o2] + .L900000414: +/* 0x0338 180 */ srl %o3,16,%o7 +/* 0x033c */ st %o7,[%sp+2223] +/* 0x0340 179 */ fmovs %f2,%f12 +/* 0x0344 178 */ ld [%i0+%o5],%f11 +/* 0x0348 180 */ fmovs %f2,%f0 +/* 0x034c 179 */ and %o3,%g3,%g4 +/* 0x0350 180 */ fmovs %f2,%f6 +/* 0x0354 175 */ add %o1,16,%o3 +/* 0x0358 */ add %o0,16,%o7 +/* 0x035c 178 */ fmovs %f2,%f10 +/* 0x0360 175 */ add %o2,8,%o2 +/* 0x0364 */ add %o1,32,%o5 +/* 0x0368 179 */ ld [%sp+2227],%f13 +/* 0x036c 178 */ fmovs %f2,%f4 +/* 0x0370 175 */ add %o0,32,%o1 +/* 0x0374 180 */ ld [%sp+2223],%f7 +/* 0x0378 175 */ add %o2,8,%o0 +/* 0x037c 180 */ cmp %i2,%g5 +/* 0x0380 179 */ st %g4,[%sp+2227] +/* 0x0384 */ fsubd %f12,%f2,%f8 +/* 0x0388 180 */ add %g2,6,%g2 +/* 0x038c 179 */ std %f8,[%i1+%o3] +/* 0x0390 180 */ fsubd %f0,%f2,%f0 +/* 0x0394 177 */ sra %i2,0,%o3 +/* 0x0398 180 */ std %f0,[%i1+%o7] +/* 0x039c 178 */ fsubd %f10,%f2,%f0 +/* 0x03a0 180 */ add %i4,6,%i4 +/* 0x03a4 178 */ std %f0,[%i3+%o2] +/* 0x03a8 */ sllx %o3,2,%o2 +/* 0x03ac 179 */ ld [%sp+2227],%f9 +/* 0x03b0 178 */ ld [%i0+%o4],%f5 +/* 0x03b4 179 */ fmovs %f2,%f8 +/* 0x03b8 */ fsubd %f8,%f2,%f0 +/* 0x03bc */ std %f0,[%i1+%o5] +/* 0x03c0 180 */ fsubd %f6,%f2,%f0 +/* 0x03c4 */ std %f0,[%i1+%o1] +/* 0x03c8 178 */ fsubd %f4,%f2,%f0 +/* 0x03cc 180 */ bge,pn %icc,.L77000164 +/* 0x03d0 */ std %f0,[%i3+%o0] + .L77000161: +/* 0x03d4 178 */ ldd [%g1],%f2 + .L900000416: +/* 0x03d8 178 */ ld [%i0+%o2],%f5 +/* 0x03dc 179 */ sra %i4,0,%o0 +/* 0x03e0 180 */ add %i2,1,%i2 +/* 0x03e4 177 */ ld [%i0+%o2],%o1 +/* 0x03e8 178 */ sllx %o3,3,%o3 +/* 0x03ec 180 */ add %i4,2,%i4 +/* 0x03f0 178 */ fmovs %f2,%f4 +/* 0x03f4 179 */ sllx %o0,3,%o4 +/* 0x03f8 180 */ cmp %i2,%g5 +/* 0x03fc 179 */ and %o1,%g3,%o0 +/* 0x0400 178 */ fsubd %f4,%f2,%f0 +/* 0x0404 */ std %f0,[%i3+%o3] +/* 0x0408 180 */ srl %o1,16,%o1 +/* 0x040c 179 */ st %o0,[%sp+2227] +/* 0x0410 180 */ sra %g2,0,%o0 +/* 0x0414 */ add %g2,2,%g2 +/* 0x0418 177 */ sra 
%i2,0,%o3 +/* 0x041c 180 */ sllx %o0,3,%o0 +/* 0x0420 179 */ fmovs %f2,%f4 +/* 0x0424 */ sllx %o3,2,%o2 +/* 0x0428 */ ld [%sp+2227],%f5 +/* 0x042c */ fsubd %f4,%f2,%f0 +/* 0x0430 */ std %f0,[%i1+%o4] +/* 0x0434 180 */ st %o1,[%sp+2223] +/* 0x0438 */ fmovs %f2,%f4 +/* 0x043c */ ld [%sp+2223],%f5 +/* 0x0440 */ fsubd %f4,%f2,%f0 +/* 0x0444 */ std %f0,[%i1+%o0] +/* 0x0448 */ bl,a,pt %icc,.L900000416 +/* 0x044c */ ldd [%g1],%f2 + .L77000164: +/* 0x0450 */ ret ! Result = +/* 0x0454 */ restore %g0,%g0,%g0 +/* 0x0458 0 */ .type conv_i32_to_d32_and_d16,2 +/* 0x0458 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global adjust_montf_result + adjust_montf_result: +/* 000000 */ save %sp,-176,%sp +/* 0x0004 */ or %g0,%i2,%o1 +/* 0x0008 */ or %g0,%i0,%i2 + +! 181 ! } +! 182 !} +! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 186 !{ +! 187 !long long acc; +! 188 !int i; +! 190 ! if(i32[len]>0) i=-1; + +/* 0x000c 190 */ sra %o1,0,%g2 +/* 0x0010 */ or %g0,-1,%o2 +/* 0x0014 */ sllx %g2,2,%g2 +/* 0x0018 */ ld [%i2+%g2],%g2 +/* 0x001c */ cmp %g2,0 +/* 0x0020 */ bleu,pn %icc,.L77000175 +/* 0x0024 */ or %g0,%i1,%i0 +/* 0x0028 */ ba .L900000511 +/* 0x002c */ cmp %o2,0 + .L77000175: + +! 191 ! else +! 192 ! { +! 193 ! for(i=len-1; i>=0; i--) + +/* 0x0030 193 */ sub %o1,1,%o2 +/* 0x0034 */ cmp %o2,0 +/* 0x0038 */ bl,pn %icc,.L77000182 +/* 0x003c */ sra %o2,0,%g2 + .L900000510: + +! 194 ! { +! 195 ! if(i32[i]!=nint[i]) break; + +/* 0x0040 195 */ sllx %g2,2,%g2 +/* 0x0044 */ sub %o2,1,%o0 +/* 0x0048 */ ld [%i1+%g2],%g3 +/* 0x004c */ ld [%i2+%g2],%g2 +/* 0x0050 */ cmp %g2,%g3 +/* 0x0054 */ bne,pn %icc,.L77000182 +/* 0x0058 */ nop +/* 0x005c 0 */ or %g0,%o0,%o2 +/* 0x0060 195 */ cmp %o0,0 +/* 0x0064 */ bge,pt %icc,.L900000510 +/* 0x0068 */ sra %o2,0,%g2 + .L77000182: + +! 196 ! } +! 197 ! } +! 198 ! 
if((i<0)||(i32[i]>nint[i])) + +/* 0x006c 198 */ cmp %o2,0 + .L900000511: +/* 0x0070 198 */ bl,pn %icc,.L77000198 +/* 0x0074 */ sra %o2,0,%g2 +/* 0x0078 */ sllx %g2,2,%g2 +/* 0x007c */ ld [%i1+%g2],%g3 +/* 0x0080 */ ld [%i2+%g2],%g2 +/* 0x0084 */ cmp %g2,%g3 +/* 0x0088 */ bleu,pt %icc,.L77000191 +/* 0x008c */ nop + .L77000198: + +! 199 ! { +! 200 ! acc=0; +! 201 ! for(i=0;i>32; + +/* 0x00c8 205 */ or %g0,5,%i1 +/* 0x00cc 203 */ ld [%i0],%o2 +/* 0x00d0 201 */ or %g0,8,%o5 +/* 0x00d4 */ or %g0,12,%o4 +/* 0x00d8 203 */ ld [%i0+4],%o3 +/* 0x00dc 201 */ or %g0,16,%g1 +/* 0x00e0 203 */ ld [%i2+4],%o0 +/* 0x00e4 201 */ sub %o1,%o2,%o1 +/* 0x00e8 203 */ ld [%i0+8],%i3 +/* 0x00ec 204 */ and %o1,%g2,%g5 +/* 0x00f0 */ st %g5,[%i2] +/* 0x00f4 205 */ srax %o1,32,%g5 +/* 0x00f8 201 */ sub %o0,%o3,%o0 +/* 0x00fc 203 */ ld [%i0+12],%o2 +/* 0x0100 201 */ add %o0,%g5,%o0 +/* 0x0104 204 */ and %o0,%g2,%g5 +/* 0x0108 */ st %g5,[%i2+4] +/* 0x010c 205 */ srax %o0,32,%o0 +/* 0x0110 203 */ ld [%i2+8],%o1 +/* 0x0114 */ ld [%i2+12],%o3 +/* 0x0118 201 */ sub %o1,%i3,%o1 + .L900000505: +/* 0x011c */ add %g1,4,%g3 +/* 0x0120 203 */ ld [%g1+%i2],%g5 +/* 0x0124 201 */ add %o1,%o0,%o0 +/* 0x0128 203 */ ld [%i0+%g1],%i3 +/* 0x012c 201 */ sub %o3,%o2,%o1 +/* 0x0130 204 */ and %o0,%g2,%o2 +/* 0x0134 */ st %o2,[%o5+%i2] +/* 0x0138 205 */ srax %o0,32,%o2 +/* 0x013c */ add %i1,4,%i1 +/* 0x0140 201 */ add %g1,8,%o5 +/* 0x0144 203 */ ld [%g3+%i2],%o0 +/* 0x0148 201 */ add %o1,%o2,%o1 +/* 0x014c 203 */ ld [%i0+%g3],%o3 +/* 0x0150 201 */ sub %g5,%i3,%o2 +/* 0x0154 204 */ and %o1,%g2,%g5 +/* 0x0158 */ st %g5,[%o4+%i2] +/* 0x015c 205 */ srax %o1,32,%g5 +/* 0x0160 */ cmp %i1,%o7 +/* 0x0164 201 */ add %g1,12,%o4 +/* 0x0168 203 */ ld [%o5+%i2],%o1 +/* 0x016c 201 */ add %o2,%g5,%o2 +/* 0x0170 203 */ ld [%i0+%o5],%i3 +/* 0x0174 201 */ sub %o0,%o3,%o0 +/* 0x0178 204 */ and %o2,%g2,%o3 +/* 0x017c */ st %o3,[%g1+%i2] +/* 0x0180 205 */ srax %o2,32,%g5 +/* 0x0184 203 */ ld [%o4+%i2],%o3 +/* 0x0188 201 */ add %g1,16,%g1 
+/* 0x018c */ add %o0,%g5,%o0 +/* 0x0190 203 */ ld [%i0+%o4],%o2 +/* 0x0194 201 */ sub %o1,%i3,%o1 +/* 0x0198 204 */ and %o0,%g2,%g5 +/* 0x019c */ st %g5,[%g3+%i2] +/* 0x01a0 205 */ ble,pt %icc,.L900000505 +/* 0x01a4 */ srax %o0,32,%o0 + .L900000508: +/* 0x01a8 */ add %o1,%o0,%g3 +/* 0x01ac */ sub %o3,%o2,%o1 +/* 0x01b0 203 */ ld [%g1+%i2],%o0 +/* 0x01b4 */ ld [%i0+%g1],%o2 +/* 0x01b8 205 */ srax %g3,32,%o7 +/* 0x01bc 204 */ and %g3,%g2,%o3 +/* 0x01c0 201 */ add %o1,%o7,%o1 +/* 0x01c4 204 */ st %o3,[%o5+%i2] +/* 0x01c8 205 */ cmp %i1,%g4 +/* 0x01cc 201 */ sub %o0,%o2,%o0 +/* 0x01d0 204 */ and %o1,%g2,%o2 +/* 0x01d4 */ st %o2,[%o4+%i2] +/* 0x01d8 205 */ srax %o1,32,%o1 +/* 0x01dc 203 */ sra %i1,0,%o2 +/* 0x01e0 201 */ add %o0,%o1,%o0 +/* 0x01e4 205 */ srax %o0,32,%g5 +/* 0x01e8 204 */ and %o0,%g2,%o1 +/* 0x01ec */ st %o1,[%g1+%i2] +/* 0x01f0 205 */ bg,pn %icc,.L77000191 +/* 0x01f4 */ sllx %o2,2,%o1 + .L77000199: +/* 0x01f8 0 */ or %g0,%o1,%g1 + .L900000509: +/* 0x01fc 203 */ ld [%o1+%i2],%o0 +/* 0x0200 205 */ add %i1,1,%i1 +/* 0x0204 203 */ ld [%i0+%o1],%o1 +/* 0x0208 */ sra %i1,0,%o2 +/* 0x020c 205 */ cmp %i1,%g4 +/* 0x0210 203 */ add %g5,%o0,%o0 +/* 0x0214 */ sub %o0,%o1,%o0 +/* 0x0218 205 */ srax %o0,32,%g5 +/* 0x021c 204 */ and %o0,%g2,%o1 +/* 0x0220 */ st %o1,[%g1+%i2] +/* 0x0224 */ sllx %o2,2,%o1 +/* 0x0228 205 */ ble,pt %icc,.L900000509 +/* 0x022c */ or %g0,%o1,%g1 + .L77000191: +/* 0x0230 */ ret ! Result = +/* 0x0234 */ restore %g0,%g0,%g0 +/* 0x0238 0 */ .type adjust_montf_result,2 +/* 0x0238 */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +/* 000000 */ .skip 24 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global mont_mulf_noconv + mont_mulf_noconv: +/* 000000 */ save %sp,-224,%sp + .L900000643: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5 +/* 0x000c */ ldx [%fp+2223],%l0 + +! 206 ! } +! 207 ! } +! 
208 !} +! 213 !/* +! 214 !** the lengths of the input arrays should be at least the following: +! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 216 !** all of them should be different from one another +! 217 !** +! 218 !*/ +! 219 !void mont_mulf_noconv(unsigned int *result, +! 220 ! double *dm1, double *dm2, double *dt, +! 221 ! double *dn, unsigned int *nint, +! 222 ! int nlen, double dn0) +! 223 !{ +! 224 ! int i, j, jj; +! 225 ! int tmp; +! 226 ! double digit, m2j, nextm2j, a, b; +! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 229 ! pdm1=&(dm1[0]); +! 230 ! pdm2=&(dm2[0]); +! 231 ! pdn=&(dn[0]); +! 232 ! pdm2[2*nlen]=Zero; + +/* 0x0010 232 */ sethi %hi(Zero),%g2 +/* 0x0014 223 */ fmovd %f14,%f30 +/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5 +/* 0x001c 232 */ add %g2,%lo(Zero),%g2 +/* 0x0020 */ sll %l0,1,%o3 +/* 0x0024 223 */ add %g5,%o7,%o4 +/* 0x0028 232 */ sra %o3,0,%g5 +/* 0x002c */ ldx [%o4+%g2],%o7 + +! 234 ! if (nlen!=16) +! 235 ! { +! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 238 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); + +/* 0x0030 239 */ sethi %hi(TwoToMinus16),%g3 +/* 0x0034 */ sethi %hi(TwoTo16),%g4 +/* 0x0038 */ add %g3,%lo(TwoToMinus16),%g2 +/* 0x003c 232 */ ldd [%o7],%f0 +/* 0x0040 239 */ add %g4,%lo(TwoTo16),%g3 +/* 0x0044 223 */ or %g0,%i4,%o0 +/* 0x0048 232 */ sllx %g5,3,%g4 +/* 0x004c 239 */ ldx [%o4+%g2],%o5 +/* 0x0050 223 */ or %g0,%i5,%l3 +/* 0x0054 */ or %g0,%i0,%l2 +/* 0x0058 239 */ ldx [%o4+%g3],%o4 +/* 0x005c 234 */ cmp %l0,16 +/* 0x0060 232 */ std %f0,[%i2+%g4] +/* 0x0064 234 */ be,pn %icc,.L77000279 +/* 0x0068 */ or %g0,%i3,%l4 +/* 0x006c 236 */ sll %l0,2,%g2 +/* 0x0070 223 */ or %g0,%o0,%i5 +/* 0x0074 236 */ add %g2,2,%o0 +/* 0x0078 223 */ or %g0,%i1,%i4 +/* 0x007c 236 */ cmp %o0,0 +/* 0x0080 223 */ or %g0,%i2,%l1 +/* 0x0084 236 */ ble,a,pt %icc,.L900000657 +/* 0x0088 */ ldd [%i1],%f6 + +! 241 ! 
pdtj=&(dt[0]); +! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 243 ! { +! 244 ! m2j=pdm2[j]; +! 245 ! a=pdtj[0]+pdn[0]*digit; +! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 247 ! pdtj[1]=b; +! 249 !#pragma pipeloop(0) +! 250 ! for(i=1;iused && (a)->dp[(a)->used - 1] == 0) \ + --((a)->used); \ + (a)->sign = (a)->used ? (a)->sign : ZPOS; \ + } + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + do { \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ + } while (0); + +/* anything you need at the end */ +#define COMBA_FINI + +/* this should multiply i and j */ +#define MULADD(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +/* sqr macros only */ +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ + } while (0); + +#define COMBA_FINI + +#define SQRADD(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %%rax \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i) \ + : "%rax", "%rdx", "cc"); + +#define SQRADD2(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDSC(i, j) \ + __asm__( \ + "movq %3,%%rax \n\t" \ + "mulq %4 \n\t" \ + "movq %%rax,%0 \n\t" \ + "movq %%rdx,%1 \n\t" \ + "xorq %2,%2 \n\t" \ + 
: "=r"(sc0), "=r"(sc1), "=r"(sc2) \ + : "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDAC(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(sc0), "=r"(sc1), "=r"(sc2) \ + : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDDB \ + __asm__( \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + : "=&r"(c0), "=&r"(c1), "=&r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \ + : "cc"); + +void +s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[8]; + + memcpy(at, A->dp, 4 * sizeof(mp_digit)); + memcpy(at + 4, B->dp, 4 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[4]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); + MULADD(at[1], at[4]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); + MULADD(at[1], at[5]); + MULADD(at[2], at[4]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); + MULADD(at[1], at[6]); + MULADD(at[2], at[5]); + MULADD(at[3], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[1], at[7]); + MULADD(at[2], at[6]); + MULADD(at[3], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[2], at[7]); + MULADD(at[3], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[3], at[7]); + COMBA_STORE(C->dp[6]); + COMBA_STORE2(C->dp[7]); + C->used = 8; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[16]; + + memcpy(at, A->dp, 8 * sizeof(mp_digit)); + memcpy(at + 8, B->dp, 8 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[8]); + COMBA_STORE(C->dp[0]); + /* 1 */ + 
COMBA_FORWARD; + MULADD(at[0], at[9]); + MULADD(at[1], at[8]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); + MULADD(at[1], at[9]); + MULADD(at[2], at[8]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); + MULADD(at[1], at[10]); + MULADD(at[2], at[9]); + MULADD(at[3], at[8]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); + MULADD(at[1], at[11]); + MULADD(at[2], at[10]); + MULADD(at[3], at[9]); + MULADD(at[4], at[8]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); + MULADD(at[1], at[12]); + MULADD(at[2], at[11]); + MULADD(at[3], at[10]); + MULADD(at[4], at[9]); + MULADD(at[5], at[8]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); + MULADD(at[1], at[13]); + MULADD(at[2], at[12]); + MULADD(at[3], at[11]); + MULADD(at[4], at[10]); + MULADD(at[5], at[9]); + MULADD(at[6], at[8]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); + MULADD(at[1], at[14]); + MULADD(at[2], at[13]); + MULADD(at[3], at[12]); + MULADD(at[4], at[11]); + MULADD(at[5], at[10]); + MULADD(at[6], at[9]); + MULADD(at[7], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[1], at[15]); + MULADD(at[2], at[14]); + MULADD(at[3], at[13]); + MULADD(at[4], at[12]); + MULADD(at[5], at[11]); + MULADD(at[6], at[10]); + MULADD(at[7], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[2], at[15]); + MULADD(at[3], at[14]); + MULADD(at[4], at[13]); + MULADD(at[5], at[12]); + MULADD(at[6], at[11]); + MULADD(at[7], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[3], at[15]); + MULADD(at[4], at[14]); + MULADD(at[5], at[13]); + MULADD(at[6], at[12]); + MULADD(at[7], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[4], at[15]); + MULADD(at[5], at[14]); + MULADD(at[6], at[13]); + MULADD(at[7], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + 
COMBA_FORWARD; + MULADD(at[5], at[15]); + MULADD(at[6], at[14]); + MULADD(at[7], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[6], at[15]); + MULADD(at[7], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[7], at[15]); + COMBA_STORE(C->dp[14]); + COMBA_STORE2(C->dp[15]); + C->used = 16; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[32]; + + memcpy(at, A->dp, 16 * sizeof(mp_digit)); + memcpy(at + 16, B->dp, 16 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[16]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); + MULADD(at[1], at[16]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); + MULADD(at[1], at[17]); + MULADD(at[2], at[16]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); + MULADD(at[1], at[18]); + MULADD(at[2], at[17]); + MULADD(at[3], at[16]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); + MULADD(at[1], at[19]); + MULADD(at[2], at[18]); + MULADD(at[3], at[17]); + MULADD(at[4], at[16]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); + MULADD(at[1], at[20]); + MULADD(at[2], at[19]); + MULADD(at[3], at[18]); + MULADD(at[4], at[17]); + MULADD(at[5], at[16]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); + MULADD(at[1], at[21]); + MULADD(at[2], at[20]); + MULADD(at[3], at[19]); + MULADD(at[4], at[18]); + MULADD(at[5], at[17]); + MULADD(at[6], at[16]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); + MULADD(at[1], at[22]); + MULADD(at[2], at[21]); + MULADD(at[3], at[20]); + MULADD(at[4], at[19]); + MULADD(at[5], at[18]); + MULADD(at[6], at[17]); + MULADD(at[7], at[16]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); + 
MULADD(at[1], at[23]); + MULADD(at[2], at[22]); + MULADD(at[3], at[21]); + MULADD(at[4], at[20]); + MULADD(at[5], at[19]); + MULADD(at[6], at[18]); + MULADD(at[7], at[17]); + MULADD(at[8], at[16]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); + MULADD(at[1], at[24]); + MULADD(at[2], at[23]); + MULADD(at[3], at[22]); + MULADD(at[4], at[21]); + MULADD(at[5], at[20]); + MULADD(at[6], at[19]); + MULADD(at[7], at[18]); + MULADD(at[8], at[17]); + MULADD(at[9], at[16]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); + MULADD(at[1], at[25]); + MULADD(at[2], at[24]); + MULADD(at[3], at[23]); + MULADD(at[4], at[22]); + MULADD(at[5], at[21]); + MULADD(at[6], at[20]); + MULADD(at[7], at[19]); + MULADD(at[8], at[18]); + MULADD(at[9], at[17]); + MULADD(at[10], at[16]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); + MULADD(at[1], at[26]); + MULADD(at[2], at[25]); + MULADD(at[3], at[24]); + MULADD(at[4], at[23]); + MULADD(at[5], at[22]); + MULADD(at[6], at[21]); + MULADD(at[7], at[20]); + MULADD(at[8], at[19]); + MULADD(at[9], at[18]); + MULADD(at[10], at[17]); + MULADD(at[11], at[16]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); + MULADD(at[1], at[27]); + MULADD(at[2], at[26]); + MULADD(at[3], at[25]); + MULADD(at[4], at[24]); + MULADD(at[5], at[23]); + MULADD(at[6], at[22]); + MULADD(at[7], at[21]); + MULADD(at[8], at[20]); + MULADD(at[9], at[19]); + MULADD(at[10], at[18]); + MULADD(at[11], at[17]); + MULADD(at[12], at[16]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); + MULADD(at[1], at[28]); + MULADD(at[2], at[27]); + MULADD(at[3], at[26]); + MULADD(at[4], at[25]); + MULADD(at[5], at[24]); + MULADD(at[6], at[23]); + MULADD(at[7], at[22]); + MULADD(at[8], at[21]); + MULADD(at[9], at[20]); + MULADD(at[10], at[19]); + MULADD(at[11], at[18]); + MULADD(at[12], at[17]); + MULADD(at[13], at[16]); + 
COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); + MULADD(at[1], at[29]); + MULADD(at[2], at[28]); + MULADD(at[3], at[27]); + MULADD(at[4], at[26]); + MULADD(at[5], at[25]); + MULADD(at[6], at[24]); + MULADD(at[7], at[23]); + MULADD(at[8], at[22]); + MULADD(at[9], at[21]); + MULADD(at[10], at[20]); + MULADD(at[11], at[19]); + MULADD(at[12], at[18]); + MULADD(at[13], at[17]); + MULADD(at[14], at[16]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); + MULADD(at[1], at[30]); + MULADD(at[2], at[29]); + MULADD(at[3], at[28]); + MULADD(at[4], at[27]); + MULADD(at[5], at[26]); + MULADD(at[6], at[25]); + MULADD(at[7], at[24]); + MULADD(at[8], at[23]); + MULADD(at[9], at[22]); + MULADD(at[10], at[21]); + MULADD(at[11], at[20]); + MULADD(at[12], at[19]); + MULADD(at[13], at[18]); + MULADD(at[14], at[17]); + MULADD(at[15], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[1], at[31]); + MULADD(at[2], at[30]); + MULADD(at[3], at[29]); + MULADD(at[4], at[28]); + MULADD(at[5], at[27]); + MULADD(at[6], at[26]); + MULADD(at[7], at[25]); + MULADD(at[8], at[24]); + MULADD(at[9], at[23]); + MULADD(at[10], at[22]); + MULADD(at[11], at[21]); + MULADD(at[12], at[20]); + MULADD(at[13], at[19]); + MULADD(at[14], at[18]); + MULADD(at[15], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[2], at[31]); + MULADD(at[3], at[30]); + MULADD(at[4], at[29]); + MULADD(at[5], at[28]); + MULADD(at[6], at[27]); + MULADD(at[7], at[26]); + MULADD(at[8], at[25]); + MULADD(at[9], at[24]); + MULADD(at[10], at[23]); + MULADD(at[11], at[22]); + MULADD(at[12], at[21]); + MULADD(at[13], at[20]); + MULADD(at[14], at[19]); + MULADD(at[15], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[3], at[31]); + MULADD(at[4], at[30]); + MULADD(at[5], at[29]); + MULADD(at[6], at[28]); + MULADD(at[7], at[27]); + MULADD(at[8], at[26]); + MULADD(at[9], at[25]); + MULADD(at[10], at[24]); + 
MULADD(at[11], at[23]); + MULADD(at[12], at[22]); + MULADD(at[13], at[21]); + MULADD(at[14], at[20]); + MULADD(at[15], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[4], at[31]); + MULADD(at[5], at[30]); + MULADD(at[6], at[29]); + MULADD(at[7], at[28]); + MULADD(at[8], at[27]); + MULADD(at[9], at[26]); + MULADD(at[10], at[25]); + MULADD(at[11], at[24]); + MULADD(at[12], at[23]); + MULADD(at[13], at[22]); + MULADD(at[14], at[21]); + MULADD(at[15], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[5], at[31]); + MULADD(at[6], at[30]); + MULADD(at[7], at[29]); + MULADD(at[8], at[28]); + MULADD(at[9], at[27]); + MULADD(at[10], at[26]); + MULADD(at[11], at[25]); + MULADD(at[12], at[24]); + MULADD(at[13], at[23]); + MULADD(at[14], at[22]); + MULADD(at[15], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[6], at[31]); + MULADD(at[7], at[30]); + MULADD(at[8], at[29]); + MULADD(at[9], at[28]); + MULADD(at[10], at[27]); + MULADD(at[11], at[26]); + MULADD(at[12], at[25]); + MULADD(at[13], at[24]); + MULADD(at[14], at[23]); + MULADD(at[15], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[7], at[31]); + MULADD(at[8], at[30]); + MULADD(at[9], at[29]); + MULADD(at[10], at[28]); + MULADD(at[11], at[27]); + MULADD(at[12], at[26]); + MULADD(at[13], at[25]); + MULADD(at[14], at[24]); + MULADD(at[15], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[8], at[31]); + MULADD(at[9], at[30]); + MULADD(at[10], at[29]); + MULADD(at[11], at[28]); + MULADD(at[12], at[27]); + MULADD(at[13], at[26]); + MULADD(at[14], at[25]); + MULADD(at[15], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[9], at[31]); + MULADD(at[10], at[30]); + MULADD(at[11], at[29]); + MULADD(at[12], at[28]); + MULADD(at[13], at[27]); + MULADD(at[14], at[26]); + MULADD(at[15], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[10], at[31]); + 
MULADD(at[11], at[30]); + MULADD(at[12], at[29]); + MULADD(at[13], at[28]); + MULADD(at[14], at[27]); + MULADD(at[15], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[11], at[31]); + MULADD(at[12], at[30]); + MULADD(at[13], at[29]); + MULADD(at[14], at[28]); + MULADD(at[15], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[12], at[31]); + MULADD(at[13], at[30]); + MULADD(at[14], at[29]); + MULADD(at[15], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[13], at[31]); + MULADD(at[14], at[30]); + MULADD(at[15], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[14], at[31]); + MULADD(at[15], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[15], at[31]); + COMBA_STORE(C->dp[30]); + COMBA_STORE2(C->dp[31]); + C->used = 32; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + /* s_mp_mul_comba_32: fully unrolled 32 x 32 digit multiply, judging by the name and the MULADD(at[i], at[32 + j]) pattern (the macro bodies are defined elsewhere). Copies 32 digits from A->dp and 32 from B->dp, hands C->dp[0] .. C->dp[63] to COMBA_STORE / COMBA_STORE2, then sets C->used = 64 and C->sign = A->sign ^ B->sign and calls mp_clamp(C). Nothing in this function checks A->used, B->used or the room behind C->dp; the caller must guarantee them. */ +void + s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[64]; + /* c0, c1, c2 are never named again below; presumably the COMBA_* and MULADD macros use them as the carry accumulator - confirm in the macro definitions. The operands are copied up front: at[0..31] = digits of A, at[32..63] = digits of B, and every MULADD below reads only at[]. NOTE(review): presumably this is what makes it safe for C to alias A or B - confirm against callers. */ + memcpy(at, A->dp, 32 * sizeof(mp_digit)); + memcpy(at + 32, B->dp, 32 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 (column k lists one MULADD(at[i], at[32 + j]) for every pair with i + j == k, then stores into C->dp[k]) */ + MULADD(at[0], at[32]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); + MULADD(at[1], at[32]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); + MULADD(at[1], at[33]); + MULADD(at[2], at[32]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); + MULADD(at[1], at[34]); + MULADD(at[2], at[33]); + MULADD(at[3], at[32]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); + MULADD(at[1], at[35]); + MULADD(at[2], at[34]); + MULADD(at[3], at[33]); + MULADD(at[4], at[32]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); + MULADD(at[1], at[36]); + MULADD(at[2], at[35]); + MULADD(at[3], at[34]); + MULADD(at[4], at[33]); + MULADD(at[5], at[32]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD;
+ MULADD(at[0], at[38]); + MULADD(at[1], at[37]); + MULADD(at[2], at[36]); + MULADD(at[3], at[35]); + MULADD(at[4], at[34]); + MULADD(at[5], at[33]); + MULADD(at[6], at[32]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); + MULADD(at[1], at[38]); + MULADD(at[2], at[37]); + MULADD(at[3], at[36]); + MULADD(at[4], at[35]); + MULADD(at[5], at[34]); + MULADD(at[6], at[33]); + MULADD(at[7], at[32]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); + MULADD(at[1], at[39]); + MULADD(at[2], at[38]); + MULADD(at[3], at[37]); + MULADD(at[4], at[36]); + MULADD(at[5], at[35]); + MULADD(at[6], at[34]); + MULADD(at[7], at[33]); + MULADD(at[8], at[32]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); + MULADD(at[1], at[40]); + MULADD(at[2], at[39]); + MULADD(at[3], at[38]); + MULADD(at[4], at[37]); + MULADD(at[5], at[36]); + MULADD(at[6], at[35]); + MULADD(at[7], at[34]); + MULADD(at[8], at[33]); + MULADD(at[9], at[32]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); + MULADD(at[1], at[41]); + MULADD(at[2], at[40]); + MULADD(at[3], at[39]); + MULADD(at[4], at[38]); + MULADD(at[5], at[37]); + MULADD(at[6], at[36]); + MULADD(at[7], at[35]); + MULADD(at[8], at[34]); + MULADD(at[9], at[33]); + MULADD(at[10], at[32]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); + MULADD(at[1], at[42]); + MULADD(at[2], at[41]); + MULADD(at[3], at[40]); + MULADD(at[4], at[39]); + MULADD(at[5], at[38]); + MULADD(at[6], at[37]); + MULADD(at[7], at[36]); + MULADD(at[8], at[35]); + MULADD(at[9], at[34]); + MULADD(at[10], at[33]); + MULADD(at[11], at[32]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); + MULADD(at[1], at[43]); + MULADD(at[2], at[42]); + MULADD(at[3], at[41]); + MULADD(at[4], at[40]); + MULADD(at[5], at[39]); + MULADD(at[6], at[38]); + MULADD(at[7], at[37]); + MULADD(at[8], at[36]); + MULADD(at[9], at[35]); + 
MULADD(at[10], at[34]); + MULADD(at[11], at[33]); + MULADD(at[12], at[32]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); + MULADD(at[1], at[44]); + MULADD(at[2], at[43]); + MULADD(at[3], at[42]); + MULADD(at[4], at[41]); + MULADD(at[5], at[40]); + MULADD(at[6], at[39]); + MULADD(at[7], at[38]); + MULADD(at[8], at[37]); + MULADD(at[9], at[36]); + MULADD(at[10], at[35]); + MULADD(at[11], at[34]); + MULADD(at[12], at[33]); + MULADD(at[13], at[32]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); + MULADD(at[1], at[45]); + MULADD(at[2], at[44]); + MULADD(at[3], at[43]); + MULADD(at[4], at[42]); + MULADD(at[5], at[41]); + MULADD(at[6], at[40]); + MULADD(at[7], at[39]); + MULADD(at[8], at[38]); + MULADD(at[9], at[37]); + MULADD(at[10], at[36]); + MULADD(at[11], at[35]); + MULADD(at[12], at[34]); + MULADD(at[13], at[33]); + MULADD(at[14], at[32]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); + MULADD(at[1], at[46]); + MULADD(at[2], at[45]); + MULADD(at[3], at[44]); + MULADD(at[4], at[43]); + MULADD(at[5], at[42]); + MULADD(at[6], at[41]); + MULADD(at[7], at[40]); + MULADD(at[8], at[39]); + MULADD(at[9], at[38]); + MULADD(at[10], at[37]); + MULADD(at[11], at[36]); + MULADD(at[12], at[35]); + MULADD(at[13], at[34]); + MULADD(at[14], at[33]); + MULADD(at[15], at[32]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); + MULADD(at[1], at[47]); + MULADD(at[2], at[46]); + MULADD(at[3], at[45]); + MULADD(at[4], at[44]); + MULADD(at[5], at[43]); + MULADD(at[6], at[42]); + MULADD(at[7], at[41]); + MULADD(at[8], at[40]); + MULADD(at[9], at[39]); + MULADD(at[10], at[38]); + MULADD(at[11], at[37]); + MULADD(at[12], at[36]); + MULADD(at[13], at[35]); + MULADD(at[14], at[34]); + MULADD(at[15], at[33]); + MULADD(at[16], at[32]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); + MULADD(at[1], at[48]); + MULADD(at[2], at[47]); + 
MULADD(at[3], at[46]); + MULADD(at[4], at[45]); + MULADD(at[5], at[44]); + MULADD(at[6], at[43]); + MULADD(at[7], at[42]); + MULADD(at[8], at[41]); + MULADD(at[9], at[40]); + MULADD(at[10], at[39]); + MULADD(at[11], at[38]); + MULADD(at[12], at[37]); + MULADD(at[13], at[36]); + MULADD(at[14], at[35]); + MULADD(at[15], at[34]); + MULADD(at[16], at[33]); + MULADD(at[17], at[32]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); + MULADD(at[1], at[49]); + MULADD(at[2], at[48]); + MULADD(at[3], at[47]); + MULADD(at[4], at[46]); + MULADD(at[5], at[45]); + MULADD(at[6], at[44]); + MULADD(at[7], at[43]); + MULADD(at[8], at[42]); + MULADD(at[9], at[41]); + MULADD(at[10], at[40]); + MULADD(at[11], at[39]); + MULADD(at[12], at[38]); + MULADD(at[13], at[37]); + MULADD(at[14], at[36]); + MULADD(at[15], at[35]); + MULADD(at[16], at[34]); + MULADD(at[17], at[33]); + MULADD(at[18], at[32]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); + MULADD(at[1], at[50]); + MULADD(at[2], at[49]); + MULADD(at[3], at[48]); + MULADD(at[4], at[47]); + MULADD(at[5], at[46]); + MULADD(at[6], at[45]); + MULADD(at[7], at[44]); + MULADD(at[8], at[43]); + MULADD(at[9], at[42]); + MULADD(at[10], at[41]); + MULADD(at[11], at[40]); + MULADD(at[12], at[39]); + MULADD(at[13], at[38]); + MULADD(at[14], at[37]); + MULADD(at[15], at[36]); + MULADD(at[16], at[35]); + MULADD(at[17], at[34]); + MULADD(at[18], at[33]); + MULADD(at[19], at[32]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); + MULADD(at[1], at[51]); + MULADD(at[2], at[50]); + MULADD(at[3], at[49]); + MULADD(at[4], at[48]); + MULADD(at[5], at[47]); + MULADD(at[6], at[46]); + MULADD(at[7], at[45]); + MULADD(at[8], at[44]); + MULADD(at[9], at[43]); + MULADD(at[10], at[42]); + MULADD(at[11], at[41]); + MULADD(at[12], at[40]); + MULADD(at[13], at[39]); + MULADD(at[14], at[38]); + MULADD(at[15], at[37]); + MULADD(at[16], at[36]); + MULADD(at[17], at[35]); + 
MULADD(at[18], at[34]); + MULADD(at[19], at[33]); + MULADD(at[20], at[32]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); + MULADD(at[1], at[52]); + MULADD(at[2], at[51]); + MULADD(at[3], at[50]); + MULADD(at[4], at[49]); + MULADD(at[5], at[48]); + MULADD(at[6], at[47]); + MULADD(at[7], at[46]); + MULADD(at[8], at[45]); + MULADD(at[9], at[44]); + MULADD(at[10], at[43]); + MULADD(at[11], at[42]); + MULADD(at[12], at[41]); + MULADD(at[13], at[40]); + MULADD(at[14], at[39]); + MULADD(at[15], at[38]); + MULADD(at[16], at[37]); + MULADD(at[17], at[36]); + MULADD(at[18], at[35]); + MULADD(at[19], at[34]); + MULADD(at[20], at[33]); + MULADD(at[21], at[32]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); + MULADD(at[1], at[53]); + MULADD(at[2], at[52]); + MULADD(at[3], at[51]); + MULADD(at[4], at[50]); + MULADD(at[5], at[49]); + MULADD(at[6], at[48]); + MULADD(at[7], at[47]); + MULADD(at[8], at[46]); + MULADD(at[9], at[45]); + MULADD(at[10], at[44]); + MULADD(at[11], at[43]); + MULADD(at[12], at[42]); + MULADD(at[13], at[41]); + MULADD(at[14], at[40]); + MULADD(at[15], at[39]); + MULADD(at[16], at[38]); + MULADD(at[17], at[37]); + MULADD(at[18], at[36]); + MULADD(at[19], at[35]); + MULADD(at[20], at[34]); + MULADD(at[21], at[33]); + MULADD(at[22], at[32]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); + MULADD(at[1], at[54]); + MULADD(at[2], at[53]); + MULADD(at[3], at[52]); + MULADD(at[4], at[51]); + MULADD(at[5], at[50]); + MULADD(at[6], at[49]); + MULADD(at[7], at[48]); + MULADD(at[8], at[47]); + MULADD(at[9], at[46]); + MULADD(at[10], at[45]); + MULADD(at[11], at[44]); + MULADD(at[12], at[43]); + MULADD(at[13], at[42]); + MULADD(at[14], at[41]); + MULADD(at[15], at[40]); + MULADD(at[16], at[39]); + MULADD(at[17], at[38]); + MULADD(at[18], at[37]); + MULADD(at[19], at[36]); + MULADD(at[20], at[35]); + MULADD(at[21], at[34]); + MULADD(at[22], at[33]); + MULADD(at[23], 
at[32]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); + MULADD(at[1], at[55]); + MULADD(at[2], at[54]); + MULADD(at[3], at[53]); + MULADD(at[4], at[52]); + MULADD(at[5], at[51]); + MULADD(at[6], at[50]); + MULADD(at[7], at[49]); + MULADD(at[8], at[48]); + MULADD(at[9], at[47]); + MULADD(at[10], at[46]); + MULADD(at[11], at[45]); + MULADD(at[12], at[44]); + MULADD(at[13], at[43]); + MULADD(at[14], at[42]); + MULADD(at[15], at[41]); + MULADD(at[16], at[40]); + MULADD(at[17], at[39]); + MULADD(at[18], at[38]); + MULADD(at[19], at[37]); + MULADD(at[20], at[36]); + MULADD(at[21], at[35]); + MULADD(at[22], at[34]); + MULADD(at[23], at[33]); + MULADD(at[24], at[32]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); + MULADD(at[1], at[56]); + MULADD(at[2], at[55]); + MULADD(at[3], at[54]); + MULADD(at[4], at[53]); + MULADD(at[5], at[52]); + MULADD(at[6], at[51]); + MULADD(at[7], at[50]); + MULADD(at[8], at[49]); + MULADD(at[9], at[48]); + MULADD(at[10], at[47]); + MULADD(at[11], at[46]); + MULADD(at[12], at[45]); + MULADD(at[13], at[44]); + MULADD(at[14], at[43]); + MULADD(at[15], at[42]); + MULADD(at[16], at[41]); + MULADD(at[17], at[40]); + MULADD(at[18], at[39]); + MULADD(at[19], at[38]); + MULADD(at[20], at[37]); + MULADD(at[21], at[36]); + MULADD(at[22], at[35]); + MULADD(at[23], at[34]); + MULADD(at[24], at[33]); + MULADD(at[25], at[32]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); + MULADD(at[1], at[57]); + MULADD(at[2], at[56]); + MULADD(at[3], at[55]); + MULADD(at[4], at[54]); + MULADD(at[5], at[53]); + MULADD(at[6], at[52]); + MULADD(at[7], at[51]); + MULADD(at[8], at[50]); + MULADD(at[9], at[49]); + MULADD(at[10], at[48]); + MULADD(at[11], at[47]); + MULADD(at[12], at[46]); + MULADD(at[13], at[45]); + MULADD(at[14], at[44]); + MULADD(at[15], at[43]); + MULADD(at[16], at[42]); + MULADD(at[17], at[41]); + MULADD(at[18], at[40]); + MULADD(at[19], at[39]); + 
MULADD(at[20], at[38]); + MULADD(at[21], at[37]); + MULADD(at[22], at[36]); + MULADD(at[23], at[35]); + MULADD(at[24], at[34]); + MULADD(at[25], at[33]); + MULADD(at[26], at[32]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); + MULADD(at[1], at[58]); + MULADD(at[2], at[57]); + MULADD(at[3], at[56]); + MULADD(at[4], at[55]); + MULADD(at[5], at[54]); + MULADD(at[6], at[53]); + MULADD(at[7], at[52]); + MULADD(at[8], at[51]); + MULADD(at[9], at[50]); + MULADD(at[10], at[49]); + MULADD(at[11], at[48]); + MULADD(at[12], at[47]); + MULADD(at[13], at[46]); + MULADD(at[14], at[45]); + MULADD(at[15], at[44]); + MULADD(at[16], at[43]); + MULADD(at[17], at[42]); + MULADD(at[18], at[41]); + MULADD(at[19], at[40]); + MULADD(at[20], at[39]); + MULADD(at[21], at[38]); + MULADD(at[22], at[37]); + MULADD(at[23], at[36]); + MULADD(at[24], at[35]); + MULADD(at[25], at[34]); + MULADD(at[26], at[33]); + MULADD(at[27], at[32]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); + MULADD(at[1], at[59]); + MULADD(at[2], at[58]); + MULADD(at[3], at[57]); + MULADD(at[4], at[56]); + MULADD(at[5], at[55]); + MULADD(at[6], at[54]); + MULADD(at[7], at[53]); + MULADD(at[8], at[52]); + MULADD(at[9], at[51]); + MULADD(at[10], at[50]); + MULADD(at[11], at[49]); + MULADD(at[12], at[48]); + MULADD(at[13], at[47]); + MULADD(at[14], at[46]); + MULADD(at[15], at[45]); + MULADD(at[16], at[44]); + MULADD(at[17], at[43]); + MULADD(at[18], at[42]); + MULADD(at[19], at[41]); + MULADD(at[20], at[40]); + MULADD(at[21], at[39]); + MULADD(at[22], at[38]); + MULADD(at[23], at[37]); + MULADD(at[24], at[36]); + MULADD(at[25], at[35]); + MULADD(at[26], at[34]); + MULADD(at[27], at[33]); + MULADD(at[28], at[32]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); + MULADD(at[1], at[60]); + MULADD(at[2], at[59]); + MULADD(at[3], at[58]); + MULADD(at[4], at[57]); + MULADD(at[5], at[56]); + MULADD(at[6], at[55]); + MULADD(at[7], 
at[54]); + MULADD(at[8], at[53]); + MULADD(at[9], at[52]); + MULADD(at[10], at[51]); + MULADD(at[11], at[50]); + MULADD(at[12], at[49]); + MULADD(at[13], at[48]); + MULADD(at[14], at[47]); + MULADD(at[15], at[46]); + MULADD(at[16], at[45]); + MULADD(at[17], at[44]); + MULADD(at[18], at[43]); + MULADD(at[19], at[42]); + MULADD(at[20], at[41]); + MULADD(at[21], at[40]); + MULADD(at[22], at[39]); + MULADD(at[23], at[38]); + MULADD(at[24], at[37]); + MULADD(at[25], at[36]); + MULADD(at[26], at[35]); + MULADD(at[27], at[34]); + MULADD(at[28], at[33]); + MULADD(at[29], at[32]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); + MULADD(at[1], at[61]); + MULADD(at[2], at[60]); + MULADD(at[3], at[59]); + MULADD(at[4], at[58]); + MULADD(at[5], at[57]); + MULADD(at[6], at[56]); + MULADD(at[7], at[55]); + MULADD(at[8], at[54]); + MULADD(at[9], at[53]); + MULADD(at[10], at[52]); + MULADD(at[11], at[51]); + MULADD(at[12], at[50]); + MULADD(at[13], at[49]); + MULADD(at[14], at[48]); + MULADD(at[15], at[47]); + MULADD(at[16], at[46]); + MULADD(at[17], at[45]); + MULADD(at[18], at[44]); + MULADD(at[19], at[43]); + MULADD(at[20], at[42]); + MULADD(at[21], at[41]); + MULADD(at[22], at[40]); + MULADD(at[23], at[39]); + MULADD(at[24], at[38]); + MULADD(at[25], at[37]); + MULADD(at[26], at[36]); + MULADD(at[27], at[35]); + MULADD(at[28], at[34]); + MULADD(at[29], at[33]); + MULADD(at[30], at[32]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); + MULADD(at[1], at[62]); + MULADD(at[2], at[61]); + MULADD(at[3], at[60]); + MULADD(at[4], at[59]); + MULADD(at[5], at[58]); + MULADD(at[6], at[57]); + MULADD(at[7], at[56]); + MULADD(at[8], at[55]); + MULADD(at[9], at[54]); + MULADD(at[10], at[53]); + MULADD(at[11], at[52]); + MULADD(at[12], at[51]); + MULADD(at[13], at[50]); + MULADD(at[14], at[49]); + MULADD(at[15], at[48]); + MULADD(at[16], at[47]); + MULADD(at[17], at[46]); + MULADD(at[18], at[45]); + MULADD(at[19], at[44]); + 
MULADD(at[20], at[43]); + MULADD(at[21], at[42]); + MULADD(at[22], at[41]); + MULADD(at[23], at[40]); + MULADD(at[24], at[39]); + MULADD(at[25], at[38]); + MULADD(at[26], at[37]); + MULADD(at[27], at[36]); + MULADD(at[28], at[35]); + MULADD(at[29], at[34]); + MULADD(at[30], at[33]); + MULADD(at[31], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[1], at[63]); + MULADD(at[2], at[62]); + MULADD(at[3], at[61]); + MULADD(at[4], at[60]); + MULADD(at[5], at[59]); + MULADD(at[6], at[58]); + MULADD(at[7], at[57]); + MULADD(at[8], at[56]); + MULADD(at[9], at[55]); + MULADD(at[10], at[54]); + MULADD(at[11], at[53]); + MULADD(at[12], at[52]); + MULADD(at[13], at[51]); + MULADD(at[14], at[50]); + MULADD(at[15], at[49]); + MULADD(at[16], at[48]); + MULADD(at[17], at[47]); + MULADD(at[18], at[46]); + MULADD(at[19], at[45]); + MULADD(at[20], at[44]); + MULADD(at[21], at[43]); + MULADD(at[22], at[42]); + MULADD(at[23], at[41]); + MULADD(at[24], at[40]); + MULADD(at[25], at[39]); + MULADD(at[26], at[38]); + MULADD(at[27], at[37]); + MULADD(at[28], at[36]); + MULADD(at[29], at[35]); + MULADD(at[30], at[34]); + MULADD(at[31], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[2], at[63]); + MULADD(at[3], at[62]); + MULADD(at[4], at[61]); + MULADD(at[5], at[60]); + MULADD(at[6], at[59]); + MULADD(at[7], at[58]); + MULADD(at[8], at[57]); + MULADD(at[9], at[56]); + MULADD(at[10], at[55]); + MULADD(at[11], at[54]); + MULADD(at[12], at[53]); + MULADD(at[13], at[52]); + MULADD(at[14], at[51]); + MULADD(at[15], at[50]); + MULADD(at[16], at[49]); + MULADD(at[17], at[48]); + MULADD(at[18], at[47]); + MULADD(at[19], at[46]); + MULADD(at[20], at[45]); + MULADD(at[21], at[44]); + MULADD(at[22], at[43]); + MULADD(at[23], at[42]); + MULADD(at[24], at[41]); + MULADD(at[25], at[40]); + MULADD(at[26], at[39]); + MULADD(at[27], at[38]); + MULADD(at[28], at[37]); + MULADD(at[29], at[36]); + MULADD(at[30], at[35]); + MULADD(at[31], at[34]); + 
COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[3], at[63]); + MULADD(at[4], at[62]); + MULADD(at[5], at[61]); + MULADD(at[6], at[60]); + MULADD(at[7], at[59]); + MULADD(at[8], at[58]); + MULADD(at[9], at[57]); + MULADD(at[10], at[56]); + MULADD(at[11], at[55]); + MULADD(at[12], at[54]); + MULADD(at[13], at[53]); + MULADD(at[14], at[52]); + MULADD(at[15], at[51]); + MULADD(at[16], at[50]); + MULADD(at[17], at[49]); + MULADD(at[18], at[48]); + MULADD(at[19], at[47]); + MULADD(at[20], at[46]); + MULADD(at[21], at[45]); + MULADD(at[22], at[44]); + MULADD(at[23], at[43]); + MULADD(at[24], at[42]); + MULADD(at[25], at[41]); + MULADD(at[26], at[40]); + MULADD(at[27], at[39]); + MULADD(at[28], at[38]); + MULADD(at[29], at[37]); + MULADD(at[30], at[36]); + MULADD(at[31], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[4], at[63]); + MULADD(at[5], at[62]); + MULADD(at[6], at[61]); + MULADD(at[7], at[60]); + MULADD(at[8], at[59]); + MULADD(at[9], at[58]); + MULADD(at[10], at[57]); + MULADD(at[11], at[56]); + MULADD(at[12], at[55]); + MULADD(at[13], at[54]); + MULADD(at[14], at[53]); + MULADD(at[15], at[52]); + MULADD(at[16], at[51]); + MULADD(at[17], at[50]); + MULADD(at[18], at[49]); + MULADD(at[19], at[48]); + MULADD(at[20], at[47]); + MULADD(at[21], at[46]); + MULADD(at[22], at[45]); + MULADD(at[23], at[44]); + MULADD(at[24], at[43]); + MULADD(at[25], at[42]); + MULADD(at[26], at[41]); + MULADD(at[27], at[40]); + MULADD(at[28], at[39]); + MULADD(at[29], at[38]); + MULADD(at[30], at[37]); + MULADD(at[31], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[5], at[63]); + MULADD(at[6], at[62]); + MULADD(at[7], at[61]); + MULADD(at[8], at[60]); + MULADD(at[9], at[59]); + MULADD(at[10], at[58]); + MULADD(at[11], at[57]); + MULADD(at[12], at[56]); + MULADD(at[13], at[55]); + MULADD(at[14], at[54]); + MULADD(at[15], at[53]); + MULADD(at[16], at[52]); + MULADD(at[17], at[51]); + MULADD(at[18], at[50]); + 
MULADD(at[19], at[49]); + MULADD(at[20], at[48]); + MULADD(at[21], at[47]); + MULADD(at[22], at[46]); + MULADD(at[23], at[45]); + MULADD(at[24], at[44]); + MULADD(at[25], at[43]); + MULADD(at[26], at[42]); + MULADD(at[27], at[41]); + MULADD(at[28], at[40]); + MULADD(at[29], at[39]); + MULADD(at[30], at[38]); + MULADD(at[31], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[6], at[63]); + MULADD(at[7], at[62]); + MULADD(at[8], at[61]); + MULADD(at[9], at[60]); + MULADD(at[10], at[59]); + MULADD(at[11], at[58]); + MULADD(at[12], at[57]); + MULADD(at[13], at[56]); + MULADD(at[14], at[55]); + MULADD(at[15], at[54]); + MULADD(at[16], at[53]); + MULADD(at[17], at[52]); + MULADD(at[18], at[51]); + MULADD(at[19], at[50]); + MULADD(at[20], at[49]); + MULADD(at[21], at[48]); + MULADD(at[22], at[47]); + MULADD(at[23], at[46]); + MULADD(at[24], at[45]); + MULADD(at[25], at[44]); + MULADD(at[26], at[43]); + MULADD(at[27], at[42]); + MULADD(at[28], at[41]); + MULADD(at[29], at[40]); + MULADD(at[30], at[39]); + MULADD(at[31], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[7], at[63]); + MULADD(at[8], at[62]); + MULADD(at[9], at[61]); + MULADD(at[10], at[60]); + MULADD(at[11], at[59]); + MULADD(at[12], at[58]); + MULADD(at[13], at[57]); + MULADD(at[14], at[56]); + MULADD(at[15], at[55]); + MULADD(at[16], at[54]); + MULADD(at[17], at[53]); + MULADD(at[18], at[52]); + MULADD(at[19], at[51]); + MULADD(at[20], at[50]); + MULADD(at[21], at[49]); + MULADD(at[22], at[48]); + MULADD(at[23], at[47]); + MULADD(at[24], at[46]); + MULADD(at[25], at[45]); + MULADD(at[26], at[44]); + MULADD(at[27], at[43]); + MULADD(at[28], at[42]); + MULADD(at[29], at[41]); + MULADD(at[30], at[40]); + MULADD(at[31], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[8], at[63]); + MULADD(at[9], at[62]); + MULADD(at[10], at[61]); + MULADD(at[11], at[60]); + MULADD(at[12], at[59]); + MULADD(at[13], at[58]); + MULADD(at[14], at[57]); + 
MULADD(at[15], at[56]); + MULADD(at[16], at[55]); + MULADD(at[17], at[54]); + MULADD(at[18], at[53]); + MULADD(at[19], at[52]); + MULADD(at[20], at[51]); + MULADD(at[21], at[50]); + MULADD(at[22], at[49]); + MULADD(at[23], at[48]); + MULADD(at[24], at[47]); + MULADD(at[25], at[46]); + MULADD(at[26], at[45]); + MULADD(at[27], at[44]); + MULADD(at[28], at[43]); + MULADD(at[29], at[42]); + MULADD(at[30], at[41]); + MULADD(at[31], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[9], at[63]); + MULADD(at[10], at[62]); + MULADD(at[11], at[61]); + MULADD(at[12], at[60]); + MULADD(at[13], at[59]); + MULADD(at[14], at[58]); + MULADD(at[15], at[57]); + MULADD(at[16], at[56]); + MULADD(at[17], at[55]); + MULADD(at[18], at[54]); + MULADD(at[19], at[53]); + MULADD(at[20], at[52]); + MULADD(at[21], at[51]); + MULADD(at[22], at[50]); + MULADD(at[23], at[49]); + MULADD(at[24], at[48]); + MULADD(at[25], at[47]); + MULADD(at[26], at[46]); + MULADD(at[27], at[45]); + MULADD(at[28], at[44]); + MULADD(at[29], at[43]); + MULADD(at[30], at[42]); + MULADD(at[31], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[10], at[63]); + MULADD(at[11], at[62]); + MULADD(at[12], at[61]); + MULADD(at[13], at[60]); + MULADD(at[14], at[59]); + MULADD(at[15], at[58]); + MULADD(at[16], at[57]); + MULADD(at[17], at[56]); + MULADD(at[18], at[55]); + MULADD(at[19], at[54]); + MULADD(at[20], at[53]); + MULADD(at[21], at[52]); + MULADD(at[22], at[51]); + MULADD(at[23], at[50]); + MULADD(at[24], at[49]); + MULADD(at[25], at[48]); + MULADD(at[26], at[47]); + MULADD(at[27], at[46]); + MULADD(at[28], at[45]); + MULADD(at[29], at[44]); + MULADD(at[30], at[43]); + MULADD(at[31], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[11], at[63]); + MULADD(at[12], at[62]); + MULADD(at[13], at[61]); + MULADD(at[14], at[60]); + MULADD(at[15], at[59]); + MULADD(at[16], at[58]); + MULADD(at[17], at[57]); + MULADD(at[18], at[56]); + MULADD(at[19], 
at[55]); + MULADD(at[20], at[54]); + MULADD(at[21], at[53]); + MULADD(at[22], at[52]); + MULADD(at[23], at[51]); + MULADD(at[24], at[50]); + MULADD(at[25], at[49]); + MULADD(at[26], at[48]); + MULADD(at[27], at[47]); + MULADD(at[28], at[46]); + MULADD(at[29], at[45]); + MULADD(at[30], at[44]); + MULADD(at[31], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[12], at[63]); + MULADD(at[13], at[62]); + MULADD(at[14], at[61]); + MULADD(at[15], at[60]); + MULADD(at[16], at[59]); + MULADD(at[17], at[58]); + MULADD(at[18], at[57]); + MULADD(at[19], at[56]); + MULADD(at[20], at[55]); + MULADD(at[21], at[54]); + MULADD(at[22], at[53]); + MULADD(at[23], at[52]); + MULADD(at[24], at[51]); + MULADD(at[25], at[50]); + MULADD(at[26], at[49]); + MULADD(at[27], at[48]); + MULADD(at[28], at[47]); + MULADD(at[29], at[46]); + MULADD(at[30], at[45]); + MULADD(at[31], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[13], at[63]); + MULADD(at[14], at[62]); + MULADD(at[15], at[61]); + MULADD(at[16], at[60]); + MULADD(at[17], at[59]); + MULADD(at[18], at[58]); + MULADD(at[19], at[57]); + MULADD(at[20], at[56]); + MULADD(at[21], at[55]); + MULADD(at[22], at[54]); + MULADD(at[23], at[53]); + MULADD(at[24], at[52]); + MULADD(at[25], at[51]); + MULADD(at[26], at[50]); + MULADD(at[27], at[49]); + MULADD(at[28], at[48]); + MULADD(at[29], at[47]); + MULADD(at[30], at[46]); + MULADD(at[31], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[14], at[63]); + MULADD(at[15], at[62]); + MULADD(at[16], at[61]); + MULADD(at[17], at[60]); + MULADD(at[18], at[59]); + MULADD(at[19], at[58]); + MULADD(at[20], at[57]); + MULADD(at[21], at[56]); + MULADD(at[22], at[55]); + MULADD(at[23], at[54]); + MULADD(at[24], at[53]); + MULADD(at[25], at[52]); + MULADD(at[26], at[51]); + MULADD(at[27], at[50]); + MULADD(at[28], at[49]); + MULADD(at[29], at[48]); + MULADD(at[30], at[47]); + MULADD(at[31], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 
*/ + COMBA_FORWARD; + MULADD(at[15], at[63]); + MULADD(at[16], at[62]); + MULADD(at[17], at[61]); + MULADD(at[18], at[60]); + MULADD(at[19], at[59]); + MULADD(at[20], at[58]); + MULADD(at[21], at[57]); + MULADD(at[22], at[56]); + MULADD(at[23], at[55]); + MULADD(at[24], at[54]); + MULADD(at[25], at[53]); + MULADD(at[26], at[52]); + MULADD(at[27], at[51]); + MULADD(at[28], at[50]); + MULADD(at[29], at[49]); + MULADD(at[30], at[48]); + MULADD(at[31], at[47]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[16], at[63]); + MULADD(at[17], at[62]); + MULADD(at[18], at[61]); + MULADD(at[19], at[60]); + MULADD(at[20], at[59]); + MULADD(at[21], at[58]); + MULADD(at[22], at[57]); + MULADD(at[23], at[56]); + MULADD(at[24], at[55]); + MULADD(at[25], at[54]); + MULADD(at[26], at[53]); + MULADD(at[27], at[52]); + MULADD(at[28], at[51]); + MULADD(at[29], at[50]); + MULADD(at[30], at[49]); + MULADD(at[31], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[17], at[63]); + MULADD(at[18], at[62]); + MULADD(at[19], at[61]); + MULADD(at[20], at[60]); + MULADD(at[21], at[59]); + MULADD(at[22], at[58]); + MULADD(at[23], at[57]); + MULADD(at[24], at[56]); + MULADD(at[25], at[55]); + MULADD(at[26], at[54]); + MULADD(at[27], at[53]); + MULADD(at[28], at[52]); + MULADD(at[29], at[51]); + MULADD(at[30], at[50]); + MULADD(at[31], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[18], at[63]); + MULADD(at[19], at[62]); + MULADD(at[20], at[61]); + MULADD(at[21], at[60]); + MULADD(at[22], at[59]); + MULADD(at[23], at[58]); + MULADD(at[24], at[57]); + MULADD(at[25], at[56]); + MULADD(at[26], at[55]); + MULADD(at[27], at[54]); + MULADD(at[28], at[53]); + MULADD(at[29], at[52]); + MULADD(at[30], at[51]); + MULADD(at[31], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[19], at[63]); + MULADD(at[20], at[62]); + MULADD(at[21], at[61]); + MULADD(at[22], at[60]); + MULADD(at[23], at[59]); + MULADD(at[24], 
at[58]); + MULADD(at[25], at[57]); + MULADD(at[26], at[56]); + MULADD(at[27], at[55]); + MULADD(at[28], at[54]); + MULADD(at[29], at[53]); + MULADD(at[30], at[52]); + MULADD(at[31], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[20], at[63]); + MULADD(at[21], at[62]); + MULADD(at[22], at[61]); + MULADD(at[23], at[60]); + MULADD(at[24], at[59]); + MULADD(at[25], at[58]); + MULADD(at[26], at[57]); + MULADD(at[27], at[56]); + MULADD(at[28], at[55]); + MULADD(at[29], at[54]); + MULADD(at[30], at[53]); + MULADD(at[31], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[21], at[63]); + MULADD(at[22], at[62]); + MULADD(at[23], at[61]); + MULADD(at[24], at[60]); + MULADD(at[25], at[59]); + MULADD(at[26], at[58]); + MULADD(at[27], at[57]); + MULADD(at[28], at[56]); + MULADD(at[29], at[55]); + MULADD(at[30], at[54]); + MULADD(at[31], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[22], at[63]); + MULADD(at[23], at[62]); + MULADD(at[24], at[61]); + MULADD(at[25], at[60]); + MULADD(at[26], at[59]); + MULADD(at[27], at[58]); + MULADD(at[28], at[57]); + MULADD(at[29], at[56]); + MULADD(at[30], at[55]); + MULADD(at[31], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[23], at[63]); + MULADD(at[24], at[62]); + MULADD(at[25], at[61]); + MULADD(at[26], at[60]); + MULADD(at[27], at[59]); + MULADD(at[28], at[58]); + MULADD(at[29], at[57]); + MULADD(at[30], at[56]); + MULADD(at[31], at[55]); + COMBA_STORE(C->dp[54]); + /* 55 */ + COMBA_FORWARD; + MULADD(at[24], at[63]); + MULADD(at[25], at[62]); + MULADD(at[26], at[61]); + MULADD(at[27], at[60]); + MULADD(at[28], at[59]); + MULADD(at[29], at[58]); + MULADD(at[30], at[57]); + MULADD(at[31], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[25], at[63]); + MULADD(at[26], at[62]); + MULADD(at[27], at[61]); + MULADD(at[28], at[60]); + MULADD(at[29], at[59]); + MULADD(at[30], at[58]); + MULADD(at[31], at[57]); + 
COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[26], at[63]); + MULADD(at[27], at[62]); + MULADD(at[28], at[61]); + MULADD(at[29], at[60]); + MULADD(at[30], at[59]); + MULADD(at[31], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[27], at[63]); + MULADD(at[28], at[62]); + MULADD(at[29], at[61]); + MULADD(at[30], at[60]); + MULADD(at[31], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[28], at[63]); + MULADD(at[29], at[62]); + MULADD(at[30], at[61]); + MULADD(at[31], at[60]); + COMBA_STORE(C->dp[59]); + /* 60 */ + COMBA_FORWARD; + MULADD(at[29], at[63]); + MULADD(at[30], at[62]); + MULADD(at[31], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[30], at[63]); + MULADD(at[31], at[62]); + COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[31], at[63]); + COMBA_STORE(C->dp[62]); + COMBA_STORE2(C->dp[63]); + C->used = 64; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + /* s_mp_sqr_comba_4: fully unrolled squaring of a 4-digit operand, judging by the name and the SQRADD pattern (the macro bodies are defined elsewhere). Reads a[0] .. a[3] through A->dp, builds 8 digits in the local array b, then sets B->used = 8 and B->sign = ZPOS, copies b into B->dp and calls mp_clamp(B). Nothing in this function checks A->used or the room behind B->dp; the caller must guarantee them. */ +void + s_mp_sqr_comba_4(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[8], c0, c1, c2; + /* c0, c1, c2 are never named again below; presumably the macros use them as the carry accumulator - confirm in the macro definitions. */ + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 (output k lists every pair a[i], a[j] with i + j == k, i <= j and both indices in 0..3: SQRADD when i == j, SQRADD2 when i < j - presumably SQRADD2 adds the product twice, confirm in the macro) */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + COMBA_STORE2(b[7]); + COMBA_FINI; + /* B is first touched here, after every read through a[] is done. NOTE(review): presumably the local buffer exists so that B may be the same mp_int as A - confirm against callers. */ + B->used = 8; + B->sign = ZPOS; + memcpy(B->dp, b, 8 * sizeof(mp_digit)); + mp_clamp(B); +} + +void + s_mp_sqr_comba_8(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[16], c0, c1, c2, sc0, sc1,
sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); + SQRADD2(a[4], a[6]); + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); + SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = ZPOS; + memcpy(B->dp, b, 16 * sizeof(mp_digit)); /* unchecked: B->dp must already have room for 16 digits; b[] was filled before B->dp is touched */ + mp_clamp(B); +} +/* s_mp_sqr_comba_16: fully unrolled 16-digit squaring in Comba (column-by-column) form. Reads A->dp[0..15], builds 32 result digits in a local buffer, copies them to B->dp, sets B->used = 32 and B->sign = ZPOS, then clamps B. A->used and the capacity of B->dp are not checked here. The COMBA_* and SQRADD* macros are defined outside this chunk. */ +void +s_mp_sqr_comba_16(const mp_int *A, 
mp_int *B) +{ + mp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); + SQRADDAC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); + SQRADDAC(a[1], a[8]); + SQRADDAC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); + SQRADDAC(a[1], a[9]); + SQRADDAC(a[2], a[8]); + SQRADDAC(a[3], a[7]); + SQRADDAC(a[4], a[6]); + SQRADDDB; + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); + SQRADDAC(a[1], a[10]); + SQRADDAC(a[2], a[9]); + SQRADDAC(a[3], a[8]); + SQRADDAC(a[4], a[7]); + SQRADDAC(a[5], a[6]); + SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); + SQRADDAC(a[1], a[11]); + SQRADDAC(a[2], a[10]); + 
SQRADDAC(a[3], a[9]); + SQRADDAC(a[4], a[8]); + SQRADDAC(a[5], a[7]); + SQRADDDB; + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); + SQRADDAC(a[1], a[12]); + SQRADDAC(a[2], a[11]); + SQRADDAC(a[3], a[10]); + SQRADDAC(a[4], a[9]); + SQRADDAC(a[5], a[8]); + SQRADDAC(a[6], a[7]); + SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); + SQRADDAC(a[1], a[13]); + SQRADDAC(a[2], a[12]); + SQRADDAC(a[3], a[11]); + SQRADDAC(a[4], a[10]); + SQRADDAC(a[5], a[9]); + SQRADDAC(a[6], a[8]); + SQRADDDB; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); + SQRADDAC(a[1], a[14]); + SQRADDAC(a[2], a[13]); + SQRADDAC(a[3], a[12]); + SQRADDAC(a[4], a[11]); + SQRADDAC(a[5], a[10]); + SQRADDAC(a[6], a[9]); + SQRADDAC(a[7], a[8]); + SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[15]); + SQRADDAC(a[2], a[14]); + SQRADDAC(a[3], a[13]); + SQRADDAC(a[4], a[12]); + SQRADDAC(a[5], a[11]); + SQRADDAC(a[6], a[10]); + SQRADDAC(a[7], a[9]); + SQRADDDB; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[15]); + SQRADDAC(a[3], a[14]); + SQRADDAC(a[4], a[13]); + SQRADDAC(a[5], a[12]); + SQRADDAC(a[6], a[11]); + SQRADDAC(a[7], a[10]); + SQRADDAC(a[8], a[9]); + SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[15]); + SQRADDAC(a[4], a[14]); + SQRADDAC(a[5], a[13]); + SQRADDAC(a[6], a[12]); + SQRADDAC(a[7], a[11]); + SQRADDAC(a[8], a[10]); + SQRADDDB; + SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[15]); + SQRADDAC(a[5], a[14]); + SQRADDAC(a[6], a[13]); + SQRADDAC(a[7], a[12]); + SQRADDAC(a[8], a[11]); + SQRADDAC(a[9], a[10]); + SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[15]); + SQRADDAC(a[6], a[14]); + SQRADDAC(a[7], a[13]); + 
SQRADDAC(a[8], a[12]); + SQRADDAC(a[9], a[11]); + SQRADDDB; + SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[15]); + SQRADDAC(a[7], a[14]); + SQRADDAC(a[8], a[13]); + SQRADDAC(a[9], a[12]); + SQRADDAC(a[10], a[11]); + SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[15]); + SQRADDAC(a[8], a[14]); + SQRADDAC(a[9], a[13]); + SQRADDAC(a[10], a[12]); + SQRADDDB; + SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[15]); + SQRADDAC(a[9], a[14]); + SQRADDAC(a[10], a[13]); + SQRADDAC(a[11], a[12]); + SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[15]); + SQRADDAC(a[10], a[14]); + SQRADDAC(a[11], a[13]); + SQRADDDB; + SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[15]); + SQRADDAC(a[11], a[14]); + SQRADDAC(a[12], a[13]); + SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[11], a[15]); + SQRADD2(a[12], a[14]); + SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[12], a[15]); + SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[13], a[15]); + SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + COMBA_STORE2(b[31]); + COMBA_FINI; + + B->used = 32; + B->sign = ZPOS; + memcpy(B->dp, b, 32 * sizeof(mp_digit)); /* unchecked: B->dp must already have room for 32 digits; b[] was filled before B->dp is touched */ + mp_clamp(B); +} +/* s_mp_sqr_comba_32: fully unrolled 32-digit squaring in Comba (column-by-column) form. Reads A->dp[0..31], builds 64 result digits in a local buffer, copies them to B->dp, sets B->used = 64 and B->sign = ZPOS, then clamps B. A->used and the capacity of B->dp are not checked here. The COMBA_* and SQRADD* macros are defined outside this chunk. */ +void +s_mp_sqr_comba_32(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + 
SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); + SQRADDAC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); + SQRADDAC(a[1], a[8]); + SQRADDAC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); + SQRADDAC(a[1], a[9]); + SQRADDAC(a[2], a[8]); + SQRADDAC(a[3], a[7]); + SQRADDAC(a[4], a[6]); + SQRADDDB; + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); + SQRADDAC(a[1], a[10]); + SQRADDAC(a[2], a[9]); + SQRADDAC(a[3], a[8]); + SQRADDAC(a[4], a[7]); + SQRADDAC(a[5], a[6]); + SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); + SQRADDAC(a[1], a[11]); + SQRADDAC(a[2], a[10]); + SQRADDAC(a[3], a[9]); + SQRADDAC(a[4], a[8]); + SQRADDAC(a[5], a[7]); + SQRADDDB; + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); + SQRADDAC(a[1], a[12]); + SQRADDAC(a[2], a[11]); + SQRADDAC(a[3], a[10]); + SQRADDAC(a[4], a[9]); + SQRADDAC(a[5], a[8]); + 
SQRADDAC(a[6], a[7]); + SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); + SQRADDAC(a[1], a[13]); + SQRADDAC(a[2], a[12]); + SQRADDAC(a[3], a[11]); + SQRADDAC(a[4], a[10]); + SQRADDAC(a[5], a[9]); + SQRADDAC(a[6], a[8]); + SQRADDDB; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); + SQRADDAC(a[1], a[14]); + SQRADDAC(a[2], a[13]); + SQRADDAC(a[3], a[12]); + SQRADDAC(a[4], a[11]); + SQRADDAC(a[5], a[10]); + SQRADDAC(a[6], a[9]); + SQRADDAC(a[7], a[8]); + SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); + SQRADDAC(a[1], a[15]); + SQRADDAC(a[2], a[14]); + SQRADDAC(a[3], a[13]); + SQRADDAC(a[4], a[12]); + SQRADDAC(a[5], a[11]); + SQRADDAC(a[6], a[10]); + SQRADDAC(a[7], a[9]); + SQRADDDB; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); + SQRADDAC(a[1], a[16]); + SQRADDAC(a[2], a[15]); + SQRADDAC(a[3], a[14]); + SQRADDAC(a[4], a[13]); + SQRADDAC(a[5], a[12]); + SQRADDAC(a[6], a[11]); + SQRADDAC(a[7], a[10]); + SQRADDAC(a[8], a[9]); + SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); + SQRADDAC(a[1], a[17]); + SQRADDAC(a[2], a[16]); + SQRADDAC(a[3], a[15]); + SQRADDAC(a[4], a[14]); + SQRADDAC(a[5], a[13]); + SQRADDAC(a[6], a[12]); + SQRADDAC(a[7], a[11]); + SQRADDAC(a[8], a[10]); + SQRADDDB; + SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); + SQRADDAC(a[1], a[18]); + SQRADDAC(a[2], a[17]); + SQRADDAC(a[3], a[16]); + SQRADDAC(a[4], a[15]); + SQRADDAC(a[5], a[14]); + SQRADDAC(a[6], a[13]); + SQRADDAC(a[7], a[12]); + SQRADDAC(a[8], a[11]); + SQRADDAC(a[9], a[10]); + SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); + SQRADDAC(a[1], a[19]); + SQRADDAC(a[2], a[18]); + SQRADDAC(a[3], a[17]); + SQRADDAC(a[4], a[16]); + SQRADDAC(a[5], 
a[15]); + SQRADDAC(a[6], a[14]); + SQRADDAC(a[7], a[13]); + SQRADDAC(a[8], a[12]); + SQRADDAC(a[9], a[11]); + SQRADDDB; + SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); + SQRADDAC(a[1], a[20]); + SQRADDAC(a[2], a[19]); + SQRADDAC(a[3], a[18]); + SQRADDAC(a[4], a[17]); + SQRADDAC(a[5], a[16]); + SQRADDAC(a[6], a[15]); + SQRADDAC(a[7], a[14]); + SQRADDAC(a[8], a[13]); + SQRADDAC(a[9], a[12]); + SQRADDAC(a[10], a[11]); + SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); + SQRADDAC(a[1], a[21]); + SQRADDAC(a[2], a[20]); + SQRADDAC(a[3], a[19]); + SQRADDAC(a[4], a[18]); + SQRADDAC(a[5], a[17]); + SQRADDAC(a[6], a[16]); + SQRADDAC(a[7], a[15]); + SQRADDAC(a[8], a[14]); + SQRADDAC(a[9], a[13]); + SQRADDAC(a[10], a[12]); + SQRADDDB; + SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); + SQRADDAC(a[1], a[22]); + SQRADDAC(a[2], a[21]); + SQRADDAC(a[3], a[20]); + SQRADDAC(a[4], a[19]); + SQRADDAC(a[5], a[18]); + SQRADDAC(a[6], a[17]); + SQRADDAC(a[7], a[16]); + SQRADDAC(a[8], a[15]); + SQRADDAC(a[9], a[14]); + SQRADDAC(a[10], a[13]); + SQRADDAC(a[11], a[12]); + SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); + SQRADDAC(a[1], a[23]); + SQRADDAC(a[2], a[22]); + SQRADDAC(a[3], a[21]); + SQRADDAC(a[4], a[20]); + SQRADDAC(a[5], a[19]); + SQRADDAC(a[6], a[18]); + SQRADDAC(a[7], a[17]); + SQRADDAC(a[8], a[16]); + SQRADDAC(a[9], a[15]); + SQRADDAC(a[10], a[14]); + SQRADDAC(a[11], a[13]); + SQRADDDB; + SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); + SQRADDAC(a[1], a[24]); + SQRADDAC(a[2], a[23]); + SQRADDAC(a[3], a[22]); + SQRADDAC(a[4], a[21]); + SQRADDAC(a[5], a[20]); + SQRADDAC(a[6], a[19]); + SQRADDAC(a[7], a[18]); + SQRADDAC(a[8], a[17]); + SQRADDAC(a[9], a[16]); + SQRADDAC(a[10], a[15]); + SQRADDAC(a[11], a[14]); + 
SQRADDAC(a[12], a[13]); + SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); + SQRADDAC(a[1], a[25]); + SQRADDAC(a[2], a[24]); + SQRADDAC(a[3], a[23]); + SQRADDAC(a[4], a[22]); + SQRADDAC(a[5], a[21]); + SQRADDAC(a[6], a[20]); + SQRADDAC(a[7], a[19]); + SQRADDAC(a[8], a[18]); + SQRADDAC(a[9], a[17]); + SQRADDAC(a[10], a[16]); + SQRADDAC(a[11], a[15]); + SQRADDAC(a[12], a[14]); + SQRADDDB; + SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); + SQRADDAC(a[1], a[26]); + SQRADDAC(a[2], a[25]); + SQRADDAC(a[3], a[24]); + SQRADDAC(a[4], a[23]); + SQRADDAC(a[5], a[22]); + SQRADDAC(a[6], a[21]); + SQRADDAC(a[7], a[20]); + SQRADDAC(a[8], a[19]); + SQRADDAC(a[9], a[18]); + SQRADDAC(a[10], a[17]); + SQRADDAC(a[11], a[16]); + SQRADDAC(a[12], a[15]); + SQRADDAC(a[13], a[14]); + SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); + SQRADDAC(a[1], a[27]); + SQRADDAC(a[2], a[26]); + SQRADDAC(a[3], a[25]); + SQRADDAC(a[4], a[24]); + SQRADDAC(a[5], a[23]); + SQRADDAC(a[6], a[22]); + SQRADDAC(a[7], a[21]); + SQRADDAC(a[8], a[20]); + SQRADDAC(a[9], a[19]); + SQRADDAC(a[10], a[18]); + SQRADDAC(a[11], a[17]); + SQRADDAC(a[12], a[16]); + SQRADDAC(a[13], a[15]); + SQRADDDB; + SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); + SQRADDAC(a[1], a[28]); + SQRADDAC(a[2], a[27]); + SQRADDAC(a[3], a[26]); + SQRADDAC(a[4], a[25]); + SQRADDAC(a[5], a[24]); + SQRADDAC(a[6], a[23]); + SQRADDAC(a[7], a[22]); + SQRADDAC(a[8], a[21]); + SQRADDAC(a[9], a[20]); + SQRADDAC(a[10], a[19]); + SQRADDAC(a[11], a[18]); + SQRADDAC(a[12], a[17]); + SQRADDAC(a[13], a[16]); + SQRADDAC(a[14], a[15]); + SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); + SQRADDAC(a[1], a[29]); + SQRADDAC(a[2], a[28]); + SQRADDAC(a[3], a[27]); + SQRADDAC(a[4], a[26]); + SQRADDAC(a[5], a[25]); + 
SQRADDAC(a[6], a[24]); + SQRADDAC(a[7], a[23]); + SQRADDAC(a[8], a[22]); + SQRADDAC(a[9], a[21]); + SQRADDAC(a[10], a[20]); + SQRADDAC(a[11], a[19]); + SQRADDAC(a[12], a[18]); + SQRADDAC(a[13], a[17]); + SQRADDAC(a[14], a[16]); + SQRADDDB; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); + SQRADDAC(a[1], a[30]); + SQRADDAC(a[2], a[29]); + SQRADDAC(a[3], a[28]); + SQRADDAC(a[4], a[27]); + SQRADDAC(a[5], a[26]); + SQRADDAC(a[6], a[25]); + SQRADDAC(a[7], a[24]); + SQRADDAC(a[8], a[23]); + SQRADDAC(a[9], a[22]); + SQRADDAC(a[10], a[21]); + SQRADDAC(a[11], a[20]); + SQRADDAC(a[12], a[19]); + SQRADDAC(a[13], a[18]); + SQRADDAC(a[14], a[17]); + SQRADDAC(a[15], a[16]); + SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[31]); + SQRADDAC(a[2], a[30]); + SQRADDAC(a[3], a[29]); + SQRADDAC(a[4], a[28]); + SQRADDAC(a[5], a[27]); + SQRADDAC(a[6], a[26]); + SQRADDAC(a[7], a[25]); + SQRADDAC(a[8], a[24]); + SQRADDAC(a[9], a[23]); + SQRADDAC(a[10], a[22]); + SQRADDAC(a[11], a[21]); + SQRADDAC(a[12], a[20]); + SQRADDAC(a[13], a[19]); + SQRADDAC(a[14], a[18]); + SQRADDAC(a[15], a[17]); + SQRADDDB; + SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[31]); + SQRADDAC(a[3], a[30]); + SQRADDAC(a[4], a[29]); + SQRADDAC(a[5], a[28]); + SQRADDAC(a[6], a[27]); + SQRADDAC(a[7], a[26]); + SQRADDAC(a[8], a[25]); + SQRADDAC(a[9], a[24]); + SQRADDAC(a[10], a[23]); + SQRADDAC(a[11], a[22]); + SQRADDAC(a[12], a[21]); + SQRADDAC(a[13], a[20]); + SQRADDAC(a[14], a[19]); + SQRADDAC(a[15], a[18]); + SQRADDAC(a[16], a[17]); + SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[31]); + SQRADDAC(a[4], a[30]); + SQRADDAC(a[5], a[29]); + SQRADDAC(a[6], a[28]); + SQRADDAC(a[7], a[27]); + SQRADDAC(a[8], a[26]); + SQRADDAC(a[9], a[25]); + SQRADDAC(a[10], a[24]); + SQRADDAC(a[11], a[23]); + SQRADDAC(a[12], a[22]); + SQRADDAC(a[13], 
a[21]); + SQRADDAC(a[14], a[20]); + SQRADDAC(a[15], a[19]); + SQRADDAC(a[16], a[18]); + SQRADDDB; + SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[31]); + SQRADDAC(a[5], a[30]); + SQRADDAC(a[6], a[29]); + SQRADDAC(a[7], a[28]); + SQRADDAC(a[8], a[27]); + SQRADDAC(a[9], a[26]); + SQRADDAC(a[10], a[25]); + SQRADDAC(a[11], a[24]); + SQRADDAC(a[12], a[23]); + SQRADDAC(a[13], a[22]); + SQRADDAC(a[14], a[21]); + SQRADDAC(a[15], a[20]); + SQRADDAC(a[16], a[19]); + SQRADDAC(a[17], a[18]); + SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[31]); + SQRADDAC(a[6], a[30]); + SQRADDAC(a[7], a[29]); + SQRADDAC(a[8], a[28]); + SQRADDAC(a[9], a[27]); + SQRADDAC(a[10], a[26]); + SQRADDAC(a[11], a[25]); + SQRADDAC(a[12], a[24]); + SQRADDAC(a[13], a[23]); + SQRADDAC(a[14], a[22]); + SQRADDAC(a[15], a[21]); + SQRADDAC(a[16], a[20]); + SQRADDAC(a[17], a[19]); + SQRADDDB; + SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[31]); + SQRADDAC(a[7], a[30]); + SQRADDAC(a[8], a[29]); + SQRADDAC(a[9], a[28]); + SQRADDAC(a[10], a[27]); + SQRADDAC(a[11], a[26]); + SQRADDAC(a[12], a[25]); + SQRADDAC(a[13], a[24]); + SQRADDAC(a[14], a[23]); + SQRADDAC(a[15], a[22]); + SQRADDAC(a[16], a[21]); + SQRADDAC(a[17], a[20]); + SQRADDAC(a[18], a[19]); + SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[31]); + SQRADDAC(a[8], a[30]); + SQRADDAC(a[9], a[29]); + SQRADDAC(a[10], a[28]); + SQRADDAC(a[11], a[27]); + SQRADDAC(a[12], a[26]); + SQRADDAC(a[13], a[25]); + SQRADDAC(a[14], a[24]); + SQRADDAC(a[15], a[23]); + SQRADDAC(a[16], a[22]); + SQRADDAC(a[17], a[21]); + SQRADDAC(a[18], a[20]); + SQRADDDB; + SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[31]); + SQRADDAC(a[9], a[30]); + SQRADDAC(a[10], a[29]); + SQRADDAC(a[11], a[28]); + SQRADDAC(a[12], a[27]); + SQRADDAC(a[13], 
a[26]); + SQRADDAC(a[14], a[25]); + SQRADDAC(a[15], a[24]); + SQRADDAC(a[16], a[23]); + SQRADDAC(a[17], a[22]); + SQRADDAC(a[18], a[21]); + SQRADDAC(a[19], a[20]); + SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[31]); + SQRADDAC(a[10], a[30]); + SQRADDAC(a[11], a[29]); + SQRADDAC(a[12], a[28]); + SQRADDAC(a[13], a[27]); + SQRADDAC(a[14], a[26]); + SQRADDAC(a[15], a[25]); + SQRADDAC(a[16], a[24]); + SQRADDAC(a[17], a[23]); + SQRADDAC(a[18], a[22]); + SQRADDAC(a[19], a[21]); + SQRADDDB; + SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[31]); + SQRADDAC(a[11], a[30]); + SQRADDAC(a[12], a[29]); + SQRADDAC(a[13], a[28]); + SQRADDAC(a[14], a[27]); + SQRADDAC(a[15], a[26]); + SQRADDAC(a[16], a[25]); + SQRADDAC(a[17], a[24]); + SQRADDAC(a[18], a[23]); + SQRADDAC(a[19], a[22]); + SQRADDAC(a[20], a[21]); + SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[31]); + SQRADDAC(a[12], a[30]); + SQRADDAC(a[13], a[29]); + SQRADDAC(a[14], a[28]); + SQRADDAC(a[15], a[27]); + SQRADDAC(a[16], a[26]); + SQRADDAC(a[17], a[25]); + SQRADDAC(a[18], a[24]); + SQRADDAC(a[19], a[23]); + SQRADDAC(a[20], a[22]); + SQRADDDB; + SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[31]); + SQRADDAC(a[13], a[30]); + SQRADDAC(a[14], a[29]); + SQRADDAC(a[15], a[28]); + SQRADDAC(a[16], a[27]); + SQRADDAC(a[17], a[26]); + SQRADDAC(a[18], a[25]); + SQRADDAC(a[19], a[24]); + SQRADDAC(a[20], a[23]); + SQRADDAC(a[21], a[22]); + SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[31]); + SQRADDAC(a[14], a[30]); + SQRADDAC(a[15], a[29]); + SQRADDAC(a[16], a[28]); + SQRADDAC(a[17], a[27]); + SQRADDAC(a[18], a[26]); + SQRADDAC(a[19], a[25]); + SQRADDAC(a[20], a[24]); + SQRADDAC(a[21], a[23]); + SQRADDDB; + SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + 
SQRADDSC(a[14], a[31]); + SQRADDAC(a[15], a[30]); + SQRADDAC(a[16], a[29]); + SQRADDAC(a[17], a[28]); + SQRADDAC(a[18], a[27]); + SQRADDAC(a[19], a[26]); + SQRADDAC(a[20], a[25]); + SQRADDAC(a[21], a[24]); + SQRADDAC(a[22], a[23]); + SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[31]); + SQRADDAC(a[16], a[30]); + SQRADDAC(a[17], a[29]); + SQRADDAC(a[18], a[28]); + SQRADDAC(a[19], a[27]); + SQRADDAC(a[20], a[26]); + SQRADDAC(a[21], a[25]); + SQRADDAC(a[22], a[24]); + SQRADDDB; + SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[31]); + SQRADDAC(a[17], a[30]); + SQRADDAC(a[18], a[29]); + SQRADDAC(a[19], a[28]); + SQRADDAC(a[20], a[27]); + SQRADDAC(a[21], a[26]); + SQRADDAC(a[22], a[25]); + SQRADDAC(a[23], a[24]); + SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[31]); + SQRADDAC(a[18], a[30]); + SQRADDAC(a[19], a[29]); + SQRADDAC(a[20], a[28]); + SQRADDAC(a[21], a[27]); + SQRADDAC(a[22], a[26]); + SQRADDAC(a[23], a[25]); + SQRADDDB; + SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[31]); + SQRADDAC(a[19], a[30]); + SQRADDAC(a[20], a[29]); + SQRADDAC(a[21], a[28]); + SQRADDAC(a[22], a[27]); + SQRADDAC(a[23], a[26]); + SQRADDAC(a[24], a[25]); + SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[31]); + SQRADDAC(a[20], a[30]); + SQRADDAC(a[21], a[29]); + SQRADDAC(a[22], a[28]); + SQRADDAC(a[23], a[27]); + SQRADDAC(a[24], a[26]); + SQRADDDB; + SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[31]); + SQRADDAC(a[21], a[30]); + SQRADDAC(a[22], a[29]); + SQRADDAC(a[23], a[28]); + SQRADDAC(a[24], a[27]); + SQRADDAC(a[25], a[26]); + SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[31]); + SQRADDAC(a[22], a[30]); + SQRADDAC(a[23], a[29]); + SQRADDAC(a[24], a[28]); + 
SQRADDAC(a[25], a[27]); + SQRADDDB; + SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[31]); + SQRADDAC(a[23], a[30]); + SQRADDAC(a[24], a[29]); + SQRADDAC(a[25], a[28]); + SQRADDAC(a[26], a[27]); + SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[31]); + SQRADDAC(a[24], a[30]); + SQRADDAC(a[25], a[29]); + SQRADDAC(a[26], a[28]); + SQRADDDB; + SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[31]); + SQRADDAC(a[25], a[30]); + SQRADDAC(a[26], a[29]); + SQRADDAC(a[27], a[28]); + SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[31]); + SQRADDAC(a[26], a[30]); + SQRADDAC(a[27], a[29]); + SQRADDDB; + SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[31]); + SQRADDAC(a[27], a[30]); + SQRADDAC(a[28], a[29]); + SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADD2(a[27], a[31]); + SQRADD2(a[28], a[30]); + SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADD2(a[28], a[31]); + SQRADD2(a[29], a[30]); + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADD2(a[29], a[31]); + SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADD2(a[30], a[31]); + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + COMBA_STORE2(b[63]); + COMBA_FINI; + + B->used = 64; + B->sign = ZPOS; /* the result sign is always forced to ZPOS here, whatever A->sign is */ + memcpy(B->dp, b, 64 * sizeof(mp_digit)); /* unchecked: B->dp must already have room for 64 digits; b[] was filled before B->dp is touched */ + mp_clamp(B); +} diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm new file mode 100644 index 0000000000..e50efa8de3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm @@ -0,0 +1,13066 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. 
If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +;/* TomsFastMath, a fast ISO C bignum library. +; * +; * This project is meant to fill in where LibTomMath +; * falls short. That is speed ;-) +; * +; * This project is public domain and free for all purposes. +; * +; * Tom St Denis, tomstdenis@iahu.ca +; */ + +;/* +; * The source file from which this assembly was derived +; * comes from TFM v0.03, which has the above license. +; * This source was taken from mp_comba_amd64.sun.s and converted to +; * MASM syntax. +; */ + +.CODE + +externdef memcpy:PROC + +public s_mp_mul_comba_4 +public s_mp_mul_comba_8 +public s_mp_mul_comba_16 +public s_mp_mul_comba_32 +public s_mp_sqr_comba_8 +public s_mp_sqr_comba_16 +public s_mp_sqr_comba_32 + + +; void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C) +; The three pointer arguments arrive in rcx, rdx, r8 (presumably A, B, C in Microsoft x64 calling-convention order) and are copied to rdi, rsi, rdx. rdi, rsi, r12, rbp and rbx are pushed on entry and popped again before ret. + ALIGN 16 +s_mp_mul_comba_4 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + sub rsp, 64 ; 64-byte scratch area: the 4 digits of the first operand go to [rsp+0..24], the 4 digits of the second to [rsp+32..56] + mov r9, qword ptr [16+rdi] ; offset 16 of each mp_int is read as its digit pointer (layout inferred from usage here; confirm against the mp_int definition) + mov rbx, rdx + mov rdx, qword ptr [16+rsi] + mov rax, qword ptr [r9] + mov qword ptr [-64+64+rsp], rax + mov r8, qword ptr [8+r9] + mov qword ptr [-56+64+rsp], r8 + mov rbp, qword ptr [16+r9] + mov qword ptr [-48+64+rsp], rbp + mov r12, qword ptr [24+r9] + mov qword ptr [-40+64+rsp], r12 + mov rcx, qword ptr [rdx] + mov qword ptr [-32+64+rsp], rcx + mov r10, qword ptr [8+rdx] + mov qword ptr [-24+64+rsp], r10 + mov r11, qword ptr [16+rdx] + xor r10d, r10d ; r10 = 0; it is copied into an accumulator register whenever a fresh carry word is needed + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [-16+64+rsp], r11 + mov r11, qword ptr [16+rbx] ; r11 = digit pointer of the output operand; result digits are stored through it + mov rax, qword ptr [24+rdx] + mov qword ptr [-8+64+rsp], rax + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-32+64+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov qword ptr [r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-24+64+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-56+64+rsp] 
+ mul qword ptr [-32+64+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-16+64+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-24+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-32+64+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-8+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-16+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-24+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-32+64+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-8+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-16+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-24+64+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-8+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov r12, r8 + mov rbp, r9 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-16+64+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r8, rbp + mov rcx, r12 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-8+64+rsp] + add r8, rax + adc rcx, rdx + adc r10, 0 + mov qword ptr [48+r11], r8 + mov esi, dword ptr [rsi] + xor esi, dword ptr [rdi] + test rcx, rcx + mov qword 
ptr [56+r11], rcx + mov dword ptr [8+rbx], 8 + jne L9 + ALIGN 16 +L18: + mov edx, dword ptr [8+rbx] + lea edi, dword ptr [-1+rdx] + test edi, edi + mov dword ptr [8+rbx], edi + je L9 + lea r10d, dword ptr [-2+rdx] + cmp qword ptr [r11+r10*8], 0 + je L18 +L9: + mov edx, dword ptr [8+rbx] + xor r11d, r11d + test edx, edx + cmovne r11d, esi + mov dword ptr [rbx], r11d + add rsp, 64 + pop rbx + pop rbp + pop r12 + + pop rsi + pop rdi + + ret + +s_mp_mul_comba_4 ENDP +
+; s_mp_mul_comba_8: unrolled 8x8-word multiply. Copies eight qwords from the array at [A+16] and eight from the array at [B+16] to the stack, then writes sixteen qwords to the array at [C+16].
+; void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+; Result word k is the sum of a[i]*b[j] over i+j == k, carried through add/adc/adc chains. Afterwards the dword at C+8 is 16 less leading zero words and the dword at C+0 is [A+0] XOR [B+0] (0 if that count is 0).
+    ALIGN 16
+s_mp_mul_comba_8 PROC
+; Arguments are read from rcx (A), rdx (B) and r8 (C); rdi, rsi, r12, rbp and rbx are pushed on entry and popped before ret.
+    push rdi
+    push rsi
+
+    mov rdi, rcx                          ; rdi = A
+    mov rsi, rdx                          ; rsi = B
+    mov rdx, r8                           ; rdx = C
+
+    push r12
+    push rbp
+    push rbx
+    mov rbx, rdx                          ; rbx = C for the rest of the routine
+    sub rsp, 8+128                        ; scratch: a[0..7] at [-120+128+rsp]..[-64+128+rsp], b[0..7] at [-56+128+rsp]..[128+rsp]
+    mov rdx, qword ptr [16+rdi]           ; rdx = pointer stored at A+16 (word array; presumably mp_int.dp - confirm against the struct)
+    mov r8, qword ptr [rdx]
+    mov qword ptr [-120+128+rsp], r8      ; a[0]
+    mov rbp, qword ptr [8+rdx]
+    mov qword ptr [-112+128+rsp], rbp     ; a[1]
+    mov r9, qword ptr [16+rdx]
+    mov qword ptr [-104+128+rsp], r9      ; a[2]
+    mov r12, qword ptr [24+rdx]
+    mov qword ptr [-96+128+rsp], r12      ; a[3]
+    mov rcx, qword ptr [32+rdx]
+    mov qword ptr [-88+128+rsp], rcx      ; a[4]
+    mov r10, qword ptr [40+rdx]
+    mov qword ptr [-80+128+rsp], r10      ; a[5]
+    mov r11, qword ptr [48+rdx]
+    mov qword ptr [-72+128+rsp], r11      ; a[6]
+    mov rax, qword ptr [56+rdx]           ; a[7] is loaded before rdx is repointed at B's words
+    mov rdx, qword ptr [16+rsi]           ; rdx = pointer stored at B+16
+    mov qword ptr [-64+128+rsp], rax      ; a[7]
+    mov r8, qword ptr [rdx]
+    mov qword ptr [-56+128+rsp], r8       ; b[0]
+    mov rbp, qword ptr [8+rdx]
+    mov qword ptr [-48+128+rsp], rbp      ; b[1]
+    mov r9, qword ptr [16+rdx]
+    mov qword ptr [-40+128+rsp], r9       ; b[2]
+    mov r12, qword ptr [24+rdx]
+    mov qword ptr [-32+128+rsp], r12      ; b[3]
+    mov rcx, qword ptr [32+rdx]
+    mov qword ptr [-24+128+rsp], rcx      ; b[4]
+    mov r10, qword ptr [40+rdx]
+    mov qword ptr [-16+128+rsp], r10      ; b[5]
+    mov r11, qword ptr [48+rdx]
+    xor r10d, r10d                        ; r10 = 0; copied into an accumulator register at each column change
+    mov r8, r10
+    mov r9, r10
+    mov rbp, r10
+    mov qword ptr [-8+128+rsp], r11       ; b[6]
+    mov r11, qword ptr [16+rbx]           ; r11 = pointer stored at C+16 (output words)
+    mov rax, qword ptr [56+rdx]
+    mov qword ptr [128+rsp], rax          ; b[7]
+    mov rax, qword ptr [-120+128+rsp]     ; column 0: a[0]*b[0]
+    mul qword ptr [-56+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rbp, 0
+    mov qword ptr [r11], r8               ; result word 0
+    mov r8, rbp
+    mov rbp, r10
+    mov rax, qword ptr [-120+128+rsp]     ; column 1
+    mul qword ptr [-48+128+rsp]
+    add r9, rax
+    adc r8, rdx
+    adc rbp, 0
+    mov r12, rbp
+    mov rax, qword ptr [-112+128+rsp]
+    mul qword ptr [-56+128+rsp]
+    add r9, rax
+    adc r8, rdx
+    adc r12, 0
+    mov qword ptr [8+r11], r9             ; result word 1
+    mov r9, r12
+    mov r12, r10
+    mov rax, qword ptr [-120+128+rsp]     ; column 2
+    mul qword ptr [-40+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc r12, 0
+    mov rcx, r12
+    mov rax, qword ptr [-112+128+rsp]
+    mul qword ptr [-48+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rbp, r9
+    mov r12, rcx
+    mov rax, qword ptr [-104+128+rsp]
+    mul qword ptr [-56+128+rsp]
+    add r8, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [16+r11], r8            ; result word 2
+    mov r9, r12
+    mov rcx, rbp
+    mov r8, r10
+    mov rax, qword ptr [-120+128+rsp]     ; column 3
+    mul qword ptr [-32+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-112+128+rsp]
+    mul qword ptr [-40+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-104+128+rsp]
+    mul qword ptr [-48+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rbp, r9
+    mov r12, r8
+    mov rax, qword ptr [-96+128+rsp]
+    mul qword ptr [-56+128+rsp]
+    add rcx, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [24+r11], rcx           ; result word 3
+    mov r9, r12
+    mov r8, rbp
+    mov rcx, r10
+    mov rax, qword ptr [-120+128+rsp]     ; column 4
+    mul qword ptr [-24+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-112+128+rsp]
+    mul qword ptr [-32+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-104+128+rsp]
+    mul qword ptr [-40+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-96+128+rsp]
+    mul qword ptr [-48+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rbp, r9
+    mov r12, rcx
+    mov rax, qword ptr [-88+128+rsp]
+    mul qword ptr [-56+128+rsp]
+    add r8, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [32+r11], r8            ; result word 4
+    mov r9, r12
+    mov rcx, rbp
+    mov r8, r10
+    mov rax, qword ptr [-120+128+rsp]     ; column 5
+    mul qword ptr [-16+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-112+128+rsp]
+    mul qword ptr [-24+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-104+128+rsp]
+    mul qword ptr [-32+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-96+128+rsp]
+    mul qword ptr [-40+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-88+128+rsp]
+    mul qword ptr [-48+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rbp, r9
+    mov r12, r8
+    mov rax, qword ptr [-80+128+rsp]
+    mul qword ptr [-56+128+rsp]
+    add rcx, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [40+r11], rcx           ; result word 5
+    mov r9, r12
+    mov r8, rbp
+    mov rcx, r10
+    mov rax, qword ptr [-120+128+rsp]     ; column 6
+    mul qword ptr [-8+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-112+128+rsp]
+    mul qword ptr [-16+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-104+128+rsp]
+    mul qword ptr [-24+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-96+128+rsp]
+    mul qword ptr [-32+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-88+128+rsp]
+    mul qword ptr [-40+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-80+128+rsp]
+    mul qword ptr [-48+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rbp, r9
+    mov r12, rcx
+    mov rax, qword ptr [-72+128+rsp]
+    mul qword ptr [-56+128+rsp]
+    add r8, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [48+r11], r8            ; result word 6
+    mov r9, r12
+    mov rcx, rbp
+    mov r8, r10
+    mov rax, qword ptr [-120+128+rsp]     ; column 7 (the longest: eight products)
+    mul qword ptr [128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-112+128+rsp]
+    mul qword ptr [-8+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-104+128+rsp]
+    mul qword ptr [-16+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-96+128+rsp]
+    mul qword ptr [-24+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-88+128+rsp]
+    mul qword ptr [-32+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-80+128+rsp]
+    mul qword ptr [-40+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-72+128+rsp]
+    mul qword ptr [-48+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rbp, r9
+    mov r12, r8
+    mov rax, qword ptr [-64+128+rsp]
+    mul qword ptr [-56+128+rsp]
+    add rcx, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [56+r11], rcx           ; result word 7
+    mov r9, r12
+    mov r8, rbp
+    mov rcx, r10
+    mov rax, qword ptr [-112+128+rsp]     ; column 8
+    mul qword ptr [128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-104+128+rsp]
+    mul qword ptr [-8+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-96+128+rsp]
+    mul qword ptr [-16+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-88+128+rsp]
+    mul qword ptr [-24+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-80+128+rsp]
+    mul qword ptr [-32+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-72+128+rsp]
+    mul qword ptr [-40+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rbp, r9
+    mov r12, rcx
+    mov rax, qword ptr [-64+128+rsp]
+    mul qword ptr [-48+128+rsp]
+    add r8, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [64+r11], r8            ; result word 8
+    mov r9, r12
+    mov rcx, rbp
+    mov r8, r10
+    mov rax, qword ptr [-104+128+rsp]     ; column 9
+    mul qword ptr [128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-96+128+rsp]
+    mul qword ptr [-8+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-88+128+rsp]
+    mul qword ptr [-16+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-80+128+rsp]
+    mul qword ptr [-24+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-72+128+rsp]
+    mul qword ptr [-32+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rbp, r9
+    mov r12, r8
+    mov rax, qword ptr [-64+128+rsp]
+    mul qword ptr [-40+128+rsp]
+    add rcx, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [72+r11], rcx           ; result word 9
+    mov r9, r12
+    mov r8, rbp
+    mov rcx, r10
+    mov rax, qword ptr [-96+128+rsp]      ; column 10
+    mul qword ptr [128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-88+128+rsp]
+    mul qword ptr [-8+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-80+128+rsp]
+    mul qword ptr [-16+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-72+128+rsp]
+    mul qword ptr [-24+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rbp, r9
+    mov r12, rcx
+    mov rax, qword ptr [-64+128+rsp]
+    mul qword ptr [-32+128+rsp]
+    add r8, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [80+r11], r8            ; result word 10
+    mov r9, r12
+    mov rcx, rbp
+    mov r8, r10
+    mov rax, qword ptr [-88+128+rsp]      ; column 11
+    mul qword ptr [128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-80+128+rsp]
+    mul qword ptr [-8+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rax, qword ptr [-72+128+rsp]
+    mul qword ptr [-16+128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rbp, r9
+    mov r12, r8
+    mov rax, qword ptr [-64+128+rsp]
+    mul qword ptr [-24+128+rsp]
+    add rcx, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [88+r11], rcx           ; result word 11
+    mov r9, r12
+    mov r8, rbp
+    mov rcx, r10
+    mov rax, qword ptr [-80+128+rsp]      ; column 12
+    mul qword ptr [128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rax, qword ptr [-72+128+rsp]
+    mul qword ptr [-8+128+rsp]
+    add r8, rax
+    adc r9, rdx
+    adc rcx, 0
+    mov rbp, r9
+    mov r12, rcx
+    mov rax, qword ptr [-64+128+rsp]
+    mul qword ptr [-16+128+rsp]
+    add r8, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [96+r11], r8            ; result word 12
+    mov r9, r12
+    mov rcx, rbp
+    mov r8, r10
+    mov rax, qword ptr [-72+128+rsp]      ; column 13
+    mul qword ptr [128+rsp]
+    add rcx, rax
+    adc r9, rdx
+    adc r8, 0
+    mov r12, r8
+    mov rbp, r9
+    mov rax, qword ptr [-64+128+rsp]
+    mul qword ptr [-8+128+rsp]
+    add rcx, rax
+    adc rbp, rdx
+    adc r12, 0
+    mov qword ptr [104+r11], rcx          ; result word 13
+    mov r8, rbp
+    mov rcx, r12
+    mov rax, qword ptr [-64+128+rsp]      ; column 14: a[7]*b[7]
+    mul qword ptr [128+rsp]
+    add r8, rax
+    adc rcx, rdx
+    adc r10, 0                            ; r10 is not read again in this routine
+    mov qword ptr [112+r11], r8           ; result word 14
+    mov esi, dword ptr [rsi]              ; esi = dword at B+0 ...
+    xor esi, dword ptr [rdi]              ; ... XOR dword at A+0 (presumably the sign fields - confirm against mp_int)
+    test rcx, rcx                         ; is the top result word zero? (flags survive the two movs below)
+    mov qword ptr [120+r11], rcx          ; result word 15
+    mov dword ptr [8+rbx], 16             ; dword at C+8 = 16 (word count; presumably mp_int.used - confirm)
+    jne L35                               ; top word nonzero: nothing to trim
+    ALIGN 16
+L43:                                      ; drop leading zero words: decrement the count while the new top word is 0
+    mov edx, dword ptr [8+rbx]
+    lea edi, dword ptr [-1+rdx]           ; edi = count - 1 (rdi no longer needed as A)
+    test edi, edi
+    mov dword ptr [8+rbx], edi
+    je L35                                ; count reached 0: stop
+    lea eax, dword ptr [-2+rdx]           ; index of the word that is now the top one
+    cmp qword ptr [r11+rax*8], 0
+    je L43
+L35:
+    mov r11d, dword ptr [8+rbx]
+    xor edx, edx
+    test r11d, r11d
+    cmovne edx, esi                       ; keep the XORed value only when the count is nonzero, else 0
+    mov dword ptr [rbx], edx              ; store to the dword at C+0
+    add rsp, 8+128
+    pop rbx
+    pop rbp
+    pop r12
+
+    pop rsi
+    pop rdi
+
+    ret
+
+s_mp_mul_comba_8 ENDP
+ + +; void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C); + + ALIGN 16 +s_mp_mul_comba_16 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + mov rbx, rdx + sub rsp, 136+128 + mov rax, qword ptr [16+rdi] + mov r8, qword ptr [rax] + mov qword ptr [-120+128+rsp], r8 + mov rbp, qword ptr [8+rax] + mov qword ptr [-112+128+rsp], rbp + mov r9, qword ptr [16+rax] + mov qword ptr [-104+128+rsp], r9 + mov r12, qword ptr [24+rax] + mov qword ptr [-96+128+rsp], r12 + mov rcx, qword ptr [32+rax] + mov qword ptr [-88+128+rsp], rcx + mov r10, qword ptr [40+rax] + mov qword ptr [-80+128+rsp], r10 + mov rdx, qword ptr [48+rax] + mov qword ptr [-72+128+rsp], rdx + mov r11, qword ptr [56+rax] + mov qword ptr [-64+128+rsp], r11 + mov r8, qword ptr [64+rax] + mov qword ptr [-56+128+rsp], r8 + mov rbp, qword ptr [72+rax] + mov qword ptr [-48+128+rsp], rbp + mov r9, qword ptr [80+rax] + mov qword ptr [-40+128+rsp], r9 + mov r12, qword ptr [88+rax] + mov qword ptr [-32+128+rsp], r12 + mov rcx, qword ptr [96+rax] + mov qword ptr [-24+128+rsp], rcx + mov r10, qword ptr [104+rax] + mov qword ptr [-16+128+rsp], r10 + mov rdx, qword ptr [112+rax] + mov qword ptr [-8+128+rsp], rdx + mov r11, qword ptr [120+rax] + mov qword ptr [128+rsp], r11 + mov r11, qword ptr [16+rsi] + mov r8, qword ptr [r11] + mov qword ptr
[8+128+rsp], r8 + mov rbp, qword ptr [8+r11] + mov qword ptr [16+128+rsp], rbp + mov r9, qword ptr [16+r11] + mov qword ptr [24+128+rsp], r9 + mov r12, qword ptr [24+r11] + mov qword ptr [32+128+rsp], r12 + mov rcx, qword ptr [32+r11] + mov qword ptr [40+128+rsp], rcx + mov r10, qword ptr [40+r11] + mov qword ptr [48+128+rsp], r10 + mov rdx, qword ptr [48+r11] + mov qword ptr [56+128+rsp], rdx + mov rax, qword ptr [56+r11] + mov qword ptr [64+128+rsp], rax + mov r8, qword ptr [64+r11] + mov qword ptr [72+128+rsp], r8 + mov rbp, qword ptr [72+r11] + mov qword ptr [80+128+rsp], rbp + mov r9, qword ptr [80+r11] + mov qword ptr [88+128+rsp], r9 + mov r12, qword ptr [88+r11] + mov qword ptr [96+128+rsp], r12 + mov rcx, qword ptr [96+r11] + mov qword ptr [104+128+rsp], rcx + mov r10, qword ptr [104+r11] + mov qword ptr [112+128+rsp], r10 + mov rdx, qword ptr [112+r11] + xor r10d, r10d + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [120+128+rsp], rdx + mov rax, qword ptr [120+r11] + mov qword ptr [128+128+rsp], rax + mov r11, qword ptr [16+rbx] + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov qword ptr [r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [16+128+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [8+128+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr 
[32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr 
[48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [48+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [56+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr 
[48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [64+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [72+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr 
[80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [80+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr 
[32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [88+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [96+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr 
[112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [104+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword 
ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [112+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword 
ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [120+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr 
[56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [128+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr 
[32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [136+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [144+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr 
[112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [152+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr 
[64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [160+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [168+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr 
[96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [176+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [184+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr 
[96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [192+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [200+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [208+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [112+128+rsp] + 
add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [216+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [224+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov r12, r8 + mov rbp, r9 + mov rax, qword ptr [128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [232+r11], rcx + mov r8, rbp + mov rcx, r12 + mov rax, qword ptr [128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc rcx, rdx + adc r10, 0 + mov qword ptr [240+r11], r8 + mov esi, dword ptr [rsi] + xor esi, dword ptr [rdi] + test rcx, rcx + mov qword ptr [248+r11], rcx + mov dword ptr [8+rbx], 32 + jne L76 + ALIGN 16 +L84: + mov edx, dword ptr [8+rbx] + lea edi, dword ptr [-1+rdx] + test edi, edi + mov dword ptr [8+rbx], edi + je L76 + lea eax, dword ptr [-2+rdx] + cmp qword ptr [r11+rax*8], 0 + je L84 +L76: + mov edx, dword ptr [8+rbx] + xor r11d, r11d + test edx, edx + cmovne r11d, esi + mov dword ptr [rbx], r11d + add rsp, 136+128 + pop rbx + pop rbp + pop r12 + + pop rsi + pop rdi + + ret + +s_mp_mul_comba_16 ENDP + +; void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C) + + + ALIGN 16 +s_mp_mul_comba_32 PROC ; a "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push rbp + mov rbp, rsp + push r13 + mov r13, rdx +; mov edx, 256 + mov r8d, 256 + push r12 + mov r12, rsi + push rbx + mov rbx, rdi + sub 
rsp, 520+32 ; +32 for "home" storage +; mov rsi, qword ptr [16+rdi] +; lea rdi, qword ptr [-544+rbp] + mov rdx, qword ptr [16+rdi] + lea rcx, qword ptr [-544+rbp] + call memcpy +; mov rsi, qword ptr [16+r12] +; lea rdi, qword ptr [-288+rbp] +; mov edx, 256 + mov rdx, qword ptr [16+r12] + lea rcx, qword ptr [-288+rbp] + mov r8d, 256 + call memcpy + mov r9, qword ptr [16+r13] + xor r8d, r8d + mov rsi, r8 + mov rdi, r8 + mov r10, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc rdi, rdx + adc r10, 0 + mov qword ptr [r9], rsi + mov rsi, r10 + mov r10, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-280+rbp] + add rdi, rax + adc rsi, rdx + adc r10, 0 + mov r11, r10 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-288+rbp] + add rdi, rax + adc rsi, rdx + adc r11, 0 + mov qword ptr [8+r9], rdi + mov rdi, r11 + mov r11, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc r11, 0 + mov rcx, r11 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-528+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [16+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-520+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [24+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc 
rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-512+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [32+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-504+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [40+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-496+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [48+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr 
[-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-488+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [56+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-480+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [64+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov 
rax, qword ptr [-536+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-472+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [72+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-280+rbp] + add 
rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-464+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [80+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-456+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [88+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr 
[-512+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-448+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [96+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-440+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [104+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov 
rax, qword ptr [-432+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [112+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-424+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [120+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + 
adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-416+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [128+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr 
[-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-408+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [136+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov 
rax, qword ptr [-520+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-400+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [144+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-152+rbp] + add 
rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-392+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [152+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr 
[-536+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-384+rbp] + mul qword ptr 
[-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [160+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + 
mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-376+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [168+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + 
adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-368+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [176+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr 
[-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-360+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [184+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov 
rax, qword ptr [-504+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-352+rbp] + mul 
qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [192+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr 
[-400+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-344+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [200+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc 
rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-336+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [208+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul 
qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 
+ mov rax, qword ptr [-368+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-328+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [216+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-160+rbp] + add 
rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-320+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [224+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr 
[-528+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, 
rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-312+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [232+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr 
[-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov 
r10, rdi + mov r11, rcx + mov rax, qword ptr [-304+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [240+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-176+rbp] + add rcx, rax 
+ adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [248+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + 
mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 
0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [256+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-128+rbp] + add 
rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [264+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr 
[-520+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + 
adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [272+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr 
[-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [280+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov 
rax, qword ptr [-504+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + 
adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [288+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul 
qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [296+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 
+ mov rax, qword ptr [-456+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-224+rbp] + add 
rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [304+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr 
[-344+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [312+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx 
+ adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [320+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr 
[-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [328+r9], rcx + mov rdi, r11 + mov r11, r10 + mov r10, r8 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-40+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-48+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-56+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-64+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, 
qword ptr [-424+rbp] + mul qword ptr [-72+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-80+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-88+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-96+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-104+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-112+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-120+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-128+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-136+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-144+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-152+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-160+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-168+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-176+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-184+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-192+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-296+rbp] + mul qword ptr [-200+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov qword ptr [336+r9], r11 + mov rsi, r10 + mov r10, r8 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-40+rbp] + add rdi, rax + adc rsi, rdx + adc r10, 0 + mov rcx, r10 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-48+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + 
mov rax, qword ptr [-432+rbp] + mul qword ptr [-56+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-64+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-72+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-80+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-88+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-96+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-104+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-112+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-120+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-128+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-136+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-144+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-152+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-160+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-168+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-176+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-184+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov r11, rsi + mov r10, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-192+rbp] + add rdi, rax + adc r11, rdx + adc r10, 0 + mov qword ptr [344+r9], rdi + mov rcx, r11 + mov rdi, r10 + mov r11, r8 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-40+rbp] + add 
rcx, rax + adc rdi, rdx + adc r11, 0 + mov rsi, r11 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [352+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, 
qword ptr [-432+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [360+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [368+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword 
ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [376+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, 
qword ptr [-368+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [384+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc 
rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [392+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr 
[400+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [408+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, 
qword ptr [-328+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [416+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [424+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [432+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [440+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr 
[-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [448+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [456+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [464+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword 
ptr [-304+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [472+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [480+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r11, rcx + mov r10, rdi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [488+r9], rsi + mov rcx, r10 + mov rsi, r11 + mov rax, qword ptr [-296+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rsi, rdx + adc r8, 0 + mov qword ptr [496+r9], rcx + mov ecx, dword ptr [r12] + xor ecx, dword ptr [rbx] + test rsi, rsi + mov qword ptr [504+r9], rsi + mov dword ptr [8+r13], 64 + jne L149 + ALIGN 16 +L157: + mov edx, dword ptr [8+r13] + lea ebx, dword ptr [-1+rdx] + test ebx, ebx + mov dword ptr [8+r13], ebx + je L149 + lea r12d, dword ptr [-2+rdx] + cmp qword ptr [r9+r12*8], 0 + je L157 +L149: + mov r9d, dword ptr [8+r13] + xor edx, edx + test r9d, r9d + cmovne edx, ecx + mov dword ptr [r13], edx + add rsp, 520+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop rbp + pop rsi + pop rdi + + ret + +s_mp_mul_comba_32 ENDP + +
+; s_mp_sqr_comba_4: fully unrolled squaring of a 4-qword operand into 8 qwords.
+;
+; The first argument (rcx) is copied to rdi and the second (rdx) to rsi/r11.
+; The qword at [A+16] is loaded as the pointer to the input digits a0..a3 and
+; the qword at [B+16] as the pointer to the output digits.
+; NOTE(review): offsets 0 / 8 / 16 look like the mp_int sign / used / digit
+; pointer fields -- confirm against the mp_int definition.
+;
+; Result column k (the sum of a[i]*a[j] with i+j == k) is built in a three
+; register add/adc/adc carry chain. Each cross product (i != j) is added
+; twice, each square a[i]*a[i] once. rsi is zeroed up front and used as the
+; source of zero when an accumulator register is recycled.
+; Finished columns are staged in the 80-byte stack area and copied to the
+; output digits only after the last input digit has been read
+; (presumably so that B may share its digit array with A -- TODO confirm).
+; void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+
+    ALIGN 16
+s_mp_sqr_comba_4 PROC
+
+    push rdi
+    push rsi
+
+    mov rdi, rcx                        ; rdi = A
+    mov rsi, rdx                        ; rsi = B
+
+    push rbp
+    push rbx
+    sub rsp, 80                         ; staging area for the result columns
+    mov r11, rsi                        ; r11 = B for the rest of the routine
+    xor esi, esi                        ; rsi = 0
+    mov r10, rsi
+    mov rbp, rsi
+    mov r8, rsi
+    mov rbx, rsi
+    mov rcx, qword ptr [16+rdi]         ; rcx = input digit array
+    mov rdi, rsi
+; column 0: a0*a0
+    mov rax, qword ptr [rcx]
+    mul rax
+    add r10, rax
+    adc rbx, rdx
+    adc rdi, 0
+    mov qword ptr [-72+80+rsp], r10
+; column 1: 2*a0*a1
+    mov rax, qword ptr [rcx]
+    mul qword ptr [8+rcx]
+    add rbx, rax
+    adc rdi, rdx
+    adc rbp, 0
+    add rbx, rax
+    adc rdi, rdx
+    adc rbp, 0
+    mov qword ptr [-64+80+rsp], rbx
+; column 2: 2*a0*a2 + a1*a1
+    mov rax, qword ptr [rcx]
+    mul qword ptr [16+rcx]
+    add rdi, rax
+    adc rbp, rdx
+    adc r8, 0
+    add rdi, rax
+    adc rbp, rdx
+    adc r8, 0
+    mov rbx, rbp
+    mov rbp, r8
+    mov rax, qword ptr [8+rcx]
+    mul rax
+    add rdi, rax
+    adc rbx, rdx
+    adc rbp, 0
+    mov qword ptr [-56+80+rsp], rdi
+; column 3: 2*a0*a3 + 2*a1*a2
+    mov r9, rbp
+    mov r8, rbx
+    mov rdi, rsi
+    mov rax, qword ptr [rcx]
+    mul qword ptr [24+rcx]
+    add r8, rax
+    adc r9, rdx
+    adc rdi, 0
+    add r8, rax
+    adc r9, rdx
+    adc rdi, 0
+    mov rbx, r9
+    mov rbp, rdi
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [16+rcx]
+    add r8, rax
+    adc rbx, rdx
+    adc rbp, 0
+    add r8, rax
+    adc rbx, rdx
+    adc rbp, 0
+    mov qword ptr [-48+80+rsp], r8
+; column 4: 2*a1*a3 + a2*a2
+    mov r9, rbp
+    mov rdi, rbx
+    mov r8, rsi
+    mov dword ptr [8+r11], 8            ; dword [B+8] = 8 (trimmed again below)
+    mov dword ptr [r11], 0              ; dword [B] = 0
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [24+rcx]
+    add rdi, rax
+    adc r9, rdx
+    adc r8, 0
+    add rdi, rax
+    adc r9, rdx
+    adc r8, 0
+    mov rbx, r9
+    mov rbp, r8
+    mov rax, qword ptr [16+rcx]
+    mul rax
+    add rdi, rax
+    adc rbx, rdx
+    adc rbp, 0
+    mov rax, rbp
+    mov qword ptr [-40+80+rsp], rdi
+; column 5: 2*a2*a3
+    mov rbp, rbx
+    mov rdi, rax
+    mov rbx, rsi
+    mov rax, qword ptr [16+rcx]
+    mul qword ptr [24+rcx]
+    add rbp, rax
+    adc rdi, rdx
+    adc rbx, 0
+    add rbp, rax
+    adc rdi, rdx
+    adc rbx, 0
+    mov qword ptr [-32+80+rsp], rbp
+; columns 6 and 7: a3*a3 plus the carried-in words (low -> 6, high -> 7)
+    mov r9, rbx
+    mov rax, qword ptr [24+rcx]
+    mul rax
+    add rdi, rax
+    adc r9, rdx
+    adc rsi, 0
+    mov rdx, qword ptr [16+r11]         ; rdx = output digit array
+    mov qword ptr [-24+80+rsp], rdi
+    mov qword ptr [-16+80+rsp], r9
+; copy the eight staged columns to the output digits
+    mov qword ptr [rdx], r10            ; r10 still holds column 0
+    mov r8, qword ptr [-64+80+rsp]
+    mov qword ptr [8+rdx], r8
+    mov rbp, qword ptr [-56+80+rsp]
+    mov qword ptr [16+rdx], rbp
+    mov rdi, qword ptr [-48+80+rsp]
+    mov qword ptr [24+rdx], rdi
+    mov rsi, qword ptr [-40+80+rsp]
+    mov qword ptr [32+rdx], rsi
+    mov rbx, qword ptr [-32+80+rsp]
+    mov qword ptr [40+rdx], rbx
+    mov rcx, qword ptr [-24+80+rsp]
+    mov qword ptr [48+rdx], rcx
+    mov rax, qword ptr [-16+80+rsp]
+    mov qword ptr [56+rdx], rax
+; Trim high zero qwords: lower the count at [B+8] while the highest counted
+; output qword is 0. If the count ends up 0, dword [B] is written as 0;
+; otherwise dword [B] is reloaded and stored back unchanged.
+    mov edx, dword ptr [8+r11]
+    test edx, edx
+    je L168
+    lea ecx, dword ptr [-1+rdx]
+    mov rsi, qword ptr [16+r11]
+    mov r10d, ecx
+    cmp qword ptr [rsi+r10*8], 0
+    jne L166                            ; top qword non-zero: count is final
+    mov edx, ecx
+    ALIGN 16
+L167:
+    test edx, edx
+    mov ecx, edx                        ; mov leaves the flags from test intact
+    je L171                             ; every qword was zero
+    dec edx
+    mov eax, edx
+    cmp qword ptr [rsi+rax*8], 0
+    je L167
+    mov dword ptr [8+r11], ecx          ; ecx = index of top non-zero qword + 1
+    mov edx, ecx
+L166:
+    test edx, edx
+    je L168
+    mov eax, dword ptr [r11]
+    jmp L169
+
+L171:
+    mov dword ptr [8+r11], edx          ; edx is 0 here
+L168:
+    xor eax, eax
+L169:
+    add rsp, 80
+    pop rbx
+    pop rbp
+    mov dword ptr [r11], eax
+
+    pop rsi
+    pop rdi
+
+    ret
+
+s_mp_sqr_comba_4 ENDP
+
+
+; s_mp_sqr_comba_8: fully unrolled squaring of an 8-qword operand into 16 qwords.
+;
+; Same scheme as the 4-qword routine above: rcx (A) is copied to rdi, rdx (B)
+; to rsi and then rbp; [A+16] / [B+16] are loaded as the input / output digit
+; pointers; r9 is zeroed and used as the source of zero.
+; NOTE(review): offsets 0 / 8 / 16 look like the mp_int sign / used / digit
+; pointer fields -- confirm against the mp_int definition.
+;
+; Columns 0-4 and 10-14 add each cross product to the accumulator twice.
+; Columns 5-9 first sum the cross products once into a scratch triple and then
+; add that triple to the accumulator twice. Squares a[i]*a[i] are added once.
+; Columns are staged in the stack area and copied to the output digits after
+; the last input digit has been read.
+; void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+
+    ALIGN 16
+s_mp_sqr_comba_8 PROC
+
+    push rdi
+    push rsi
+
+    mov rdi, rcx                        ; rdi = A
+    mov rsi, rdx                        ; rsi = B
+    mov rdx, r8                         ; no effect on the result: rdx and rcx are
+    mov rcx, r9                         ; both overwritten below before being read
+
+    push r14
+    xor r9d, r9d                        ; r9 = 0
+    mov r14, r9
+    mov r10, r9
+    push r13
+    mov r13, r9
+    push r12
+    mov r12, r9
+    push rbp
+    mov rbp, rsi                        ; rbp = B for the rest of the routine
+    mov rsi, r9
+    push rbx
+    mov rbx, r9
+    sub rsp, 8+128                      ; staging area for the result columns
+    mov rcx, qword ptr [16+rdi]         ; rcx = input digit array
+; column 0: a0*a0
+    mov rax, qword ptr [rcx]
+    mul rax
+    add r14, rax
+    adc rbx, rdx
+    adc r12, 0
+    mov qword ptr [-120+128+rsp], r14
+; column 1: 2*a0*a1
+    mov rax, qword ptr [rcx]
+    mul qword ptr [8+rcx]
+    add rbx, rax
+    adc r12, rdx
+    adc r10, 0
+    add rbx, rax
+    adc r12, rdx
+    adc r10, 0
+    mov qword ptr [-112+128+rsp], rbx
+; column 2: 2*a0*a2 + a1*a1
+    mov rax, qword ptr [rcx]
+    mul qword ptr [16+rcx]
+    add r12, rax
+    adc r10, rdx
+    adc r13, 0
+    add r12, rax
+    adc r10, rdx
+    adc r13, 0
+    mov rbx, r10
+    mov r10, r13
+    mov r13, r9
+    mov rax, qword ptr [8+rcx]
+    mul rax
+    add r12, rax
+    adc rbx, rdx
+    adc r10, 0
+    mov qword ptr [-104+128+rsp], r12
+; column 3: 2*a0*a3 + 2*a1*a2
+    mov rdi, r10
+    mov r11, rbx
+    mov rax, qword ptr [rcx]
+    mul qword ptr [24+rcx]
+    add r11, rax
+    adc rdi, rdx
+    adc rsi, 0
+    add r11, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rbx, rdi
+    mov r10, rsi
+    mov rdi, r9
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [16+rcx]
+    add r11, rax
+    adc rbx, rdx
+    adc r10, 0
+    add r11, rax
+    adc rbx, rdx
+    adc r10, 0
+    mov rsi, r9
+    mov qword ptr [-96+128+rsp], r11
+; column 4: 2*a0*a4 + 2*a1*a3 + a2*a2
+    mov r8, r10
+    mov r12, rbx
+    mov r11, r9
+    mov rax, qword ptr [rcx]
+    mul qword ptr [32+rcx]
+    add r12, rax
+    adc r8, rdx
+    adc r13, 0
+    add r12, rax
+    adc r8, rdx
+    adc r13, 0
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [24+rcx]
+    add r12, rax
+    adc r8, rdx
+    adc r13, 0
+    add r12, rax
+    adc r8, rdx
+    adc r13, 0
+    mov rbx, r8
+    mov r10, r13
+    mov r8, r9
+    mov rax, qword ptr [16+rcx]
+    mul rax
+    add r12, rax
+    adc rbx, rdx
+    adc r10, 0
+    mov qword ptr [-88+128+rsp], r12
+; column 5: 2*(a0*a5 + a1*a4 + a2*a3); sum in r8:rdi:rsi, then added twice
+    mov rax, qword ptr [rcx]
+    mul qword ptr [40+rcx]
+    mov r8, rax
+    mov rdi, rdx
+    xor rsi, rsi
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [32+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rax, qword ptr [16+rcx]
+    mul qword ptr [24+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    add rbx, r8
+    adc r10, rdi
+    adc r11, rsi
+    add rbx, r8
+    adc r10, rdi
+    adc r11, rsi
+    mov qword ptr [-80+128+rsp], rbx
+; column 6: 2*(a0*a6 + a1*a5 + a2*a4) + a3*a3
+    mov rax, qword ptr [rcx]
+    mul qword ptr [48+rcx]
+    mov r8, rax
+    mov rdi, rdx
+    xor rsi, rsi
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [40+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rax, qword ptr [16+rcx]
+    mul qword ptr [32+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rax, r9
+    mov r13, rdi
+    mov r12, rsi
+    add r10, r8
+    adc r11, r13
+    adc rax, r12
+    add r10, r8
+    adc r11, r13
+    adc rax, r12
+    mov rdx, rax
+    mov rbx, r11
+    mov rdi, r13
+    mov r11, rdx
+    mov rsi, r12
+    mov rax, qword ptr [24+rcx]
+    mul rax
+    add r10, rax
+    adc rbx, rdx
+    adc r11, 0
+    mov qword ptr [-72+128+rsp], r10
+; column 7: 2*(a0*a7 + a1*a6 + a2*a5 + a3*a4)
+    mov r10, r11
+    mov rax, qword ptr [rcx]
+    mul qword ptr [56+rcx]
+    mov r8, rax
+    mov rdi, rdx
+    xor rsi, rsi
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [48+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rax, qword ptr [16+rcx]
+    mul qword ptr [40+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rax, qword ptr [24+rcx]
+    mul qword ptr [32+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rax, r9
+    add rbx, r8
+    adc r10, rdi
+    adc rax, rsi
+    add rbx, r8
+    adc r10, rdi
+    adc rax, rsi
+    mov qword ptr [-64+128+rsp], rbx
+; column 8: 2*(a1*a7 + a2*a6 + a3*a5) + a4*a4
+    mov r11, rax
+    mov rbx, r9
+    mov rax, qword ptr [8+rcx]
+    mul qword ptr [56+rcx]
+    mov r8, rax
+    mov rdi, rdx
+    xor rsi, rsi
+    mov rax, qword ptr [16+rcx]
+    mul qword ptr [48+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov rax, qword ptr [24+rcx]
+    mul qword ptr [40+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc rsi, 0
+    mov r13, rdi
+    mov r12, rsi
+    add r10, r8
+    adc r11, r13
+    adc rbx, r12
+    add r10, r8
+    adc r11, r13
+    adc rbx, r12
+    mov rsi, rbx
+    mov rdi, r13
+    mov rbx, r11
+    mov r13, r12
+    mov r11, rsi
+    mov rax, qword ptr [32+rcx]
+    mul rax
+    add r10, rax
+    adc rbx, rdx
+    adc r11, 0
+    mov qword ptr [-56+128+rsp], r10
+; column 9: 2*(a2*a7 + a3*a6 + a4*a5); sum in r8:rdi:r13, then added twice
+    mov r10, r9
+    mov rax, qword ptr [16+rcx]
+    mul qword ptr [56+rcx]
+    mov r8, rax
+    mov rdi, rdx
+    xor r13, r13
+    mov rax, qword ptr [24+rcx]
+    mul qword ptr [48+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc r13, 0
+    mov rax, qword ptr [32+rcx]
+    mul qword ptr [40+rcx]
+    add r8, rax
+    adc rdi, rdx
+    adc r13, 0
+    mov r12, rdi
+    mov rax, r13
+    add rbx, r8
+    adc r11, r12
+    adc r10, rax
+    add rbx, r8
+    adc r11, r12
+    adc r10, rax
+    mov qword ptr [-48+128+rsp], rbx
+; column 10: 2*a3*a7 + 2*a4*a6 + a5*a5
+    mov r12, r11
+    mov rsi, r10
+    mov rbx, r9
+    mov r11, r9
+    mov rax, qword ptr [24+rcx]
+    mul qword ptr [56+rcx]
+    add r12, rax
+    adc rsi, rdx
+    adc rbx, 0
+    add r12, rax
+    adc rsi, rdx
+    adc rbx, 0
+    mov r13, rbx
+    mov rax, qword ptr [32+rcx]
+    mul qword ptr [48+rcx]
+    add r12, rax
+    adc rsi, rdx
+    adc r13, 0
+    add r12, rax
+    adc rsi, rdx
+    adc r13, 0
+    mov r10, rsi
+    mov rbx, r13
+    mov r13, r9
+    mov rax, qword ptr [40+rcx]
+    mul rax
+    add r12, rax
+    adc r10, rdx
+    adc rbx, 0
+    mov qword ptr [-40+128+rsp], r12
+; column 11: 2*a4*a7 + 2*a5*a6
+    mov r8, rbx
+    mov rdi, r10
+    mov rax, qword ptr [32+rcx]
+    mul qword ptr [56+rcx]
+    add rdi, rax
+    adc r8, rdx
+    adc r11, 0
+    add rdi, rax
+    adc r8, rdx
+    adc r11, 0
+    mov r10, r8
+    mov rbx, r11
+    mov rax, qword ptr [40+rcx]
+    mul qword ptr [48+rcx]
+    add rdi, rax
+    adc r10, rdx
+    adc rbx, 0
+    add rdi, rax
+    adc r10, rdx
+    adc rbx, 0
+    mov qword ptr [-32+128+rsp], rdi
+; column 12: 2*a5*a7 + a6*a6
+    mov rsi, rbx
+    mov r12, r10
+    mov rax, qword ptr [40+rcx]
+    mul qword ptr [56+rcx]
+    add r12, rax
+    adc rsi, rdx
+    adc r13, 0
+    add r12, rax
+    adc rsi, rdx
+    adc r13, 0
+    mov r10, rsi
+    mov rbx, r13
+    mov rax, qword ptr [48+rcx]
+    mul rax
+    add r12, rax
+    adc r10, rdx
+    adc rbx, 0
+    mov qword ptr [-24+128+rsp], r12
+; column 13: 2*a6*a7
+    mov rdi, r10
+    mov rsi, rbx
+    mov r10, r9
+    mov dword ptr [8+rbp], 16           ; dword [B+8] = 16 (trimmed again below)
+    mov dword ptr [rbp], 0              ; dword [B] = 0
+    mov rax, qword ptr [48+rcx]
+    mul qword ptr [56+rcx]
+    add rdi, rax
+    adc rsi, rdx
+    adc r10, 0
+    add rdi, rax
+    adc rsi, rdx
+    adc r10, 0
+    mov qword ptr [-16+128+rsp], rdi
+; columns 14 and 15: a7*a7 plus the carried-in words (low -> 14, high -> 15)
+    mov r8, r10
+    mov rax, qword ptr [56+rcx]
+    mul rax
+    add rsi, rax
+    adc r8, rdx
+    adc r9, 0
+    mov rax, qword ptr [16+rbp]         ; rax = output digit array
+    mov qword ptr [-8+128+rsp], rsi
+    mov qword ptr [128+rsp], r8
+; copy the sixteen staged columns to the output digits
+    mov qword ptr [rax], r14            ; r14 still holds column 0
+    mov rbx, qword ptr [-112+128+rsp]
+    mov qword ptr [8+rax], rbx
+    mov rcx, qword ptr [-104+128+rsp]
+    mov qword ptr [16+rax], rcx
+    mov rdx, qword ptr [-96+128+rsp]
+    mov qword ptr [24+rax], rdx
+    mov r14, qword ptr [-88+128+rsp]
+    mov qword ptr [32+rax], r14
+    mov r13, qword ptr [-80+128+rsp]
+    mov qword ptr [40+rax], r13
+    mov r12, qword ptr [-72+128+rsp]
+    mov qword ptr [48+rax], r12
+    mov r11, qword ptr [-64+128+rsp]
+    mov qword ptr [56+rax], r11
+    mov r10, qword ptr [-56+128+rsp]
+    mov qword ptr [64+rax], r10
+    mov r9, qword ptr [-48+128+rsp]
+    mov qword ptr [72+rax], r9
+    mov r8, qword ptr [-40+128+rsp]
+    mov qword ptr [80+rax], r8
+    mov rdi, qword ptr [-32+128+rsp]
+    mov qword ptr [88+rax], rdi
+    mov rsi, qword ptr [-24+128+rsp]
+    mov qword ptr [96+rax], rsi
+    mov rbx, qword ptr [-16+128+rsp]
+    mov qword ptr [104+rax], rbx
+    mov rcx, qword ptr [-8+128+rsp]
+    mov qword ptr [112+rax], rcx
+    mov rdx, qword ptr [128+rsp]
+    mov qword ptr [120+rax], rdx
+; Trim high zero qwords: lower the count at [B+8] while the highest counted
+; output qword is 0. If the count ends up 0, dword [B] is written as 0;
+; otherwise dword [B] is reloaded and stored back unchanged.
+    mov edx, dword ptr [8+rbp]
+    test edx, edx
+    je L192
+    lea ecx, dword ptr [-1+rdx]
+    mov rsi, qword ptr [16+rbp]
+    mov r14d, ecx
+    cmp qword ptr [rsi+r14*8], 0
+    jne L190                            ; top qword non-zero: count is final
+    mov edx, ecx
+    ALIGN 16
+L191:
+    test edx, edx
+    mov ecx, edx                        ; mov leaves the flags from test intact
+    je L195                             ; every qword was zero
+    dec edx
+    mov r9d, edx
+    cmp qword ptr [rsi+r9*8], 0
+    je L191
+    mov dword ptr [8+rbp], ecx          ; ecx = index of top non-zero qword + 1
+    mov edx, ecx
+L190:
+    test edx, edx
+    je L192
+    mov eax, dword ptr [rbp]
+    jmp L193
+
+L195:
+    mov dword ptr [8+rbp], edx          ; edx is 0 here
+L192:
+    xor eax, eax
+L193:
+    mov dword ptr [rbp], eax
+    add rsp, 8+128
+    pop rbx
+    pop rbp
+    pop r12
+    pop r13
+    pop r14
+
+    pop rsi
+    pop rdi
+
+    ret
+
+s_mp_sqr_comba_8 ENDP
+
+
+; void s_mp_sqr_comba_16(const mp_int *A, mp_int *B) + + ALIGN 16 +s_mp_sqr_comba_16 PROC ; A "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + xor r9d, r9d + mov r8, r9 + mov r11, r9 + mov rbp, rsp + push r14 + mov r14, rsi + mov rsi, r9 + push r13 + mov r13, r9 + push r12 + mov r12, r9 + push rbx + mov rbx, r9 + sub rsp, 256+32 ; +32 for "home" storage + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r8, rax + adc rbx, rdx + adc rsi, 0 + mov qword ptr [-288+rbp], r8 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rsi, rdx + adc r12, 0 + add rbx, rax + adc rsi, rdx + adc r12, 0 + mov qword ptr [-280+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rsi, rax + adc r12, rdx + adc r13, 0 + add rsi, rax + adc r12, rdx + adc r13, 0 + mov rbx, r12 + mov r10, r13 + mov rax, qword ptr [8+rcx] + mul rax + add rsi, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-272+rbp], rsi + mov rdi, r10 + mov rsi, r9 + mov r10, rbx + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r10, rax + adc rdi, rdx + adc r11, 0 + add r10, rax + adc rdi, rdx + adc r11, 0 + mov r12, rdi + mov rbx, r11 + mov rdi, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r10, rax + adc
r12, rdx + adc rbx, 0 + add r10, rax + adc r12, rdx + adc rbx, 0 + mov r11, r9 + mov qword ptr [-264+rbp], r10 + mov r8, rbx + mov r13, r12 + mov r12, r9 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rbx, r8 + mov r10, r12 + mov r8, r9 + mov rax, qword ptr [16+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-256+rbp], r13 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r11, rsi + add rbx, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-248+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-240+rbp], r10 + mov r10, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc 
rsi, 0 + mov rdx, r9 + add rbx, r8 + adc r10, rdi + adc rdx, rsi + add rbx, r8 + adc r10, rdi + adc rdx, rsi + mov r11, rdx + mov qword ptr [-232+rbp], rbx + mov rbx, r9 + mov rax, qword ptr [rcx] + mul qword ptr [64+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rax, qword ptr [32+rcx] + mul rax + add r10, rax + adc r11, rdx + adc rbx, 0 + mov rdi, r13 + mov qword ptr [-224+rbp], r10 + mov rsi, r12 + mov r10, rbx + mov r12, r9 + mov rax, qword ptr [rcx] + mul qword ptr [72+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r11, r8 + adc r10, rdi + adc r12, rsi + add r11, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-216+rbp], r11 + mov rbx, r12 + mov rax, qword ptr [rcx] + mul qword ptr [80+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add 
r10, r8 + adc rbx, r13 + adc rax, r12 + add r10, r8 + adc rbx, r13 + adc rax, r12 + mov rdx, rax + mov r11, rbx + mov rdi, r13 + mov rbx, rdx + mov rsi, r12 + mov rax, qword ptr [40+rcx] + mul rax + add r10, rax + adc r11, rdx + adc rbx, 0 + mov qword ptr [-208+rbp], r10 + mov r10, rbx + mov rax, qword ptr [rcx] + mul qword ptr [88+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r9 + add r11, r8 + adc r10, rdi + adc rdx, rsi + add r11, r8 + adc r10, rdi + adc rdx, rsi + mov r13, rdx + mov qword ptr [-200+rbp], r11 + mov r12, r13 + mov rax, qword ptr [rcx] + mul qword ptr [96+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rdx, rdi + mov r11, rsi + add r10, r8 + adc r12, rdx + adc rax, r11 + add r10, r8 + adc r12, rdx + adc rax, r11 + mov rbx, rdx + mov r13, rax + mov rsi, r11 + mov rax, qword ptr [48+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rdi, rbx + mov qword ptr [-192+rbp], r10 + mov r10, r13 + mov rax, qword ptr [rcx] + mul qword ptr [104+rcx] + mov r8, rax 
+ mov rdi, rdx + xor rsi, rsi + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r13, rsi + add r12, r8 + adc r10, rdi + adc r13, rsi + mov qword ptr [-184+rbp], r12 + mov r12, r13 + mov rax, qword ptr [rcx] + mul qword ptr [112+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rbx, rdi + mov rdx, rsi + add r10, r8 + adc r12, rbx + adc rax, rdx + add r10, r8 + adc r12, rbx + adc rax, rdx + mov r11, rdx + mov r13, rax + mov rdi, rbx + mov rax, qword ptr [56+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-176+rbp], r10 + mov r10, r13 + mov rax, qword ptr [rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] 
+ mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r13, rsi + add r12, r8 + adc r10, rdi + adc r13, rsi + mov qword ptr [-168+rbp], r12 + mov r12, r13 + mov rax, qword ptr [8+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [16+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rbx, rdi + mov rdx, rsi + add r10, r8 + adc r12, rbx + adc rax, rdx + add r10, r8 + adc r12, rbx + adc rax, rdx + mov r11, rdx + mov r13, rax + mov rdi, rbx + mov rax, qword ptr [64+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-160+rbp], r10 + mov r11, r9 + mov rax, qword ptr [16+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r10, r13 + mov rbx, r9 + mov rax, qword ptr [24+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, 
rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r11, rsi + add r12, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-152+rbp], r12 + mov rax, qword ptr [24+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [32+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r9 + mov rax, qword ptr [72+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-144+rbp], r10 + mov r10, r11 + mov rax, qword ptr [32+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [40+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [80+rcx] + add 
r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r12, rsi + add rbx, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-136+rbp], rbx + mov r11, r12 + mov rax, qword ptr [40+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [48+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [80+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-128+rbp], r10 + mov r10, r11 + mov rax, qword ptr [48+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [56+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r9 + add rbx, r8 + adc r10, rdi + adc rdx, rsi + add rbx, r8 + adc r10, rdi + adc rdx, rsi + mov qword ptr [-120+rbp], rbx + mov r11, rdx + mov rbx, r9 + mov rax, qword ptr [56+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [64+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr 
[96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r9 + mov rax, qword ptr [88+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-112+rbp], r10 + mov r10, r11 + mov rax, qword ptr [64+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [72+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r12, rsi + add rbx, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-104+rbp], rbx + mov r11, r12 + mov rax, qword ptr [72+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [80+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [96+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-96+rbp], r10 + mov r10, r9 + mov rax, qword ptr [80+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [88+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r12, rdi + mov rax, rsi + mov rsi, r9 + add rbx, r8 + adc r11, r12 + adc r10, rax + add rbx, r8 + adc r11, r12 + adc r10, rax + mov r12, r9 + mov 
qword ptr [-88+rbp], rbx + mov r13, r11 + mov r11, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [120+rcx] + add r13, rax + adc r11, rdx + adc r12, 0 + add r13, rax + adc r11, rdx + adc r12, 0 + mov rdi, r12 + mov rax, qword ptr [96+rcx] + mul qword ptr [112+rcx] + add r13, rax + adc r11, rdx + adc rdi, 0 + add r13, rax + adc r11, rdx + adc rdi, 0 + mov rbx, r11 + mov r10, rdi + mov r11, r9 + mov rax, qword ptr [104+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-80+rbp], r13 + mov r8, r10 + mov r10, rbx + mov rax, qword ptr [96+rcx] + mul qword ptr [120+rcx] + add r10, rax + adc r8, rdx + adc rsi, 0 + add r10, rax + adc r8, rdx + adc rsi, 0 + mov r12, r8 + mov rbx, rsi + mov rax, qword ptr [104+rcx] + mul qword ptr [112+rcx] + add r10, rax + adc r12, rdx + adc rbx, 0 + add r10, rax + adc r12, rdx + adc rbx, 0 + mov qword ptr [-72+rbp], r10 + mov r13, rbx + mov rbx, r12 + mov rax, qword ptr [104+rcx] + mul qword ptr [120+rcx] + add rbx, rax + adc r13, rdx + adc r11, 0 + add rbx, rax + adc r13, rdx + adc r11, 0 + mov r12, r11 + mov r10, r13 + mov rax, qword ptr [112+rcx] + mul rax + add rbx, rax + adc r10, rdx + adc r12, 0 + mov qword ptr [-64+rbp], rbx + mov rdi, r10 + mov rbx, r9 + mov rsi, r12 + mov rax, qword ptr [112+rcx] + mul qword ptr [120+rcx] + add rdi, rax + adc rsi, rdx + adc rbx, 0 + add rdi, rax + adc rsi, rdx + adc rbx, 0 + mov qword ptr [-56+rbp], rdi + mov r8, rbx + mov rax, qword ptr [120+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r9, 0 + mov qword ptr [-48+rbp], rsi + mov qword ptr [-40+rbp], r8 + mov dword ptr [8+r14], 32 + mov dword ptr [r14], 0 +; mov rdi, qword ptr [16+r14] +; lea rsi, qword ptr [-288+rbp] +; mov edx, 256 + mov rcx, qword ptr [16+r14] + lea rdx, qword ptr [-288+rbp] + mov r8d, 256 + call memcpy + mov edx, dword ptr [8+r14] + test edx, edx + je L232 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r14] + mov r9d, ecx + cmp qword ptr [rsi+r9*8], 0 + jne L230 + mov edx, ecx + ALIGN 16 
+L231: + test edx, edx + mov ecx, edx + je L235 + dec edx + mov eax, edx + cmp qword ptr [rsi+rax*8], 0 + je L231 + mov dword ptr [8+r14], ecx + mov edx, ecx +L230: + test edx, edx + je L232 + mov eax, dword ptr [r14] + jmp L233 + +L235: + mov dword ptr [8+r14], edx +L232: + xor eax, eax +L233: + mov dword ptr [r14], eax + add rsp, 256+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop r14 + pop rbp + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_16 ENDP + + +; void s_mp_sqr_comba_32(const mp_int *A, mp_int *B); + + ALIGN 16 +s_mp_sqr_comba_32 PROC ; A "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + xor r10d, r10d + mov r8, r10 + mov r11, r10 + mov rbp, rsp + push r14 + mov r14, rsi + mov rsi, r10 + push r13 + mov r13, r10 + push r12 + mov r12, r10 + push rbx + mov rbx, r10 + sub rsp, 512+32 ; +32 for "home" storage + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r8, rax + adc rbx, rdx + adc rsi, 0 + mov qword ptr [-544+rbp], r8 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rsi, rdx + adc r12, 0 + add rbx, rax + adc rsi, rdx + adc r12, 0 + mov qword ptr [-536+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rsi, rax + adc r12, rdx + adc r13, 0 + add rsi, rax + adc r12, rdx + adc r13, 0 + mov rbx, r12 + mov r9, r13 + mov rax, qword ptr [8+rcx] + mul rax + add rsi, rax + adc rbx, rdx + adc r9, 0 + mov qword ptr [-528+rbp], rsi + mov rdi, r9 + mov rsi, r10 + mov r9, rbx + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r9, rax + adc rdi, rdx + adc r11, 0 + add r9, rax + adc rdi, rdx + adc r11, 0 + mov r12, rdi + mov r13, r11 + mov rdi, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r9, rax + adc r12, rdx + adc r13, 0 + add r9, rax + adc r12, rdx + adc r13, 0 + mov r11, r10 + mov qword ptr [-520+rbp], r9 + mov r8, r13 + mov r13, r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r13, rax + adc r8, 
rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rbx, r8 + mov r9, r12 + mov r8, r10 + mov rax, qword ptr [16+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r9, 0 + mov qword ptr [-512+rbp], r13 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r11, rsi + add rbx, r8 + adc r9, rdi + adc r11, rsi + mov qword ptr [-504+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-496+rbp], r9 + mov r9, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add rbx, r8 + adc r9, rdi + adc rdx, rsi + add rbx, r8 + adc r9, rdi + adc rdx, rsi + mov r11, rdx + mov qword ptr [-488+rbp], rbx + mov rbx, r10 + mov rax, qword ptr [rcx] + mul qword ptr [64+rcx] + mov r8, rax + 
mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rbx, r12 + add r9, r8 + adc r11, r13 + adc rbx, r12 + mov rax, qword ptr [32+rcx] + mul rax + add r9, rax + adc r11, rdx + adc rbx, 0 + mov rdi, r13 + mov qword ptr [-480+rbp], r9 + mov rsi, r12 + mov r9, rbx + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [72+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r11, r8 + adc r9, rdi + adc r12, rsi + add r11, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-472+rbp], r11 + mov rbx, r12 + mov rax, qword ptr [rcx] + mul qword ptr [80+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc rbx, r13 + adc rax, r12 + add r9, r8 + adc rbx, r13 + adc rax, r12 + mov rdx, rax + mov r11, rbx + mov rdi, r13 + mov rbx, rdx + mov rsi, r12 + mov rax, qword ptr [40+rcx] + mul rax + add r9, rax + adc r11, rdx + adc rbx, 0 + mov 
qword ptr [-464+rbp], r9 + mov r9, rbx + mov rax, qword ptr [rcx] + mul qword ptr [88+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add r11, r8 + adc r9, rdi + adc rdx, rsi + add r11, r8 + adc r9, rdi + adc rdx, rsi + mov r13, rdx + mov qword ptr [-456+rbp], r11 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [96+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, rdi + mov r11, rsi + add r9, r8 + adc r12, rax + adc r13, r11 + add r9, r8 + adc r12, rax + adc r13, r11 + mov rbx, rax + mov rsi, r11 + mov rax, qword ptr [48+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rdi, rbx + mov qword ptr [-448+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [104+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] 
+ mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc r13, rsi + add r12, r8 + adc r9, rdi + adc r13, rsi + mov qword ptr [-440+rbp], r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [112+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r13 + mov rbx, rdi + mov r13, rsi + add r9, r8 + adc rdx, rbx + adc r12, r13 + add r9, r8 + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov rdi, rbx + mov rsi, r11 + mov rax, qword ptr [56+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-432+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r8 + mov rdx, rdi + mov rbx, rsi + add r12, rax + adc r9, rdx + adc r13, rbx + add r12, rax + adc r9, rdx + adc r13, rbx + mov qword ptr [-424+rbp], r12 + mov r8, rdx + mov rsi, rax + mov rdi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [128+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [80+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [72+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [64+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-416+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [136+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr 
[112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-408+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [144+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [80+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [72+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-400+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + 
mul qword ptr [152+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-392+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [160+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword 
ptr [64+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [80+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-384+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [168+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [152+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-376+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [176+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, 
rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [88+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-368+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [184+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [152+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx 
+ adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov rdi, rdx + mov qword ptr [-360+rbp], r12 + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [192+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rbx, r8 + mov rax, rdi + add r9, rsi + adc r12, rbx + adc r13, rax + add r9, rsi + adc r12, rbx + adc r13, rax + mov r11, rax + mov r8, rbx + mov rax, qword ptr [96+rcx] + mul 
rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rdi, r11 + mov qword ptr [-352+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [200+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-344+rbp], r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [208+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, 
rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rdx, r13 + mov rbx, r8 + mov r13, rdi + add r9, rsi + adc rdx, rbx + adc r12, r13 + add r9, rsi + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov r8, rbx + mov rdi, r11 + mov rax, qword ptr [104+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-336+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [216+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr 
[152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-328+rbp], r12 + mov rax, qword ptr [rcx] + mul qword ptr [224+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [120+rcx] + add 
rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r13 + mov rdx, r10 + mov rbx, r8 + mov r12, rdi + add r9, rsi + adc rax, rbx + adc rdx, r12 + add r9, rsi + adc rax, rbx + adc rdx, r12 + mov rdi, rdx + mov r11, r12 + mov r8, rbx + mov r12, rax + mov r13, rdi + mov rdi, r11 + mov rax, qword ptr [112+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-320+rbp], r9 + mov rbx, r13 + mov r9, r10 + mov rax, qword ptr [rcx] + mul qword ptr [232+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc rbx, r8 + adc r9, rdi + add r12, rsi + adc rbx, r8 + adc r9, rdi + mov qword ptr [-312+rbp], 
r12 + mov r13, r9 + mov rax, qword ptr [rcx] + mul qword ptr [240+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r10 + mov r11, r8 + mov rdx, rdi + add rbx, rsi + adc r13, r11 + adc rax, rdx + add rbx, rsi + adc r13, r11 + adc rax, rdx + mov r9, rdx + mov rdx, rax + mov r12, r13 + mov r8, r11 + mov r13, rdx + mov rdi, r9 + mov rax, qword ptr [120+rcx] + mul rax + add rbx, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-304+rbp], rbx + mov rbx, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr 
[240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc rbx, r8 + adc r13, rdi + add r12, rsi + adc rbx, r8 + adc r13, rdi + mov qword ptr [-296+rbp], r12 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [16+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] 
+ mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov r11, r8 + mov rax, rdi + add rbx, rsi + adc r12, r11 + adc r13, rax + add rbx, rsi + adc r12, r11 + adc r13, rax + mov r9, rax + mov r8, r11 + mov rax, qword ptr [128+rcx] + mul rax + add rbx, rax + adc r12, rdx + adc r13, 0 + mov rdi, r9 + mov qword ptr [-288+rbp], rbx + mov r9, r13 + mov rax, qword ptr [16+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov r13, r10 + mov rax, qword ptr [24+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr 
[200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-280+rbp], r12 + mov r12, r10 + mov rax, qword ptr [24+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [32+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword 
ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rdx, r13 + mov rbx, r8 + mov r13, rdi + add r9, rsi + adc rdx, rbx + adc r12, r13 + add r9, rsi + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov r8, rbx + mov rdi, r11 + mov rax, qword ptr [136+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-272+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [32+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [40+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + 
mov rax, qword ptr [136+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-264+rbp], r12 + mov rax, qword ptr [40+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [48+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r13 + mov rdx, r10 + mov rbx, r8 + mov r12, rdi + add r9, rsi + adc rax, rbx + adc rdx, r12 + add r9, rsi + adc rax, rbx + adc rdx, r12 + mov rdi, rdx + mov r11, r12 + mov r8, rbx + mov r12, rax + mov r13, rdi + mov rdi, r11 + mov rax, qword ptr [144+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov r11, r10 + mov qword ptr [-256+rbp], r9 + mov r9, r13 + mov rax, qword ptr [48+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr 
[56+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r11, rdi + add r12, rsi + adc r9, r8 + adc r11, rdi + mov qword ptr [-248+rbp], r12 + mov r13, r11 + mov rax, qword ptr [56+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [64+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword 
ptr [112+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r10 + mov rdx, rsi + mov rbx, r8 + mov r12, rdi + add r9, rdx + adc r13, rbx + adc rax, r12 + add r9, rdx + adc r13, rbx + adc rax, r12 + mov r11, r12 + mov r8, rdx + mov rdx, rax + mov r12, r13 + mov rdi, rbx + mov r13, rdx + mov rsi, r11 + mov rax, qword ptr [152+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-240+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [64+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [72+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul 
qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r8 + mov rdx, rdi + mov rbx, rsi + add r12, rax + adc r9, rdx + adc r13, rbx + add r12, rax + adc r9, rdx + adc r13, rbx + mov qword ptr [-232+rbp], r12 + mov r8, rdx + mov rsi, rax + mov rdi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [72+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [80+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [160+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-224+rbp], r9 + mov r9, r13 + mov rax, qword ptr [80+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, 
rdx + adc rsi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-216+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [96+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [176+rcx] + 
add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [168+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-208+rbp], r9 + mov r9, r13 + mov rax, qword ptr [96+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [104+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-200+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [104+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [112+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 
0 + mov rax, qword ptr [136+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [176+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-192+rbp], r9 + mov r9, r13 + mov rax, qword ptr [112+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [120+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-184+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [120+rcx] + mul 
qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [128+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [184+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-176+rbp], r9 + mov r9, r13 + mov rax, qword ptr [128+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [136+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc r13, rsi + add r12, r8 + adc r9, rdi + adc r13, rsi + mov qword ptr [-168+rbp], 
r12 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [136+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [144+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, rdi + mov rax, rsi + add r9, r8 + adc r12, rbx + adc r13, rax + add r9, r8 + adc r12, rbx + adc r13, rax + mov r11, rax + mov rdi, rbx + mov rbx, r10 + mov rax, qword ptr [192+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-160+rbp], r9 + mov r9, r13 + mov rax, qword ptr [144+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [152+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc rbx, rsi + add r12, r8 + adc r9, rdi + adc rbx, rsi + mov qword ptr [-152+rbp], r12 + mov rax, qword ptr [152+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [160+rcx] + mul qword ptr 
[240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc rbx, r13 + adc rdx, r12 + add r9, r8 + adc rbx, r13 + adc rdx, r12 + mov rax, rdx + mov rdi, r13 + mov rsi, r12 + mov r11, rax + mov r12, r10 + mov rax, qword ptr [200+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-144+rbp], r9 + mov r9, r11 + mov rax, qword ptr [160+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [168+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r12, rsi + add rbx, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-136+rbp], rbx + mov r11, r12 + mov rax, qword ptr [168+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [176+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + 
adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [208+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-128+rbp], r9 + mov r9, r11 + mov rax, qword ptr [176+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [184+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add rbx, r8 + adc r9, rdi + adc rdx, rsi + add rbx, r8 + adc r9, rdi + adc rdx, rsi + mov qword ptr [-120+rbp], rbx + mov r11, rdx + mov rbx, r10 + mov rax, qword ptr [184+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [192+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rbx, r12 + add r9, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r10 + mov rax, qword ptr [216+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-112+rbp], r9 + mov r9, r11 + mov rax, qword ptr [192+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [200+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [232+rcx] + add 
r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [216+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r12, rsi + add rbx, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-104+rbp], rbx + mov r11, r12 + mov rax, qword ptr [200+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [208+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [216+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov r12, r10 + mov rax, qword ptr [224+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-96+rbp], r9 + mov r9, r10 + mov rax, qword ptr [208+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [216+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [224+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov rax, rsi + add rbx, r8 + adc r11, r13 + adc r9, rax + add rbx, r8 + adc r11, r13 + adc r9, rax + mov qword ptr [-88+rbp], rbx + mov rsi, r11 + mov r8, r9 + mov rax, qword ptr [216+rcx] + mul qword ptr [248+rcx] + add rsi, rax + adc r8, rdx + adc r12, 0 + add rsi, rax + adc r8, rdx + adc r12, 0 + mov r11, r12 + mov rax, qword ptr [224+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc r11, 0 + add rsi, rax + adc r8, rdx + adc r11, 0 + mov r13, r8 + mov rbx, r11 + mov rax, qword ptr [232+rcx] + mul rax + add rsi, rax + adc r13, rdx + adc rbx, 0 + mov qword ptr [-80+rbp], rsi + mov r12, rbx + mov rdi, r13 + mov r13, r10 + mov rax, qword ptr [224+rcx] + mul qword ptr [248+rcx] + add rdi, rax + adc r12, rdx + adc r13, 0 + add rdi, rax + adc 
r12, rdx + adc r13, 0 + mov r9, r12 + mov r12, r13 + mov rax, qword ptr [232+rcx] + mul qword ptr [240+rcx] + add rdi, rax + adc r9, rdx + adc r12, 0 + add rdi, rax + adc r9, rdx + adc r12, 0 + mov qword ptr [-72+rbp], rdi + mov r11, r9 + mov rbx, r12 + mov r9, r10 + mov rax, qword ptr [232+rcx] + mul qword ptr [248+rcx] + add r11, rax + adc rbx, rdx + adc r9, 0 + add r11, rax + adc rbx, rdx + adc r9, 0 + mov r13, rbx + mov rbx, r9 + mov r9, r10 + mov rax, qword ptr [240+rcx] + mul rax + add r11, rax + adc r13, rdx + adc rbx, 0 + mov qword ptr [-64+rbp], r11 + mov rdi, r13 + mov rsi, rbx + mov rax, qword ptr [240+rcx] + mul qword ptr [248+rcx] + add rdi, rax + adc rsi, rdx + adc r9, 0 + add rdi, rax + adc rsi, rdx + adc r9, 0 + mov qword ptr [-56+rbp], rdi + mov r8, r9 + mov rax, qword ptr [248+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r10, 0 + mov qword ptr [-48+rbp], rsi + mov qword ptr [-40+rbp], r8 + mov dword ptr [8+r14], 64 + mov dword ptr [r14], 0 +; mov rdi, qword ptr [16+r14] +; lea rsi, qword ptr [-544+rbp] +; mov edx, 512 + mov rcx, qword ptr [16+r14] + lea rdx, qword ptr [-544+rbp] + mov r8d, 512 + call memcpy + mov edx, dword ptr [8+r14] + test edx, edx + je L304 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r14] + mov r10d, ecx + cmp qword ptr [rsi+r10*8], 0 + jne L302 + mov edx, ecx + ALIGN 16 +L303: + test edx, edx + mov ecx, edx + je L307 + dec edx + mov eax, edx + cmp qword ptr [rsi+rax*8], 0 + je L303 + mov dword ptr [8+r14], ecx + mov edx, ecx +L302: + test edx, edx + je L304 + mov eax, dword ptr [r14] + jmp L305 + +L307: + mov dword ptr [8+r14], edx +L304: + xor eax, eax +L305: + mov dword ptr [r14], eax + add rsp, 512+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop r14 + pop rbp + + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_32 ENDP + +END diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s new file mode 100644 index 0000000000..a5181df332 --- 
/dev/null +++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s @@ -0,0 +1,16097 @@ +//* TomsFastMath, a fast ISO C bignum library. +/ * +/ * This project is meant to fill in where LibTomMath +/ * falls short. That is speed ;-) +/ * +/ * This project is public domain and free for all purposes. +/ * +/ * Tom St Denis, tomstdenis@iahu.ca +/ */ + +//* +/ * The source file from which this assembly was derived +/ * comes from TFM v0.03, which has the above license. +/ * This source was compiled with an unnamed compiler at +/ * the highest optimization level. Afterwards, the +/ * trailing .section was removed because it causes errors +/ * in the Studio 10 compiler on AMD 64. +/ */ + + .file "mp_comba.c" + .text + .align 16 +.globl s_mp_mul_comba_4 + .type s_mp_mul_comba_4, @function +s_mp_mul_comba_4: +.LFB2: + pushq %r12 +.LCFI0: + pushq %rbp +.LCFI1: + pushq %rbx +.LCFI2: + movq 16(%rdi), %r9 + movq %rdx, %rbx + movq 16(%rsi), %rdx + movq (%r9), %rax + movq %rax, -64(%rsp) + movq 8(%r9), %r8 + movq %r8, -56(%rsp) + movq 16(%r9), %rbp + movq %rbp, -48(%rsp) + movq 24(%r9), %r12 + movq %r12, -40(%rsp) + movq (%rdx), %rcx + movq %rcx, -32(%rsp) + movq 8(%rdx), %r10 + movq %r10, -24(%rsp) + movq 16(%rdx), %r11 + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r9 + movq %r10, %rbp + movq %r11, -16(%rsp) + movq 16(%rbx), %r11 + movq 24(%rdx), %rax + movq %rax, -8(%rsp) +/APP + movq -64(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rbp + +/NO_APP + movq %r8, (%r11) + movq %rbp, %r8 + movq %r10, %rbp +/APP + movq -64(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%rbp + +/NO_APP + movq %rbp, %r12 +/APP + movq -56(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r9, 8(%r11) + movq %r12, %r9 + movq %r10, %r12 +/APP + movq -64(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %r12, %rcx +/APP + movq -56(%rsp),%rax + mulq -24(%rsp) + addq 
%rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -48(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 16(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -64(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -40(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 24(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -56(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -40(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 32(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -48(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r8, %r12 + movq %r9, %rbp +/APP + movq -40(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 40(%r11) + movq %rbp, %r8 + movq %r12, %rcx +/APP + movq -40(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%rcx + adcq $0,%r10 + +/NO_APP + movq %r8, 48(%r11) + movl (%rsi), %esi + xorl (%rdi), %esi + testq %rcx, %rcx + movq %rcx, 56(%r11) + movl $8, 8(%rbx) + jne .L9 + .align 16 +.L18: + movl 8(%rbx), %edx + leal -1(%rdx), %edi + testl %edi, %edi + movl %edi, 8(%rbx) + je .L9 + leal -2(%rdx), %r10d + cmpq $0, (%r11,%r10,8) + je .L18 +.L9: + movl 8(%rbx), %edx + xorl %r11d, %r11d + testl %edx, %edx + cmovne %esi, %r11d + movl %r11d, (%rbx) + popq %rbx + popq %rbp + popq 
%r12 + ret +.LFE2: + .size s_mp_mul_comba_4, .-s_mp_mul_comba_4 + .align 16 +.globl s_mp_mul_comba_8 + .type s_mp_mul_comba_8, @function +s_mp_mul_comba_8: +.LFB3: + pushq %r12 +.LCFI3: + pushq %rbp +.LCFI4: + pushq %rbx +.LCFI5: + movq %rdx, %rbx + subq $8, %rsp +.LCFI6: + movq 16(%rdi), %rdx + movq (%rdx), %r8 + movq %r8, -120(%rsp) + movq 8(%rdx), %rbp + movq %rbp, -112(%rsp) + movq 16(%rdx), %r9 + movq %r9, -104(%rsp) + movq 24(%rdx), %r12 + movq %r12, -96(%rsp) + movq 32(%rdx), %rcx + movq %rcx, -88(%rsp) + movq 40(%rdx), %r10 + movq %r10, -80(%rsp) + movq 48(%rdx), %r11 + movq %r11, -72(%rsp) + movq 56(%rdx), %rax + movq 16(%rsi), %rdx + movq %rax, -64(%rsp) + movq (%rdx), %r8 + movq %r8, -56(%rsp) + movq 8(%rdx), %rbp + movq %rbp, -48(%rsp) + movq 16(%rdx), %r9 + movq %r9, -40(%rsp) + movq 24(%rdx), %r12 + movq %r12, -32(%rsp) + movq 32(%rdx), %rcx + movq %rcx, -24(%rsp) + movq 40(%rdx), %r10 + movq %r10, -16(%rsp) + movq 48(%rdx), %r11 + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r9 + movq %r10, %rbp + movq %r11, -8(%rsp) + movq 16(%rbx), %r11 + movq 56(%rdx), %rax + movq %rax, (%rsp) +/APP + movq -120(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rbp + +/NO_APP + movq %r8, (%r11) + movq %rbp, %r8 + movq %r10, %rbp +/APP + movq -120(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%rbp + +/NO_APP + movq %rbp, %r12 +/APP + movq -112(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r9, 8(%r11) + movq %r12, %r9 + movq %r10, %r12 +/APP + movq -120(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %r12, %rcx +/APP + movq -112(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -104(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 16(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP 
+ movq -120(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq -48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -96(%rsp),%rax + mulq -56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 24(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -88(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 32(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq -48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -80(%rsp),%rax + mulq -56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 40(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + 
adcq $0,%rcx + + movq -96(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -72(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 48(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq -48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq -56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 56(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -112(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -64(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + 
adcq $0,%r12 + +/NO_APP + movq %r8, 64(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -104(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 72(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -96(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -64(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 80(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -88(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 88(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -80(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq 
$0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -64(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 96(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -72(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r8, %r12 + movq %r9, %rbp +/APP + movq -64(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 104(%r11) + movq %rbp, %r8 + movq %r12, %rcx +/APP + movq -64(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%rcx + adcq $0,%r10 + +/NO_APP + movq %r8, 112(%r11) + movl (%rsi), %esi + xorl (%rdi), %esi + testq %rcx, %rcx + movq %rcx, 120(%r11) + movl $16, 8(%rbx) + jne .L35 + .align 16 +.L43: + movl 8(%rbx), %edx + leal -1(%rdx), %edi + testl %edi, %edi + movl %edi, 8(%rbx) + je .L35 + leal -2(%rdx), %eax + cmpq $0, (%r11,%rax,8) + je .L43 +.L35: + movl 8(%rbx), %r11d + xorl %edx, %edx + testl %r11d, %r11d + cmovne %esi, %edx + movl %edx, (%rbx) + addq $8, %rsp + popq %rbx + popq %rbp + popq %r12 + ret +.LFE3: + .size s_mp_mul_comba_8, .-s_mp_mul_comba_8 + .align 16 +.globl s_mp_mul_comba_16 + .type s_mp_mul_comba_16, @function +s_mp_mul_comba_16: +.LFB4: + pushq %r12 +.LCFI7: + pushq %rbp +.LCFI8: + pushq %rbx +.LCFI9: + movq %rdx, %rbx + subq $136, %rsp +.LCFI10: + movq 16(%rdi), %rax + movq (%rax), %r8 + movq %r8, -120(%rsp) + movq 8(%rax), %rbp + movq %rbp, -112(%rsp) + movq 16(%rax), %r9 + movq %r9, -104(%rsp) + movq 24(%rax), %r12 + movq %r12, -96(%rsp) + movq 32(%rax), %rcx + movq %rcx, -88(%rsp) + movq 40(%rax), %r10 + movq %r10, -80(%rsp) + movq 48(%rax), %rdx + movq %rdx, -72(%rsp) + movq 56(%rax), %r11 + movq %r11, -64(%rsp) + movq 64(%rax), %r8 + movq %r8, -56(%rsp) + movq 72(%rax), %rbp + movq %rbp, -48(%rsp) + movq 80(%rax), %r9 + movq %r9, -40(%rsp) + movq 88(%rax), %r12 + movq %r12, -32(%rsp) + movq 96(%rax), %rcx + movq %rcx, -24(%rsp) + movq 104(%rax), %r10 + movq 
%r10, -16(%rsp) + movq 112(%rax), %rdx + movq %rdx, -8(%rsp) + movq 120(%rax), %r11 + movq %r11, (%rsp) + movq 16(%rsi), %r11 + movq (%r11), %r8 + movq %r8, 8(%rsp) + movq 8(%r11), %rbp + movq %rbp, 16(%rsp) + movq 16(%r11), %r9 + movq %r9, 24(%rsp) + movq 24(%r11), %r12 + movq %r12, 32(%rsp) + movq 32(%r11), %rcx + movq %rcx, 40(%rsp) + movq 40(%r11), %r10 + movq %r10, 48(%rsp) + movq 48(%r11), %rdx + movq %rdx, 56(%rsp) + movq 56(%r11), %rax + movq %rax, 64(%rsp) + movq 64(%r11), %r8 + movq %r8, 72(%rsp) + movq 72(%r11), %rbp + movq %rbp, 80(%rsp) + movq 80(%r11), %r9 + movq %r9, 88(%rsp) + movq 88(%r11), %r12 + movq %r12, 96(%rsp) + movq 96(%r11), %rcx + movq %rcx, 104(%rsp) + movq 104(%r11), %r10 + movq %r10, 112(%rsp) + movq 112(%r11), %rdx + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r9 + movq %r10, %rbp + movq %rdx, 120(%rsp) + movq 120(%r11), %rax + movq %rax, 128(%rsp) + movq 16(%rbx), %r11 +/APP + movq -120(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rbp + +/NO_APP + movq %r8, (%r11) + movq %rbp, %r8 + movq %r10, %rbp +/APP + movq -120(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%rbp + +/NO_APP + movq %rbp, %r12 +/APP + movq -112(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r9, 8(%r11) + movq %r12, %r9 + movq %r10, %r12 +/APP + movq -120(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %r12, %rcx +/APP + movq -112(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -104(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 16(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 
16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -96(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 24(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -88(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 32(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -80(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 40(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 16(%rsp) 
+ addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -72(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 48(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 56(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -56(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 64(%r11) + movq %r12, %r9 + 
movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -48(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 72(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -40(%rsp),%rax + 
mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 80(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -32(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 88(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 48(%rsp) + addq 
%rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -24(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 96(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -16(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 104(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq 
-120(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -8(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 112(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 
80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 120(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -112(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + 
adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 128(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -104(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 136(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -96(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 
112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 144(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -88(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + 
adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 152(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -80(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 160(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -72(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 
72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 168(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -64(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 176(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -56(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 72(%rsp) + addq 
%rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 184(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -48(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 192(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -40(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 200(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -32(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + 
adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 208(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -24(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 216(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -16(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 224(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -8(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r8, %r12 + movq %r9, %rbp +/APP + movq (%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 232(%r11) + movq %rbp, %r8 + movq %r12, %rcx +/APP + movq (%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%rcx + adcq $0,%r10 + +/NO_APP + movq %r8, 240(%r11) + movl (%rsi), %esi + xorl (%rdi), %esi + testq %rcx, %rcx + movq %rcx, 248(%r11) + movl $32, 8(%rbx) + jne .L76 + .align 16 +.L84: + movl 8(%rbx), %edx + leal -1(%rdx), %edi + testl %edi, %edi + movl %edi, 8(%rbx) + je .L76 + leal -2(%rdx), %eax + cmpq $0, (%r11,%rax,8) + je .L84 +.L76: + movl 8(%rbx), %edx + xorl %r11d, %r11d + testl %edx, %edx + cmovne %esi, %r11d + movl %r11d, (%rbx) + addq $136, %rsp + popq %rbx + popq %rbp + popq %r12 + ret +.LFE4: + .size s_mp_mul_comba_16, .-s_mp_mul_comba_16 + .align 16 +.globl s_mp_mul_comba_32 + .type s_mp_mul_comba_32, 
@function +s_mp_mul_comba_32: +.LFB5: + pushq %rbp +.LCFI11: + movq %rsp, %rbp +.LCFI12: + pushq %r13 +.LCFI13: + movq %rdx, %r13 + movl $256, %edx + pushq %r12 +.LCFI14: + movq %rsi, %r12 + pushq %rbx +.LCFI15: + movq %rdi, %rbx + subq $520, %rsp +.LCFI16: + movq 16(%rdi), %rsi + leaq -544(%rbp), %rdi + call memcpy@PLT + movq 16(%r12), %rsi + leaq -288(%rbp), %rdi + movl $256, %edx + call memcpy@PLT + movq 16(%r13), %r9 + xorl %r8d, %r8d + movq %r8, %rsi + movq %r8, %rdi + movq %r8, %r10 +/APP + movq -544(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%r10 + +/NO_APP + movq %rsi, (%r9) + movq %r10, %rsi + movq %r8, %r10 +/APP + movq -544(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %r10, %r11 +/APP + movq -536(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r11 + +/NO_APP + movq %rdi, 8(%r9) + movq %r11, %rdi + movq %r8, %r11 +/APP + movq -544(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %r11, %rcx +/APP + movq -536(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -528(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 16(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -520(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 24(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + 
movq -536(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -512(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 32(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -504(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 40(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -496(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 48(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq 
-544(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -488(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 56(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -480(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 64(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -472(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 72(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -464(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 80(%r9) + 
movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -456(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 88(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -248(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -448(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 96(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -440(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq 
%rcx, 104(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -432(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 112(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq 
-200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -424(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 120(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -224(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -416(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 128(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -408(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 136(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq 
$0,%rcx + + movq -432(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -400(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 144(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-424(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -392(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 152(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + 
mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -384(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 160(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -232(%rbp) + 
addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -376(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 168(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + 
adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -368(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 176(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + 
adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -360(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 184(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + 
movq -496(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -352(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 192(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-536(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi 
+ adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -344(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 200(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + 
movq -408(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -336(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 208(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + 
mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -328(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 216(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -80(%rbp) + 
addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq 
-352(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -320(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 224(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq 
-176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -312(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 232(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -88(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + 
mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -304(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 240(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -168(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 248(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -536(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq 
%rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -248(%rbp) + 
addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 256(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -528(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 264(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -520(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx 
+ + movq -472(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq 
%rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 272(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -512(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + 
+ movq -360(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 280(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -504(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + 
mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 288(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -496(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -72(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq 
%rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 296(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -488(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -200(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 304(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -480(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi 
+ adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 312(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -472(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq 
-376(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 320(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -464(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq 
-112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 328(%r9) + movq %r11, %rdi + movq %r10, %r11 + movq %r8, %r10 +/APP + movq -456(%rbp),%rax + mulq -40(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -448(%rbp),%rax + mulq -48(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -440(%rbp),%rax + mulq -56(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -432(%rbp),%rax + mulq -64(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -424(%rbp),%rax + mulq -72(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -416(%rbp),%rax + mulq -80(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -408(%rbp),%rax + mulq -88(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -400(%rbp),%rax + mulq -96(%rbp) + addq 
%rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -392(%rbp),%rax + mulq -104(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -384(%rbp),%rax + mulq -112(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -376(%rbp),%rax + mulq -120(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -368(%rbp),%rax + mulq -128(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -360(%rbp),%rax + mulq -136(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -352(%rbp),%rax + mulq -144(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -344(%rbp),%rax + mulq -152(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -336(%rbp),%rax + mulq -160(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -328(%rbp),%rax + mulq -168(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -320(%rbp),%rax + mulq -176(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -312(%rbp),%rax + mulq -184(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -304(%rbp),%rax + mulq -192(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -296(%rbp),%rax + mulq -200(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + +/NO_APP + movq %r11, 336(%r9) + movq %r10, %rsi + movq %r8, %r10 +/APP + movq -448(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %r10, %rcx +/APP + movq -440(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq 
-392(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + +/NO_APP + movq %rsi, %r11 + movq %rcx, %r10 +/APP + movq -296(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rdi + adcq %rdx,%r11 + adcq $0,%r10 + +/NO_APP + movq %rdi, 344(%r9) + movq %r11, %rcx + movq %r10, %rdi + movq %r8, %r11 +/APP + movq -440(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %r11, %rsi +/APP + movq -432(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + 
+ movq -384(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 352(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -432(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + 
mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 360(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -424(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -120(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 368(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -416(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi 
+ adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 376(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -408(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 384(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -400(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -48(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 392(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -392(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + 
adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 400(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -384(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 408(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -376(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -48(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 416(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -368(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 
424(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -360(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 432(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -352(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 440(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -344(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-336(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 448(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -336(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 456(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -328(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 464(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -320(%rbp),%rax + mulq 
-40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 472(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -312(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 480(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -304(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rcx, %r11
+ movq %rdi, %r10
+/APP
+ movq -296(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 488(%r9)
+ movq %r10, %rcx
+ movq %r11, %rsi
+/APP
+ movq -296(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rsi
+ adcq $0,%r8
+
+/NO_APP
+ movq %rcx, 496(%r9)
+ movl (%r12), %ecx
+ xorl (%rbx), %ecx
+ testq %rsi, %rsi
+ movq %rsi, 504(%r9)
+ movl $64, 8(%r13)
+ jne .L149
+ .align 16
+.L157:
+ movl 8(%r13), %edx
+ leal -1(%rdx), %ebx
+ testl %ebx, %ebx
+ movl %ebx, 8(%r13)
+ je .L149
+ leal -2(%rdx), %r12d
+ cmpq $0, (%r9,%r12,8)
+ je .L157
+.L149:
+ movl 8(%r13), %r9d
+ xorl %edx, %edx
+ testl %r9d, %r9d
+ cmovne %ecx, %edx
+ movl %edx, (%r13)
+ addq $520, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ leave
+ ret
+.LFE5:
+ .size s_mp_mul_comba_32, .-s_mp_mul_comba_32
+ .align 16
+/
+/ s_mp_sqr_comba_4: fully unrolled square of a 4-word operand.
+/
+/ %rdi  first argument. The pointer stored at offset 16 is read as an
+/       array of four 64-bit words, called a[0]..a[3] below.
+/ %rsi  second argument (kept in %r11). Eight 64-bit result words are
+/       written through the pointer stored at its offset 16; the 32-bit
+/       field at offset 8 receives the count of significant result words
+/       and the 32-bit field at offset 0 ends up as 0.
+/ NOTE(review): offsets 0/8/16 presumably are the sign/used/dp members
+/ of mp_int - confirm against the struct definition.
+/
+/ Result column k is the sum of a[i]*a[j] over i+j == k. Each column is
+/ accumulated in a three-register chain (addq/adcq/adcq $0); a product
+/ with i != j is added twice rather than multiplied twice.
+/ Finished columns are parked at -72(%rsp)..-16(%rsp) without moving
+/ %rsp, i.e. in the area below the stack pointer (relies on the x86-64
+/ red zone), and are copied to the destination at the end.
+/
+.globl s_mp_sqr_comba_4
+ .type s_mp_sqr_comba_4, @function
+s_mp_sqr_comba_4:
+.LFB6:
+ pushq %rbp
+.LCFI17:
+ movq %rsi, %r11
+/ %rsi is cleared and then used as the zero source for the accumulators
+ xorl %esi, %esi
+ movq %rsi, %r10
+ movq %rsi, %rbp
+ movq %rsi, %r8
+ pushq %rbx
+.LCFI18:
+ movq %rsi, %rbx
+/ %rcx = base of the operand words
+ movq 16(%rdi), %rcx
+ movq %rsi, %rdi
+/ column 0: a[0]^2
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, -72(%rsp)
+/ column 1: 2*a[0]*a[1]
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbx, -64(%rsp)
+/ column 2: 2*a[0]*a[2] + a[1]^2
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+
+/NO_APP
+ movq %rbp, %rbx
+ movq %r8, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rdi, -56(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %r8
+ movq %rsi, %rdi
+/ column 3: 2*a[0]*a[3] + 2*a[1]*a[2]
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r9, %rbx
+ movq %rdi, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, -48(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %rdi
+ movq %rsi, %r8
+/ destination header: word count (offset 8) = 8, offset-0 field = 0
+ movl $8, 8(%r11)
+ movl $0, (%r11)
+/ column 4: 2*a[1]*a[3] + a[2]^2
+/APP
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbx
+ movq %r8, %rbp
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %rax
+ movq %rdi, -40(%rsp)
+ movq %rbx, %rbp
+ movq %rax, %rdi
+ movq %rsi, %rbx
+/ column 5: 2*a[2]*a[3]
+/APP
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rbp, -32(%rsp)
+ movq %rbx, %r9
+/ column 6: a[3]^2; its carry-out (%r9) becomes result word 7
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%rsi
+
+/NO_APP
+/ copy the eight result words to the destination array
+/ (word 0 is still live in %r10, the rest come from the scratch slots)
+ movq 16(%r11), %rdx
+ movq %rdi, -24(%rsp)
+ movq %r9, -16(%rsp)
+ movq %r10, (%rdx)
+ movq -64(%rsp), %r8
+ movq %r8, 8(%rdx)
+ movq -56(%rsp), %rbp
+ movq %rbp, 16(%rdx)
+ movq -48(%rsp), %rdi
+ movq %rdi, 24(%rdx)
+ movq -40(%rsp), %rsi
+ movq %rsi, 32(%rdx)
+ movq -32(%rsp), %rbx
+ movq %rbx, 40(%rdx)
+ movq -24(%rsp), %rcx
+ movq %rcx, 48(%rdx)
+ movq -16(%rsp), %rax
+ movq %rax, 56(%rdx)
+/ trim the word count: while the highest counted word is zero, lower
+/ the count at offset 8; stop at the first non-zero word or at zero
+ movl 8(%r11), %edx
+ testl %edx, %edx
+ je .L168
+ leal -1(%rdx), %ecx
+ movq 16(%r11), %rsi
+ mov %ecx, %r10d
+ cmpq $0, (%rsi,%r10,8)
+ jne .L166
+ movl %ecx, %edx
+ .align 16
+.L167:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L171
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L167
+ movl %ecx, 8(%r11)
+ movl %ecx, %edx
+.L166:
+ testl %edx, %edx
+ je .L168
+ popq %rbx
+ popq %rbp
+/ non-zero count: the offset-0 field is reloaded and stored back unchanged
+ movl (%r11), %eax
+ movl %eax, (%r11)
+ ret
+.L171:
+/ every word was zero: the count becomes 0
+ movl %edx, 8(%r11)
+ .align 16
+.L168:
+/ zero count: the offset-0 field is forced to 0
+ popq %rbx
+ popq %rbp
+ xorl %eax, %eax
+ movl %eax, (%r11)
+ ret
+.LFE6:
+ .size s_mp_sqr_comba_4, .-s_mp_sqr_comba_4
+ .align 16
+/
+/ s_mp_sqr_comba_8: fully unrolled square of an 8-word operand.
+/
+/ %rdi  first argument. The pointer stored at offset 16 is read as an
+/       array of eight 64-bit words, called a[0]..a[7] below.
+/ %rsi  second argument (kept in %rbp). Sixteen 64-bit result words are
+/       written through the pointer stored at its offset 16; the 32-bit
+/       field at offset 8 receives the count of significant result words
+/       and the 32-bit field at offset 0 ends up as 0.
+/ NOTE(review): offsets 0/8/16 presumably are the sign/used/dp members
+/ of mp_int - confirm against the struct definition.
+/
+/ Result column k is the sum of a[i]*a[j] over i+j == k, accumulated in
+/ a three-register chain. Up to column 4, and again from column 10 on,
+/ each cross product is added to the chain twice. For columns 5..9 the
+/ cross products are first summed in temporaries and that sum is then
+/ added to the chain twice.
+/ Finished columns are parked at -120(%rsp)..(%rsp); only 8 bytes are
+/ reserved with subq, so the negative offsets lie below the stack
+/ pointer (relies on the x86-64 red zone).
+/
+.globl s_mp_sqr_comba_8
+ .type s_mp_sqr_comba_8, @function
+s_mp_sqr_comba_8:
+.LFB7:
+ pushq %r14
+.LCFI19:
+/ %r9 is cleared and then used as the zero source for the accumulators
+ xorl %r9d, %r9d
+ movq %r9, %r14
+ movq %r9, %r10
+ pushq %r13
+.LCFI20:
+ movq %r9, %r13
+ pushq %r12
+.LCFI21:
+ movq %r9, %r12
+ pushq %rbp
+.LCFI22:
+ movq %rsi, %rbp
+ movq %r9, %rsi
+ pushq %rbx
+.LCFI23:
+ movq %r9, %rbx
+ subq $8, %rsp
+.LCFI24:
+/ %rcx = base of the operand words
+ movq 16(%rdi), %rcx
+/ column 0: a[0]^2
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r14
+ adcq %rdx,%rbx
+ adcq $0,%r12
+
+/NO_APP
+ movq %r14, -120(%rsp)
+/ column 1: 2*a[0]*a[1]
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+
+/NO_APP
+ movq %rbx, -112(%rsp)
+/ column 2: 2*a[0]*a[2] + a[1]^2
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %rbx
+ movq %r13, %r10
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r12, -104(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %r11
+/ column 3: 2*a[0]*a[3] + 2*a[1]*a[2]
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rbx
+ movq %rsi, %r10
+ movq %r9, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r9, %rsi
+ movq %r11, -96(%rsp)
+ movq %r10, %r8
+ movq %rbx, %r12
+ movq %r9, %r11
+/ column 4: 2*a[0]*a[4] + 2*a[1]*a[3] + a[2]^2
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r13, %r10
+ movq %r9, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r12, -88(%rsp)
+/ column 5: 2*(a[0]*a[5] + a[1]*a[4] + a[2]*a[3])
+/ the cross products are summed in %r8:%rdi:%rsi, then added in twice
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -80(%rsp)
+/ column 6: 2*(a[0]*a[6] + a[1]*a[5] + a[2]*a[4]) + a[3]^2
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -72(%rsp)
+ movq %r11, %r10
+/ column 7: 2*(a[0]*a[7] + a[1]*a[6] + a[2]*a[5] + a[3]*a[4])
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+
+/NO_APP
+ movq %rbx, -64(%rsp)
+ movq %rax, %r11
+ movq %r9, %rbx
+/ column 8: 2*(a[1]*a[7] + a[2]*a[6] + a[3]*a[5]) + a[4]^2
+/APP
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rsi
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %r13
+ movq %rsi, %r11
+/APP
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -56(%rsp)
+ movq %r9, %r10
+/ column 9: 2*(a[2]*a[7] + a[3]*a[6] + a[4]*a[5])
+/ here the top temporary is %r13 instead of %rsi
+/APP
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %r13,%r13
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r13, %rax
+/APP
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+
+/NO_APP
+ movq %rbx, -48(%rsp)
+ movq %r11, %r12
+ movq %r10, %rsi
+ movq %r9, %rbx
+ movq %r9, %r11
+/ column 10: 2*a[3]*a[7] + 2*a[4]*a[6] + a[5]^2
+/APP
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rbx, %r13
+/APP
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+ movq %r9, %r13
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r12, -40(%rsp)
+ movq %rbx, %r8
+ movq %r10, %rdi
+/ column 11: 2*a[4]*a[7] + 2*a[5]*a[6]
+/APP
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+
+/NO_APP
+ movq %r8, %r10
+ movq %r11, %rbx
+/APP
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rdi, -32(%rsp)
+ movq %rbx, %rsi
+ movq %r10, %r12
+/ column 12: 2*a[5]*a[7] + a[6]^2
+/APP
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r12, -24(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %rsi
+ movq %r9, %r10
+/ destination header: word count (offset 8) = 16, offset-0 field = 0
+ movl $16, 8(%rbp)
+ movl $0, (%rbp)
+/ column 13: 2*a[6]*a[7]
+/APP
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %rdi, -16(%rsp)
+ movq %r10, %r8
+/ column 14: a[7]^2; its carry-out (%r8) becomes result word 15
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+/NO_APP
+/ copy the sixteen result words to the destination array
+/ (word 0 is still live in %r14, the rest come from the scratch slots)
+ movq 16(%rbp), %rax
+ movq %rsi, -8(%rsp)
+ movq %r8, (%rsp)
+ movq %r14, (%rax)
+ movq -112(%rsp), %rbx
+ movq %rbx, 8(%rax)
+ movq -104(%rsp), %rcx
+ movq %rcx, 16(%rax)
+ movq -96(%rsp), %rdx
+ movq %rdx, 24(%rax)
+ movq -88(%rsp), %r14
+ movq %r14, 32(%rax)
+ movq -80(%rsp), %r13
+ movq %r13, 40(%rax)
+ movq -72(%rsp), %r12
+ movq %r12, 48(%rax)
+ movq -64(%rsp), %r11
+ movq %r11, 56(%rax)
+ movq -56(%rsp), %r10
+ movq %r10, 64(%rax)
+ movq -48(%rsp), %r9
+ movq %r9, 72(%rax)
+ movq -40(%rsp), %r8
+ movq %r8, 80(%rax)
+ movq -32(%rsp), %rdi
+ movq %rdi, 88(%rax)
+ movq -24(%rsp), %rsi
+ movq %rsi, 96(%rax)
+ movq -16(%rsp), %rbx
+ movq %rbx, 104(%rax)
+ movq -8(%rsp), %rcx
+ movq %rcx, 112(%rax)
+ movq (%rsp), %rdx
+ movq %rdx, 120(%rax)
+/ trim the word count: while the highest counted word is zero, lower
+/ the count at offset 8; stop at the first non-zero word or at zero
+ movl 8(%rbp), %edx
+ testl %edx, %edx
+ je .L192
+ leal -1(%rdx), %ecx
+ movq 16(%rbp), %rsi
+ mov %ecx, %r14d
+ cmpq $0, (%rsi,%r14,8)
+ jne .L190
+ movl %ecx, %edx
+ .align 16
+.L191:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L195
+ decl %edx
+ mov %edx, %r9d
+ cmpq $0, (%rsi,%r9,8)
+ je .L191
+ movl %ecx, 8(%rbp)
+ movl %ecx, %edx
+.L190:
+ testl %edx, %edx
+ je .L192
+/ non-zero count: the offset-0 field is reloaded and stored back unchanged
+ movl (%rbp), %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+.L195:
+/ every word was zero: the count becomes 0
+ movl %edx, 8(%rbp)
+ .align 16
+.L192:
+/ zero count: the offset-0 field is forced to 0
+ xorl %eax, %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+.LFE7:
+ .size s_mp_sqr_comba_8, .-s_mp_sqr_comba_8
+ .align 16
+.globl s_mp_sqr_comba_16
+ .type s_mp_sqr_comba_16, @function
+s_mp_sqr_comba_16:
+.LFB8:
+ pushq %rbp
+.LCFI25:
+ xorl %r9d, %r9d
+ movq %r9, %r8
+ movq %r9, %r11
+ movq %rsp, %rbp
+.LCFI26:
+ pushq %r14
+.LCFI27:
+ movq %rsi, %r14
+ movq %r9, %rsi
+ pushq %r13
+.LCFI28:
+ movq %r9, %r13
+ pushq %r12
+.LCFI29:
+ movq %r9, %r12
+ pushq %rbx
+.LCFI30:
+ movq %r9, %rbx
+ subq $256, %rsp
+.LCFI31:
+ movq 16(%rdi), %rcx
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, -288(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -280(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %rbx
+ movq %r13, %r10
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, -272(%rbp)
+ movq %r10, %rdi
+ movq %r9, %rsi
+ movq %rbx, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ +/NO_APP + movq %rdi, %r12 + movq %r11, %rbx + movq %r9, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + +/NO_APP + movq %r9, %r11 + movq %r10, -264(%rbp) + movq %rbx, %r8 + movq %r12, %r13 + movq %r9, %r12 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r8, %rbx + movq %r12, %r10 + movq %r9, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r13, -256(%rbp) +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -248(%rbp) +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -240(%rbp) + movq %r11, %r10 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r11 + movq %rbx, -232(%rbp) + movq %r9, %rbx +/APP + movq (%rcx),%rax + mulq 64(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + + movq 32(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r13, %rdi + movq %r10, -224(%rbp) + movq %r12, %rsi + movq %rbx, %r10 + movq %r9, %r12 +/APP + movq (%rcx),%rax + mulq 72(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %r11, -216(%rbp) + movq %r12, %rbx +/APP + movq (%rcx),%rax + mulq 80(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 56(%rcx) + addq 
%rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%rbx + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%rbx + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %rbx, %r11 + movq %r13, %rdi + movq %rdx, %rbx + movq %r12, %rsi +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r10, -208(%rbp) + movq %rbx, %r10 +/APP + movq (%rcx),%rax + mulq 88(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r13 + movq %r11, -200(%rbp) + movq %r13, %r12 +/APP + movq (%rcx),%rax + mulq 96(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rdx + movq %rsi, %r11 +/APP + addq %r8,%r10 + adcq %rdx,%r12 + adcq %r11,%rax + addq %r8,%r10 + adcq %rdx,%r12 + adcq %r11,%rax + +/NO_APP + movq %rdx, %rbx + 
movq %rax, %r13 + movq %r11, %rsi +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, %rdi + movq %r10, -192(%rbp) + movq %r13, %r10 +/APP + movq (%rcx),%rax + mulq 104(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -184(%rbp) + movq %r13, %r12 +/APP + movq (%rcx),%rax + mulq 112(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rbx + movq %rsi, %rdx +/APP + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r11 + movq %rax, %r13 + movq %rbx, %rdi +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r10, -176(%rbp) + movq %r13, 
%r10 +/APP + movq (%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -168(%rbp) + movq %r13, %r12 +/APP + movq 8(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 16(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rbx + movq %rsi, %rdx +/APP + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r11 + movq %rax, %r13 + movq %rbx, %rdi +/APP + movq 64(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r10, -160(%rbp) + movq %r9, %r11 +/APP + movq 16(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + 
+/NO_APP + movq %r13, %r10 + movq %r9, %rbx +/APP + movq 24(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %r12, -152(%rbp) +/APP + movq 24(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 32(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r9, %r12 +/APP + movq 72(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -144(%rbp) + movq %r11, %r10 +/APP + movq 32(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 40(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + 
adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -136(%rbp) + movq %r12, %r11 +/APP + movq 40(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 48(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 80(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -128(%rbp) + movq %r11, %r10 +/APP + movq 48(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 56(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rbx, -120(%rbp) + movq %rdx, %r11 + movq %r9, %rbx +/APP + movq 56(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 64(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + 
adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r9, %r12 +/APP + movq 88(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -112(%rbp) + movq %r11, %r10 +/APP + movq 64(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 72(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -104(%rbp) + movq %r12, %r11 +/APP + movq 72(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 80(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 96(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -96(%rbp) + movq %r9, %r10 +/APP + movq 80(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 88(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
96(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r12 + movq %rsi, %rax + movq %r9, %rsi +/APP + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + +/NO_APP + movq %r9, %r12 + movq %rbx, -88(%rbp) + movq %r11, %r13 + movq %r10, %r11 +/APP + movq 88(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%r12 + +/NO_APP + movq %r12, %rdi +/APP + movq 96(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%rdi + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%rdi + +/NO_APP + movq %r11, %rbx + movq %rdi, %r10 + movq %r9, %r11 +/APP + movq 104(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r13, -80(%rbp) + movq %r10, %r8 + movq %rbx, %r10 +/APP + movq 96(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%rsi + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%rsi + +/NO_APP + movq %r8, %r12 + movq %rsi, %rbx +/APP + movq 104(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + +/NO_APP + movq %r10, -72(%rbp) + movq %rbx, %r13 + movq %r12, %rbx +/APP + movq 104(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rbx + adcq %rdx,%r13 + adcq $0,%r11 + addq %rax,%rbx + adcq %rdx,%r13 + adcq $0,%r11 + +/NO_APP + movq %r11, %r12 + movq %r13, %r10 +/APP + movq 112(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r10 + adcq $0,%r12 + +/NO_APP + movq %rbx, -64(%rbp) + movq %r10, %rdi + movq %r9, %rbx + movq %r12, %rsi +/APP + movq 112(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rbx + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rbx + +/NO_APP + movq %rdi, -56(%rbp) + movq %rbx, %r8 +/APP + movq 120(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r9 + +/NO_APP + movq %rsi, -48(%rbp) + movq 16(%r14), %rdi + leaq -288(%rbp), %rsi + 
movl $256, %edx + movq %r8, -40(%rbp) + movl $32, 8(%r14) + movl $0, (%r14) + call memcpy@PLT + movl 8(%r14), %edx + testl %edx, %edx + je .L232 + leal -1(%rdx), %ecx + movq 16(%r14), %rsi + mov %ecx, %r9d + cmpq $0, (%rsi,%r9,8) + jne .L230 + movl %ecx, %edx + .align 16 +.L231: + testl %edx, %edx + movl %edx, %ecx + je .L235 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L231 + movl %ecx, 8(%r14) + movl %ecx, %edx +.L230: + testl %edx, %edx + je .L232 + movl (%r14), %eax + movl %eax, (%r14) + addq $256, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.L235: + movl %edx, 8(%r14) + .align 16 +.L232: + xorl %eax, %eax + movl %eax, (%r14) + addq $256, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.LFE8: + .size s_mp_sqr_comba_16, .-s_mp_sqr_comba_16 + .align 16 +.globl s_mp_sqr_comba_32 + .type s_mp_sqr_comba_32, @function +s_mp_sqr_comba_32: +.LFB9: + pushq %rbp +.LCFI32: + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r11 + movq %rsp, %rbp +.LCFI33: + pushq %r14 +.LCFI34: + movq %rsi, %r14 + movq %r10, %rsi + pushq %r13 +.LCFI35: + movq %r10, %r13 + pushq %r12 +.LCFI36: + movq %r10, %r12 + pushq %rbx +.LCFI37: + movq %r10, %rbx + subq $512, %rsp +.LCFI38: + movq 16(%rdi), %rcx +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rsi + +/NO_APP + movq %r8, -544(%rbp) +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + +/NO_APP + movq %rbx, -536(%rbp) +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %rbx + movq %r13, %r9 +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %rsi, -528(%rbp) + movq %r9, %rdi + movq %r10, %rsi + movq %rbx, %r9 +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r9 + adcq %rdx,%rdi + adcq $0,%r11 + addq 
%rax,%r9 + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %rdi, %r12 + movq %r11, %r13 + movq %r10, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r10, %r11 + movq %r9, -520(%rbp) + movq %r13, %r8 + movq %r12, %r13 + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r8, %rbx + movq %r12, %r9 + movq %r10, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %r13, -512(%rbp) +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -504(%rbp) +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -496(%rbp) + movq %r11, %r9 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 
48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r11 + movq %rbx, -488(%rbp) + movq %r10, %rbx +/APP + movq (%rcx),%rax + mulq 64(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + + movq 32(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r13, %rdi + movq %r9, -480(%rbp) + movq %r12, %rsi + movq %rbx, %r9 + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 72(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %r11, -472(%rbp) + movq %r12, %rbx +/APP + movq (%rcx),%rax + mulq 80(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
24(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %rbx, %r11 + movq %r13, %rdi + movq %rdx, %rbx + movq %r12, %rsi +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r9, -464(%rbp) + movq %rbx, %r9 +/APP + movq (%rcx),%rax + mulq 88(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r13 + movq %r11, -456(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 96(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rax + movq %rsi, %r11 +/APP + addq %r8,%r9 + adcq %rax,%r12 + adcq %r11,%r13 + addq %r8,%r9 + adcq %rax,%r12 + adcq %r11,%r13 + 
+/NO_APP + movq %rax, %rbx + movq %r11, %rsi +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, %rdi + movq %r9, -448(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 104(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -440(%rbp) + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 112(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r13, %rdx + movq %rdi, %rbx + movq %rsi, %r13 +/APP + addq %r8,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %r8,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %rdi + movq %r11, %rsi +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP 
+ movq %r9, -432(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rax + movq %rdi, %rdx + movq %rsi, %rbx +/APP + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + +/NO_APP + movq %r12, -424(%rbp) + movq %rdx, %r8 + movq %rax, %rsi + movq %rbx, %rdi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 128(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 80(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 72(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + 
movq 64(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -416(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 136(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -408(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 144(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + 
mulq 80(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 72(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -400(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 152(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -392(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 160(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 
+ adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 80(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -384(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 168(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 152(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx 
+/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -376(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 176(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 88(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -368(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 184(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
32(%rcx),%rax + mulq 152(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %rdi + movq %r12, -360(%rbp) + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 192(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 104(%rcx) 
+ addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax +/APP + addq %rsi,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + addq %rsi,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r11 + movq %rbx, %r8 +/APP + movq 96(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rdi + movq %r9, -352(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 200(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -344(%rbp) + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 208(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq 
$0,%rdi + + movq 24(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rdx + movq %r8, %rbx + movq %rdi, %r13 +/APP + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %r8 + movq %r11, %rdi +/APP + movq 104(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -336(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 216(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 160(%rcx) + addq 
%rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -328(%rbp) +/APP + movq (%rcx),%rax + mulq 224(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rax + movq %r10, %rdx + movq %r8, 
%rbx + movq %rdi, %r12 +/APP + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rdi + movq %r12, %r11 + movq %rbx, %r8 + movq %rax, %r12 + movq %rdi, %r13 + movq %r11, %rdi +/APP + movq 112(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -320(%rbp) + movq %r13, %rbx + movq %r10, %r9 +/APP + movq (%rcx),%rax + mulq 232(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r9 + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r9 + +/NO_APP + movq %r12, -312(%rbp) + movq %r9, %r13 +/APP + movq (%rcx),%rax + mulq 240(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + 
mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r10, %rax + movq %r8, %r11 + movq %rdi, %rdx +/APP + addq %rsi,%rbx + adcq %r11,%r13 + adcq %rdx,%rax + addq %rsi,%rbx + adcq %r11,%r13 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r9 + movq %rax, %rdx + movq %r13, %r12 + movq %r11, %r8 + movq %rdx, %r13 + movq %r9, %rdi +/APP + movq 120(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, -304(%rbp) + movq %r13, %rbx + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + 
adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r13 + +/NO_APP + movq %r12, -296(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 16(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 184(%rcx) + 
addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r8, %r11 + movq %rdi, %rax +/APP + addq %rsi,%rbx + adcq %r11,%r12 + adcq %rax,%r13 + addq %rsi,%rbx + adcq %r11,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r9 + movq %r11, %r8 +/APP + movq 128(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, %rdi + movq %rbx, -288(%rbp) + movq %r13, %r9 +/APP + movq 16(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + +/NO_APP + movq %r10, %r13 +/APP + movq 24(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi 
+ adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -280(%rbp) + movq %r10, %r12 +/APP + movq 24(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 32(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rdx + movq %r8, %rbx + movq %rdi, %r13 +/APP + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %r8 + movq %r11, %rdi +/APP + movq 
136(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -272(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq 32(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 40(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -264(%rbp) +/APP + movq 40(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 48(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 
208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rax + movq %r10, %rdx + movq %r8, %rbx + movq %rdi, %r12 +/APP + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rdi + movq %r12, %r11 + movq %rbx, %r8 + movq %rax, %r12 + movq %rdi, %r13 + movq %r11, %rdi +/APP + movq 144(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r10, %r11 + movq %r9, -256(%rbp) + movq %r13, %r9 +/APP + movq 48(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 56(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi 
+ adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r11 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r11 + +/NO_APP + movq %r12, -248(%rbp) + movq %r11, %r13 +/APP + movq 56(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 64(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r10, %rax + movq %rsi, %rdx + movq %r8, %rbx + movq %rdi, %r12 +/APP + addq %rdx,%r9 + adcq %rbx,%r13 + adcq %r12,%rax + addq %rdx,%r9 + adcq %rbx,%r13 + adcq %r12,%rax + +/NO_APP + movq %r12, %r11 + movq %rdx, %r8 + movq %rax, %rdx + movq %r13, %r12 + movq %rbx, %rdi + movq %rdx, %r13 + movq %r11, %rsi +/APP + movq 152(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -240(%rbp) + movq %r13, %r9 + movq %r10, %r13 
+/APP + movq 64(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 72(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 96(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 104(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rax + movq %rdi, %rdx + movq %rsi, %rbx +/APP + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + +/NO_APP + movq %r12, -232(%rbp) + movq %rdx, %r8 + movq %rax, %rsi + movq %rbx, %rdi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 72(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 80(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 
128(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 160(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -224(%rbp) + movq %r13, %r9 +/APP + movq 80(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 88(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 96(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 104(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -216(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq 
%rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 88(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 96(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 168(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -208(%rbp) + movq %r13, %r9 +/APP + movq 96(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 104(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + 
movq 152(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -200(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 104(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 112(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 168(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 176(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -192(%rbp) + movq %r13, %r9 +/APP + movq 112(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 120(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + 
mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -184(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 120(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 128(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 168(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 176(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 184(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -176(%rbp) + movq %r13, %r9 
+/APP + movq 128(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 136(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -168(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 136(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 144(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rbx + movq %rsi, %rax +/APP + addq %r8,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + addq %r8,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r11 + movq %rbx, %rdi + movq %r10, %rbx +/APP + movq 192(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -160(%rbp) + movq %r13, %r9 +/APP + movq 144(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq 
%rdx,%rdi + xorq %rsi,%rsi + + movq 152(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%rbx + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%rbx + +/NO_APP + movq %r12, -152(%rbp) +/APP + movq 152(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 160(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rdx + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rax + movq %r13, %rdi + movq %r12, %rsi + movq %rax, %r11 + movq %r10, %r12 +/APP + movq 200(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -144(%rbp) + movq %r11, %r9 +/APP + movq 160(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 168(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq 
$0,%rsi + + movq 192(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -136(%rbp) + movq %r12, %r11 +/APP + movq 168(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 176(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 208(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -128(%rbp) + movq %r11, %r9 +/APP + movq 176(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 184(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rbx, -120(%rbp) + movq %rdx, %r11 + movq %r10, %rbx +/APP + movq 184(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 192(%rcx),%rax + mulq 240(%rcx) + addq 
%rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r10, %r12 +/APP + movq 216(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -112(%rbp) + movq %r11, %r9 +/APP + movq 192(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 200(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 216(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -104(%rbp) + movq %r12, %r11 +/APP + movq 200(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 208(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 216(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi + movq %r10, %r12 +/APP + movq 224(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -96(%rbp) + movq %r10, %r9 +/APP + movq 208(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 216(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + 
adcq %rdx,%rdi + adcq $0,%rsi + + movq 224(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %rax +/APP + addq %r8,%rbx + adcq %r13,%r11 + adcq %rax,%r9 + addq %r8,%rbx + adcq %r13,%r11 + adcq %rax,%r9 + +/NO_APP + movq %rbx, -88(%rbp) + movq %r11, %rsi + movq %r9, %r8 +/APP + movq 216(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r12, %r11 +/APP + movq 224(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r11 + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r11 + +/NO_APP + movq %r8, %r13 + movq %r11, %rbx +/APP + movq 232(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r13 + adcq $0,%rbx + +/NO_APP + movq %rsi, -80(%rbp) + movq %rbx, %r12 + movq %r13, %rdi + movq %r10, %r13 +/APP + movq 224(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rdi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rdi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %r9 + movq %r13, %r12 +/APP + movq 232(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r12 + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %rdi, -72(%rbp) + movq %r9, %r11 + movq %r12, %rbx + movq %r10, %r9 +/APP + movq 232(%rcx),%rax + mulq 248(%rcx) + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r9 + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %rbx, %r13 + movq %r9, %rbx + movq %r10, %r9 +/APP + movq 240(%rcx),%rax + mulq %rax + addq %rax,%r11 + adcq %rdx,%r13 + adcq $0,%rbx + +/NO_APP + movq %r11, -64(%rbp) + movq %r13, %rdi + movq %rbx, %rsi +/APP + movq 240(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r9 + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r9 + +/NO_APP + movq %rdi, -56(%rbp) + movq %r9, %r8 +/APP + movq 248(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r10 + +/NO_APP + movq %rsi, -48(%rbp) + movq 16(%r14), %rdi + leaq 
-544(%rbp), %rsi + movl $512, %edx + movq %r8, -40(%rbp) + movl $64, 8(%r14) + movl $0, (%r14) + call memcpy@PLT + movl 8(%r14), %edx + testl %edx, %edx + je .L304 + leal -1(%rdx), %ecx + movq 16(%r14), %rsi + mov %ecx, %r10d + cmpq $0, (%rsi,%r10,8) + jne .L302 + movl %ecx, %edx + .align 16 +.L303: + testl %edx, %edx + movl %edx, %ecx + je .L307 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L303 + movl %ecx, 8(%r14) + movl %ecx, %edx +.L302: + testl %edx, %edx + je .L304 + movl (%r14), %eax + movl %eax, (%r14) + addq $512, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.L307: + movl %edx, 8(%r14) + .align 16 +.L304: + xorl %eax, %eax + movl %eax, (%r14) + addq $512, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.LFE9: + .size s_mp_sqr_comba_32, .-s_mp_sqr_comba_32 diff --git a/security/nss/lib/freebl/mpi/mp_gf2m-priv.h b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h new file mode 100644 index 0000000000..5be4da4bf2 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h @@ -0,0 +1,73 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _MP_GF2M_PRIV_H_ +#define _MP_GF2M_PRIV_H_ + +#include "mpi-priv.h" + +extern const mp_digit mp_gf2m_sqr_tb[16]; + +#if defined(MP_USE_UINT_DIGIT) +#define MP_DIGIT_BITS 32 +/* enable fast divide and mod operations on MP_DIGIT_BITS */ +#define MP_DIGIT_BITS_LOG_2 5 +#define MP_DIGIT_BITS_MASK 0x1f +#else +#define MP_DIGIT_BITS 64 +/* enable fast divide and mod operations on MP_DIGIT_BITS */ +#define MP_DIGIT_BITS_LOG_2 6 +#define MP_DIGIT_BITS_MASK 0x3f +#endif + +/* Platform-specific macros for fast binary polynomial squaring. 
/* Platform-specific macros for fast binary polynomial squaring.
 *
 * Squaring a polynomial over GF(2) moves coefficient bit i to bit 2*i.
 * mp_gf2m_sqr_tb[] holds that spread for each 4-bit nibble (one output byte
 * per nibble). gf2m_SQR0(w) assembles the square of the low half of digit w,
 * gf2m_SQR1(w) the square of the high half; each yields one full digit.
 *
 * Every expansion is enclosed in parentheses so that the macro behaves as a
 * single operand whatever operator surrounds it (e.g. gf2m_SQR0(w) & mask).
 * The argument w is evaluated more than once and must be free of side
 * effects.
 */
#if MP_DIGIT_BITS == 32
#define gf2m_SQR1(w)                                                              \
    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \
     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF])
#define gf2m_SQR0(w)                                                             \
    (mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF])
#else
#define gf2m_SQR1(w)                                                              \
    (mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \
     mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \
     mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \
     mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF])
#define gf2m_SQR0(w)                                                              \
    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \
     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \
     mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 |  \
     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF])
#endif
+ */ +void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0, + const mp_digit b2, const mp_digit b1, const mp_digit b0); + +/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0) + * result is a binary polynomial in 8 mp_digits r[8]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1, + const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1, + const mp_digit b0); + +#endif /* _MP_GF2M_PRIV_H_ */ diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.c b/security/nss/lib/freebl/mpi/mp_gf2m.c new file mode 100644 index 0000000000..878b7cae8c --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m.c @@ -0,0 +1,677 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mp_gf2m.h" +#include "mp_gf2m-priv.h" +#include "mplogic.h" +#include "mpi-priv.h" + +const mp_digit mp_gf2m_sqr_tb[16] = { + 0, 1, 4, 5, 16, 17, 20, 21, + 64, 65, 68, 69, 80, 81, 84, 85 +}; + +/* Multiply two binary polynomials mp_digits a, b. + * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1. + * Output in two mp_digits rh, rl. 
+ */ +#if MP_DIGIT_BITS == 32 +void +s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b) +{ + register mp_digit h, l, s; + mp_digit tab[8], top2b = a >> 30; + register mp_digit a1, a2, a4; + + a1 = a & (0x3FFFFFFF); + a2 = a1 << 1; + a4 = a2 << 1; + + tab[0] = 0; + tab[1] = a1; + tab[2] = a2; + tab[3] = a1 ^ a2; + tab[4] = a4; + tab[5] = a1 ^ a4; + tab[6] = a2 ^ a4; + tab[7] = a1 ^ a2 ^ a4; + + s = tab[b & 0x7]; + l = s; + s = tab[b >> 3 & 0x7]; + l ^= s << 3; + h = s >> 29; + s = tab[b >> 6 & 0x7]; + l ^= s << 6; + h ^= s >> 26; + s = tab[b >> 9 & 0x7]; + l ^= s << 9; + h ^= s >> 23; + s = tab[b >> 12 & 0x7]; + l ^= s << 12; + h ^= s >> 20; + s = tab[b >> 15 & 0x7]; + l ^= s << 15; + h ^= s >> 17; + s = tab[b >> 18 & 0x7]; + l ^= s << 18; + h ^= s >> 14; + s = tab[b >> 21 & 0x7]; + l ^= s << 21; + h ^= s >> 11; + s = tab[b >> 24 & 0x7]; + l ^= s << 24; + h ^= s >> 8; + s = tab[b >> 27 & 0x7]; + l ^= s << 27; + h ^= s >> 5; + s = tab[b >> 30]; + l ^= s << 30; + h ^= s >> 2; + + /* compensate for the top two bits of a */ + + if (top2b & 01) { + l ^= b << 30; + h ^= b >> 2; + } + if (top2b & 02) { + l ^= b << 31; + h ^= b >> 1; + } + + *rh = h; + *rl = l; +} +#else +void +s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b) +{ + register mp_digit h, l, s; + mp_digit tab[16], top3b = a >> 61; + register mp_digit a1, a2, a4, a8; + + a1 = a & (0x1FFFFFFFFFFFFFFFULL); + a2 = a1 << 1; + a4 = a2 << 1; + a8 = a4 << 1; + tab[0] = 0; + tab[1] = a1; + tab[2] = a2; + tab[3] = a1 ^ a2; + tab[4] = a4; + tab[5] = a1 ^ a4; + tab[6] = a2 ^ a4; + tab[7] = a1 ^ a2 ^ a4; + tab[8] = a8; + tab[9] = a1 ^ a8; + tab[10] = a2 ^ a8; + tab[11] = a1 ^ a2 ^ a8; + tab[12] = a4 ^ a8; + tab[13] = a1 ^ a4 ^ a8; + tab[14] = a2 ^ a4 ^ a8; + tab[15] = a1 ^ a2 ^ a4 ^ a8; + + s = tab[b & 0xF]; + l = s; + s = tab[b >> 4 & 0xF]; + l ^= s << 4; + h = s >> 60; + s = tab[b >> 8 & 0xF]; + l ^= s << 8; + h ^= s >> 56; + s = tab[b >> 12 & 0xF]; + l ^= s << 12; + h ^= 
s >> 52; + s = tab[b >> 16 & 0xF]; + l ^= s << 16; + h ^= s >> 48; + s = tab[b >> 20 & 0xF]; + l ^= s << 20; + h ^= s >> 44; + s = tab[b >> 24 & 0xF]; + l ^= s << 24; + h ^= s >> 40; + s = tab[b >> 28 & 0xF]; + l ^= s << 28; + h ^= s >> 36; + s = tab[b >> 32 & 0xF]; + l ^= s << 32; + h ^= s >> 32; + s = tab[b >> 36 & 0xF]; + l ^= s << 36; + h ^= s >> 28; + s = tab[b >> 40 & 0xF]; + l ^= s << 40; + h ^= s >> 24; + s = tab[b >> 44 & 0xF]; + l ^= s << 44; + h ^= s >> 20; + s = tab[b >> 48 & 0xF]; + l ^= s << 48; + h ^= s >> 16; + s = tab[b >> 52 & 0xF]; + l ^= s << 52; + h ^= s >> 12; + s = tab[b >> 56 & 0xF]; + l ^= s << 56; + h ^= s >> 8; + s = tab[b >> 60]; + l ^= s << 60; + h ^= s >> 4; + + /* compensate for the top three bits of a */ + + if (top3b & 01) { + l ^= b << 61; + h ^= b >> 3; + } + if (top3b & 02) { + l ^= b << 62; + h ^= b >> 2; + } + if (top3b & 04) { + l ^= b << 63; + h ^= b >> 1; + } + + *rh = h; + *rl = l; +} +#endif + +/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0) + * result is a binary polynomial in 4 mp_digits r[4]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void +s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1, + const mp_digit b0) +{ + mp_digit m1, m0; + /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */ + s_bmul_1x1(r + 3, r + 2, a1, b1); + s_bmul_1x1(r + 1, r, a0, b0); + s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1); + /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */ + r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ + r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ +} + +/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0) + * result is a binary polynomial in 6 mp_digits r[6]. + * The caller MUST ensure that r has the right amount of space allocated. 
+ */ +void +s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0, + const mp_digit b2, const mp_digit b1, const mp_digit b0) +{ + mp_digit zm[4]; + + s_bmul_1x1(r + 5, r + 4, a2, b2); /* fill top 2 words */ + s_bmul_2x2(zm, a1, a2 ^ a0, b1, b2 ^ b0); /* fill middle 4 words */ + s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */ + + zm[3] ^= r[3]; + zm[2] ^= r[2]; + zm[1] ^= r[1] ^ r[5]; + zm[0] ^= r[0] ^ r[4]; + + r[5] ^= zm[3]; + r[4] ^= zm[2]; + r[3] ^= zm[1]; + r[2] ^= zm[0]; +} + +/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0) + * result is a binary polynomial in 8 mp_digits r[8]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void +s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1, + const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1, + const mp_digit b0) +{ + mp_digit zm[4]; + + s_bmul_2x2(r + 4, a3, a2, b3, b2); /* fill top 4 words */ + s_bmul_2x2(zm, a3 ^ a1, a2 ^ a0, b3 ^ b1, b2 ^ b0); /* fill middle 4 words */ + s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */ + + zm[3] ^= r[3] ^ r[7]; + zm[2] ^= r[2] ^ r[6]; + zm[1] ^= r[1] ^ r[5]; + zm[0] ^= r[0] ^ r[4]; + + r[5] ^= zm[3]; + r[4] ^= zm[2]; + r[3] ^= zm[1]; + r[2] ^= zm[0]; +} + +/* Compute addition of two binary polynomials a and b, + * store result in c; c could be a or b, a and b could be equal; + * c is the bitwise XOR of a and b. + */ +mp_err +mp_badd(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pa, *pb, *pc; + mp_size ix; + mp_size used_pa, used_pb; + mp_err res = MP_OKAY; + + /* Add all digits up to the precision of b. 
If b had more + * precision than a initially, swap a, b first + */ + if (MP_USED(a) >= MP_USED(b)) { + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + used_pa = MP_USED(a); + used_pb = MP_USED(b); + } else { + pa = MP_DIGITS(b); + pb = MP_DIGITS(a); + used_pa = MP_USED(b); + used_pb = MP_USED(a); + } + + /* Make sure c has enough precision for the output value */ + MP_CHECKOK(s_mp_pad(c, used_pa)); + + /* Do word-by-word xor */ + pc = MP_DIGITS(c); + for (ix = 0; ix < used_pb; ix++) { + (*pc++) = (*pa++) ^ (*pb++); + } + + /* Finish the rest of digits until we're actually done */ + for (; ix < used_pa; ++ix) { + *pc++ = *pa++; + } + + MP_USED(c) = used_pa; + MP_SIGN(c) = ZPOS; + s_mp_clamp(c); + +CLEANUP: + return res; +} + +#define s_mp_div2(a) MP_CHECKOK(mpl_rsh((a), (a), 1)); + +/* Compute binary polynomial multiply d = a * b */ +static void +s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d) +{ + mp_digit a_i, a0b0, a1b1, carry = 0; + while (a_len--) { + a_i = *a++; + s_bmul_1x1(&a1b1, &a0b0, a_i, b); + *d++ = a0b0 ^ carry; + carry = a1b1; + } + *d = carry; +} + +/* Compute binary polynomial xor multiply accumulate d ^= a * b */ +static void +s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d) +{ + mp_digit a_i, a0b0, a1b1, carry = 0; + while (a_len--) { + a_i = *a++; + s_bmul_1x1(&a1b1, &a0b0, a_i, b); + *d++ ^= a0b0 ^ carry; + carry = a1b1; + } + *d ^= carry; +} + +/* Compute binary polynomial xor multiply c = a * b. + * All parameters may be identical. 
+ */ +mp_err +mp_bmul(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pb, b_i; + mp_int tmp; + mp_size ib, a_used, b_used; + mp_err res = MP_OKAY; + + MP_DIGITS(&tmp) = 0; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (a == c) { + MP_CHECKOK(mp_init_copy(&tmp, a)); + if (a == b) + b = &tmp; + a = &tmp; + } else if (b == c) { + MP_CHECKOK(mp_init_copy(&tmp, b)); + b = &tmp; + } + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b if b longer */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + MP_CHECKOK(s_mp_pad(c, USED(a) + USED(b))); + + pb = MP_DIGITS(b); + s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c)); + + /* Outer loop: Digits of b */ + a_used = MP_USED(a); + b_used = MP_USED(b); + MP_USED(c) = a_used + b_used; + for (ib = 1; ib < b_used; ib++) { + b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib); + else + MP_DIGIT(c, ib + a_used) = b_i; + } + + s_mp_clamp(c); + + SIGN(c) = ZPOS; + +CLEANUP: + mp_clear(&tmp); + return res; +} + +/* Compute modular reduction of a and store result in r. + * r could be a. + * For modular arithmetic, the irreducible polynomial f(t) is represented + * as an array of int[], where f(t) is of the form: + * f(t) = t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. 
+ */ +mp_err +mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r) +{ + int j, k; + int n, dN, d0, d1; + mp_digit zz, *z, tmp; + mp_size used; + mp_err res = MP_OKAY; + + /* The algorithm does the reduction in place in r, + * if a != r, copy a into r first so reduction can be done in r + */ + if (a != r) { + MP_CHECKOK(mp_copy(a, r)); + } + z = MP_DIGITS(r); + + /* start reduction */ + /*dN = p[0] / MP_DIGIT_BITS; */ + dN = p[0] >> MP_DIGIT_BITS_LOG_2; + used = MP_USED(r); + + for (j = used - 1; j > dN;) { + + zz = z[j]; + if (zz == 0) { + j--; + continue; + } + z[j] = 0; + + for (k = 1; p[k] > 0; k++) { + /* reducing component t^p[k] */ + n = p[0] - p[k]; + /*d0 = n % MP_DIGIT_BITS; */ + d0 = n & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + /*n /= MP_DIGIT_BITS; */ + n >>= MP_DIGIT_BITS_LOG_2; + z[j - n] ^= (zz >> d0); + if (d0) + z[j - n - 1] ^= (zz << d1); + } + + /* reducing component t^0 */ + n = dN; + /*d0 = p[0] % MP_DIGIT_BITS;*/ + d0 = p[0] & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + z[j - n] ^= (zz >> d0); + if (d0) + z[j - n - 1] ^= (zz << d1); + } + + /* final round of reduction */ + while (j == dN) { + + /* d0 = p[0] % MP_DIGIT_BITS; */ + d0 = p[0] & MP_DIGIT_BITS_MASK; + zz = z[dN] >> d0; + if (zz == 0) + break; + d1 = MP_DIGIT_BITS - d0; + + /* clear up the top d1 bits */ + if (d0) { + z[dN] = (z[dN] << d1) >> d1; + } else { + z[dN] = 0; + } + *z ^= zz; /* reduction t^0 component */ + + for (k = 1; p[k] > 0; k++) { + /* reducing component t^p[k]*/ + /* n = p[k] / MP_DIGIT_BITS; */ + n = p[k] >> MP_DIGIT_BITS_LOG_2; + /* d0 = p[k] % MP_DIGIT_BITS; */ + d0 = p[k] & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + z[n] ^= (zz << d0); + tmp = zz >> d1; + if (d0 && tmp) + z[n + 1] ^= tmp; + } + } + + s_mp_clamp(r); +CLEANUP: + return res; +} + +/* Compute the product of two polynomials a and b, reduce modulo p, + * Store the result in r. r could be a or b; a could be b. 
+ */ +mp_err +mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r) +{ + mp_err res; + + if (a == b) + return mp_bsqrmod(a, p, r); + if ((res = mp_bmul(a, b, r)) != MP_OKAY) + return res; + return mp_bmod(r, p, r); +} + +/* Compute binary polynomial squaring c = a*a mod p . + * Parameter r and a can be identical. + */ + +mp_err +mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r) +{ + mp_digit *pa, *pr, a_i; + mp_int tmp; + mp_size ia, a_used; + mp_err res; + + ARGCHK(a != NULL && r != NULL, MP_BADARG); + MP_DIGITS(&tmp) = 0; + + if (a == r) { + MP_CHECKOK(mp_init_copy(&tmp, a)); + a = &tmp; + } + + MP_USED(r) = 1; + MP_DIGIT(r, 0) = 0; + MP_CHECKOK(s_mp_pad(r, 2 * USED(a))); + + pa = MP_DIGITS(a); + pr = MP_DIGITS(r); + a_used = MP_USED(a); + MP_USED(r) = 2 * a_used; + + for (ia = 0; ia < a_used; ia++) { + a_i = *pa++; + *pr++ = gf2m_SQR0(a_i); + *pr++ = gf2m_SQR1(a_i); + } + + MP_CHECKOK(mp_bmod(r, p, r)); + s_mp_clamp(r); + SIGN(r) = ZPOS; + +CLEANUP: + mp_clear(&tmp); + return res; +} + +/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p. + * Store the result in r. r could be x or y, and x could equal y. + * Uses algorithm Modular_Division_GF(2^m) from + * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to + * the Great Divide". 
+ */ +int +mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp, + const unsigned int p[], mp_int *r) +{ + mp_int aa, bb, uu; + mp_int *a, *b, *u, *v; + mp_err res = MP_OKAY; + + MP_DIGITS(&aa) = 0; + MP_DIGITS(&bb) = 0; + MP_DIGITS(&uu) = 0; + + MP_CHECKOK(mp_init_copy(&aa, x)); + MP_CHECKOK(mp_init_copy(&uu, y)); + MP_CHECKOK(mp_init_copy(&bb, pp)); + MP_CHECKOK(s_mp_pad(r, USED(pp))); + MP_USED(r) = 1; + MP_DIGIT(r, 0) = 0; + + a = &aa; + b = &bb; + u = &uu; + v = r; + /* reduce x and y mod p */ + MP_CHECKOK(mp_bmod(a, p, a)); + MP_CHECKOK(mp_bmod(u, p, u)); + + while (!mp_isodd(a)) { + s_mp_div2(a); + if (mp_isodd(u)) { + MP_CHECKOK(mp_badd(u, pp, u)); + } + s_mp_div2(u); + } + + do { + if (mp_cmp_mag(b, a) > 0) { + MP_CHECKOK(mp_badd(b, a, b)); + MP_CHECKOK(mp_badd(v, u, v)); + do { + s_mp_div2(b); + if (mp_isodd(v)) { + MP_CHECKOK(mp_badd(v, pp, v)); + } + s_mp_div2(v); + } while (!mp_isodd(b)); + } else if ((MP_DIGIT(a, 0) == 1) && (MP_USED(a) == 1)) + break; + else { + MP_CHECKOK(mp_badd(a, b, a)); + MP_CHECKOK(mp_badd(u, v, u)); + do { + s_mp_div2(a); + if (mp_isodd(u)) { + MP_CHECKOK(mp_badd(u, pp, u)); + } + s_mp_div2(u); + } while (!mp_isodd(a)); + } + } while (1); + + MP_CHECKOK(mp_copy(u, r)); + +CLEANUP: + mp_clear(&aa); + mp_clear(&bb); + mp_clear(&uu); + return res; +} + +/* Convert the bit-string representation of a polynomial a into an array + * of integers corresponding to the bits with non-zero coefficient. + * Up to max elements of the array will be filled. Return value is total + * number of coefficients that would be extracted if array was large enough. 
+ */ +int +mp_bpoly2arr(const mp_int *a, unsigned int p[], int max) +{ + int i, j, k; + mp_digit top_bit, mask; + + top_bit = 1; + top_bit <<= MP_DIGIT_BIT - 1; + + for (k = 0; k < max; k++) + p[k] = 0; + k = 0; + + for (i = MP_USED(a) - 1; i >= 0; i--) { + mask = top_bit; + for (j = MP_DIGIT_BIT - 1; j >= 0; j--) { + if (MP_DIGITS(a)[i] & mask) { + if (k < max) + p[k] = MP_DIGIT_BIT * i + j; + k++; + } + mask >>= 1; + } + } + + return k; +} + +/* Convert the coefficient array representation of a polynomial to a + * bit-string. The array must be terminated by 0. + */ +mp_err +mp_barr2poly(const unsigned int p[], mp_int *a) +{ + + mp_err res = MP_OKAY; + int i; + + mp_zero(a); + for (i = 0; p[i] > 0; i++) { + MP_CHECKOK(mpl_set_bit(a, p[i], 1)); + } + MP_CHECKOK(mpl_set_bit(a, 0, 1)); + +CLEANUP: + return res; +} diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.h b/security/nss/lib/freebl/mpi/mp_gf2m.h new file mode 100644 index 0000000000..ed2c85493c --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m.h @@ -0,0 +1,28 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _MP_GF2M_H_ +#define _MP_GF2M_H_ + +#include "mpi.h" + +mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c); +mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c); + +/* For modular arithmetic, the irreducible polynomial f(t) is represented + * as an array of int[], where f(t) is of the form: + * f(t) = t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. 
+ */ +mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r); +mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], + mp_int *r); +mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r); +mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp, + const unsigned int p[], mp_int *r); + +int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max); +mp_err mp_barr2poly(const unsigned int p[], mp_int *a); + +#endif /* _MP_GF2M_H_ */ diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c new file mode 100644 index 0000000000..ddc21ec1cb --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache.c @@ -0,0 +1,788 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "prtypes.h" + +/* + * This file implements a single function: s_mpi_getProcessorLineSize(); + * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line + * if a cache exists, or zero if there is no cache. If more than one + * cache line exists, it should return the smallest line size (which is + * usually the L1 cache). + * + * mp_modexp uses this information to make sure that private key information + * isn't being leaked through the cache. + * + * Currently the file returns good data for most modern x86 processors, and + * reasonable data on 64-bit ppc processors. All other processors are assumed + * to have a cache line size of 32 bytes. 
+ *
+ */
+
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+/* X86 processors have special instructions that tell us about the cache */
+#include "string.h"
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define AMD_64 1
+#endif
+
+/* Generic CPUID function */
+#if defined(AMD_64)
+
+#if defined(__GNUC__)
+
+/* Run cpuid for leaf 'op' with ecx cleared first, and store the resulting
+ * eax/ebx/ecx/edx through the four output pointers. */
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{
+    __asm__("xor %%ecx, %%ecx\n\t"
+            "cpuid\n\t"
+            : "=a"(*eax),
+              "=b"(*ebx),
+              "=c"(*ecx),
+              "=d"(*edx)
+            : "0"(op));
+}
+
+#elif defined(_MSC_VER)
+
+/* __cpuid() is a compiler intrinsic declared in <intrin.h>. */
+#include <intrin.h>
+
+/* Same contract as the GCC version, implemented with the __cpuid intrinsic:
+ * the four ints it fills in are copied out in eax, ebx, ecx, edx order. */
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{
+    int intrinsic_out[4];
+
+    __cpuid(intrinsic_out, op);
+    *eax = intrinsic_out[0];
+    *ebx = intrinsic_out[1];
+    *ecx = intrinsic_out[2];
+    *edx = intrinsic_out[3];
+}
+
+#endif
+
+#else /* !defined(AMD_64) */
+
+/* x86 */
+
+#if defined(__GNUC__)
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{
+    /* Some older processors don't fill the ecx register with cpuid, so clobber it
+     * before calling cpuid, so that there's no risk of picking random bits that
+     * erroneously indicate that absent CPU features are present.
+     * Also, GCC isn't smart enough to save the ebx PIC register on its own
+     * in this case, so do it by hand. Use edi to store ebx and pass the
+     * value returned in ebx from cpuid through edi.
*/
+    __asm__("xor %%ecx, %%ecx\n\t"
+            "mov %%ebx,%%edi\n\t"
+            "cpuid\n\t"
+            "xchgl %%ebx,%%edi\n\t"
+            : "=a"(*eax),
+              "=D"(*ebx), /* cpuid's ebx result, handed back through edi */
+              "=c"(*ecx),
+              "=d"(*edx)
+            : "0"(op));
+}
+
+/*
+ * try flipping a processor flag to determine CPU type
+ *
+ * 'flag' is the mask of flag-register bits to toggle.  The return value is
+ * the set of bits that read back changed, so 0 means the processor did not
+ * let them flip.  The original flags are restored before returning.
+ */
+static unsigned long
+changeFlag(unsigned long flag)
+{
+    unsigned long changedFlags, originalFlags;
+    __asm__("pushfl\n\t" /* get the flags */
+            "popl %0\n\t"
+            "movl %0,%1\n\t" /* save the original flags */
+            "xorl %2,%0\n\t" /* flip the bit */
+            "pushl %0\n\t"   /* set the flags */
+            "popfl\n\t"
+            "pushfl\n\t" /* get the flags again (for return) */
+            "popl %0\n\t"
+            "pushl %1\n\t" /* restore the original flags */
+            "popfl\n\t"
+            : "=r"(changedFlags),
+              "=r"(originalFlags),
+              "=r"(flag)
+            : "2"(flag));
+    return changedFlags ^ originalFlags;
+}
+
+#elif defined(_MSC_VER)
+
+/*
+ * windows versions of the above assembler
+ */
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h
+void
+freebl_cpuid(unsigned long op, unsigned long *Reax,
+             unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
+{
+    unsigned long Leax, Lebx, Lecx, Ledx;
+    __asm {
+        pushad
+        xor     ecx,ecx
+        mov     eax,op
+        wcpuid
+        mov     Leax,eax
+        mov     Lebx,ebx
+        mov     Lecx,ecx
+        mov     Ledx,edx
+        popad
+    }
+    *Reax = Leax;
+    *Rebx = Lebx;
+    *Recx = Lecx;
+    *Redx = Ledx;
+}
+
+static unsigned long
+changeFlag(unsigned long flag)
+{
+    unsigned long changedFlags, originalFlags;
+    __asm {
+        push eax
+        push ebx
+        pushfd /* get the flags */
+        pop  eax
+        push eax /* save the flags on the stack */
+        mov  originalFlags,eax /* save the original flags */
+        mov  ebx,flag
+        xor  eax,ebx /* flip the bit */
+        push eax /* set the flags */
+        popfd
+        pushfd /* get the flags again (for return) */
+        pop  eax
+        popfd /* restore the original flags */
+        mov changedFlags,eax
+        pop ebx
+        pop eax
+    }
+    return changedFlags ^ originalFlags;
+}
+#endif
+
+#endif
+
+#if !defined(AMD_64)
+#define AC_FLAG 0x40000
+#define ID_FLAG 0x200000
+
+/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
+static int +is386() +{ + return changeFlag(AC_FLAG) == 0; +} + +/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ +static int +is486() +{ + return changeFlag(ID_FLAG) == 0; +} +#endif + +/* + * table for Intel Cache. + * See Intel Application Note AP-485 for more information + */ + +typedef unsigned char CacheTypeEntry; + +typedef enum { + Cache_NONE = 0, + Cache_UNKNOWN = 1, + Cache_TLB = 2, + Cache_TLBi = 3, + Cache_TLBd = 4, + Cache_Trace = 5, + Cache_L1 = 6, + Cache_L1i = 7, + Cache_L1d = 8, + Cache_L2 = 9, + Cache_L2i = 10, + Cache_L2d = 11, + Cache_L3 = 12, + Cache_L3i = 13, + Cache_L3d = 14 +} CacheType; + +struct _cache { + CacheTypeEntry type; + unsigned char lineSize; +}; +static const struct _cache CacheMap[256] = { + /* 00 */ { Cache_NONE, 0 }, + /* 01 */ { Cache_TLBi, 0 }, + /* 02 */ { Cache_TLBi, 0 }, + /* 03 */ { Cache_TLBd, 0 }, + /* 04 */ { + Cache_TLBd, + }, + /* 05 */ { Cache_UNKNOWN, 0 }, + /* 06 */ { Cache_L1i, 32 }, + /* 07 */ { Cache_UNKNOWN, 0 }, + /* 08 */ { Cache_L1i, 32 }, + /* 09 */ { Cache_UNKNOWN, 0 }, + /* 0a */ { Cache_L1d, 32 }, + /* 0b */ { Cache_UNKNOWN, 0 }, + /* 0c */ { Cache_L1d, 32 }, + /* 0d */ { Cache_UNKNOWN, 0 }, + /* 0e */ { Cache_UNKNOWN, 0 }, + /* 0f */ { Cache_UNKNOWN, 0 }, + /* 10 */ { Cache_UNKNOWN, 0 }, + /* 11 */ { Cache_UNKNOWN, 0 }, + /* 12 */ { Cache_UNKNOWN, 0 }, + /* 13 */ { Cache_UNKNOWN, 0 }, + /* 14 */ { Cache_UNKNOWN, 0 }, + /* 15 */ { Cache_UNKNOWN, 0 }, + /* 16 */ { Cache_UNKNOWN, 0 }, + /* 17 */ { Cache_UNKNOWN, 0 }, + /* 18 */ { Cache_UNKNOWN, 0 }, + /* 19 */ { Cache_UNKNOWN, 0 }, + /* 1a */ { Cache_UNKNOWN, 0 }, + /* 1b */ { Cache_UNKNOWN, 0 }, + /* 1c */ { Cache_UNKNOWN, 0 }, + /* 1d */ { Cache_UNKNOWN, 0 }, + /* 1e */ { Cache_UNKNOWN, 0 }, + /* 1f */ { Cache_UNKNOWN, 0 }, + /* 20 */ { Cache_UNKNOWN, 0 }, + /* 21 */ { Cache_UNKNOWN, 0 }, + /* 22 */ { Cache_L3, 64 }, + /* 23 */ { Cache_L3, 64 }, + /* 24 */ { Cache_UNKNOWN, 0 }, + /* 25 */ { Cache_L3, 64 }, + /* 26 */ { Cache_UNKNOWN, 
0 }, + /* 27 */ { Cache_UNKNOWN, 0 }, + /* 28 */ { Cache_UNKNOWN, 0 }, + /* 29 */ { Cache_L3, 64 }, + /* 2a */ { Cache_UNKNOWN, 0 }, + /* 2b */ { Cache_UNKNOWN, 0 }, + /* 2c */ { Cache_L1d, 64 }, + /* 2d */ { Cache_UNKNOWN, 0 }, + /* 2e */ { Cache_UNKNOWN, 0 }, + /* 2f */ { Cache_UNKNOWN, 0 }, + /* 30 */ { Cache_L1i, 64 }, + /* 31 */ { Cache_UNKNOWN, 0 }, + /* 32 */ { Cache_UNKNOWN, 0 }, + /* 33 */ { Cache_UNKNOWN, 0 }, + /* 34 */ { Cache_UNKNOWN, 0 }, + /* 35 */ { Cache_UNKNOWN, 0 }, + /* 36 */ { Cache_UNKNOWN, 0 }, + /* 37 */ { Cache_UNKNOWN, 0 }, + /* 38 */ { Cache_UNKNOWN, 0 }, + /* 39 */ { Cache_L2, 64 }, + /* 3a */ { Cache_UNKNOWN, 0 }, + /* 3b */ { Cache_L2, 64 }, + /* 3c */ { Cache_L2, 64 }, + /* 3d */ { Cache_UNKNOWN, 0 }, + /* 3e */ { Cache_UNKNOWN, 0 }, + /* 3f */ { Cache_UNKNOWN, 0 }, + /* 40 */ { Cache_L2, 0 }, + /* 41 */ { Cache_L2, 32 }, + /* 42 */ { Cache_L2, 32 }, + /* 43 */ { Cache_L2, 32 }, + /* 44 */ { Cache_L2, 32 }, + /* 45 */ { Cache_L2, 32 }, + /* 46 */ { Cache_UNKNOWN, 0 }, + /* 47 */ { Cache_UNKNOWN, 0 }, + /* 48 */ { Cache_UNKNOWN, 0 }, + /* 49 */ { Cache_UNKNOWN, 0 }, + /* 4a */ { Cache_UNKNOWN, 0 }, + /* 4b */ { Cache_UNKNOWN, 0 }, + /* 4c */ { Cache_UNKNOWN, 0 }, + /* 4d */ { Cache_UNKNOWN, 0 }, + /* 4e */ { Cache_UNKNOWN, 0 }, + /* 4f */ { Cache_UNKNOWN, 0 }, + /* 50 */ { Cache_TLBi, 0 }, + /* 51 */ { Cache_TLBi, 0 }, + /* 52 */ { Cache_TLBi, 0 }, + /* 53 */ { Cache_UNKNOWN, 0 }, + /* 54 */ { Cache_UNKNOWN, 0 }, + /* 55 */ { Cache_UNKNOWN, 0 }, + /* 56 */ { Cache_UNKNOWN, 0 }, + /* 57 */ { Cache_UNKNOWN, 0 }, + /* 58 */ { Cache_UNKNOWN, 0 }, + /* 59 */ { Cache_UNKNOWN, 0 }, + /* 5a */ { Cache_UNKNOWN, 0 }, + /* 5b */ { Cache_TLBd, 0 }, + /* 5c */ { Cache_TLBd, 0 }, + /* 5d */ { Cache_TLBd, 0 }, + /* 5e */ { Cache_UNKNOWN, 0 }, + /* 5f */ { Cache_UNKNOWN, 0 }, + /* 60 */ { Cache_UNKNOWN, 0 }, + /* 61 */ { Cache_UNKNOWN, 0 }, + /* 62 */ { Cache_UNKNOWN, 0 }, + /* 63 */ { Cache_UNKNOWN, 0 }, + /* 64 */ { Cache_UNKNOWN, 0 }, + /* 65 */ { 
Cache_UNKNOWN, 0 }, + /* 66 */ { Cache_L1d, 64 }, + /* 67 */ { Cache_L1d, 64 }, + /* 68 */ { Cache_L1d, 64 }, + /* 69 */ { Cache_UNKNOWN, 0 }, + /* 6a */ { Cache_UNKNOWN, 0 }, + /* 6b */ { Cache_UNKNOWN, 0 }, + /* 6c */ { Cache_UNKNOWN, 0 }, + /* 6d */ { Cache_UNKNOWN, 0 }, + /* 6e */ { Cache_UNKNOWN, 0 }, + /* 6f */ { Cache_UNKNOWN, 0 }, + /* 70 */ { Cache_Trace, 1 }, + /* 71 */ { Cache_Trace, 1 }, + /* 72 */ { Cache_Trace, 1 }, + /* 73 */ { Cache_UNKNOWN, 0 }, + /* 74 */ { Cache_UNKNOWN, 0 }, + /* 75 */ { Cache_UNKNOWN, 0 }, + /* 76 */ { Cache_UNKNOWN, 0 }, + /* 77 */ { Cache_UNKNOWN, 0 }, + /* 78 */ { Cache_UNKNOWN, 0 }, + /* 79 */ { Cache_L2, 64 }, + /* 7a */ { Cache_L2, 64 }, + /* 7b */ { Cache_L2, 64 }, + /* 7c */ { Cache_L2, 64 }, + /* 7d */ { Cache_UNKNOWN, 0 }, + /* 7e */ { Cache_UNKNOWN, 0 }, + /* 7f */ { Cache_UNKNOWN, 0 }, + /* 80 */ { Cache_UNKNOWN, 0 }, + /* 81 */ { Cache_UNKNOWN, 0 }, + /* 82 */ { Cache_L2, 32 }, + /* 83 */ { Cache_L2, 32 }, + /* 84 */ { Cache_L2, 32 }, + /* 85 */ { Cache_L2, 32 }, + /* 86 */ { Cache_L2, 64 }, + /* 87 */ { Cache_L2, 64 }, + /* 88 */ { Cache_UNKNOWN, 0 }, + /* 89 */ { Cache_UNKNOWN, 0 }, + /* 8a */ { Cache_UNKNOWN, 0 }, + /* 8b */ { Cache_UNKNOWN, 0 }, + /* 8c */ { Cache_UNKNOWN, 0 }, + /* 8d */ { Cache_UNKNOWN, 0 }, + /* 8e */ { Cache_UNKNOWN, 0 }, + /* 8f */ { Cache_UNKNOWN, 0 }, + /* 90 */ { Cache_UNKNOWN, 0 }, + /* 91 */ { Cache_UNKNOWN, 0 }, + /* 92 */ { Cache_UNKNOWN, 0 }, + /* 93 */ { Cache_UNKNOWN, 0 }, + /* 94 */ { Cache_UNKNOWN, 0 }, + /* 95 */ { Cache_UNKNOWN, 0 }, + /* 96 */ { Cache_UNKNOWN, 0 }, + /* 97 */ { Cache_UNKNOWN, 0 }, + /* 98 */ { Cache_UNKNOWN, 0 }, + /* 99 */ { Cache_UNKNOWN, 0 }, + /* 9a */ { Cache_UNKNOWN, 0 }, + /* 9b */ { Cache_UNKNOWN, 0 }, + /* 9c */ { Cache_UNKNOWN, 0 }, + /* 9d */ { Cache_UNKNOWN, 0 }, + /* 9e */ { Cache_UNKNOWN, 0 }, + /* 9f */ { Cache_UNKNOWN, 0 }, + /* a0 */ { Cache_UNKNOWN, 0 }, + /* a1 */ { Cache_UNKNOWN, 0 }, + /* a2 */ { Cache_UNKNOWN, 0 }, + /* a3 */ { 
Cache_UNKNOWN, 0 }, + /* a4 */ { Cache_UNKNOWN, 0 }, + /* a5 */ { Cache_UNKNOWN, 0 }, + /* a6 */ { Cache_UNKNOWN, 0 }, + /* a7 */ { Cache_UNKNOWN, 0 }, + /* a8 */ { Cache_UNKNOWN, 0 }, + /* a9 */ { Cache_UNKNOWN, 0 }, + /* aa */ { Cache_UNKNOWN, 0 }, + /* ab */ { Cache_UNKNOWN, 0 }, + /* ac */ { Cache_UNKNOWN, 0 }, + /* ad */ { Cache_UNKNOWN, 0 }, + /* ae */ { Cache_UNKNOWN, 0 }, + /* af */ { Cache_UNKNOWN, 0 }, + /* b0 */ { Cache_TLBi, 0 }, + /* b1 */ { Cache_UNKNOWN, 0 }, + /* b2 */ { Cache_UNKNOWN, 0 }, + /* b3 */ { Cache_TLBd, 0 }, + /* b4 */ { Cache_UNKNOWN, 0 }, + /* b5 */ { Cache_UNKNOWN, 0 }, + /* b6 */ { Cache_UNKNOWN, 0 }, + /* b7 */ { Cache_UNKNOWN, 0 }, + /* b8 */ { Cache_UNKNOWN, 0 }, + /* b9 */ { Cache_UNKNOWN, 0 }, + /* ba */ { Cache_UNKNOWN, 0 }, + /* bb */ { Cache_UNKNOWN, 0 }, + /* bc */ { Cache_UNKNOWN, 0 }, + /* bd */ { Cache_UNKNOWN, 0 }, + /* be */ { Cache_UNKNOWN, 0 }, + /* bf */ { Cache_UNKNOWN, 0 }, + /* c0 */ { Cache_UNKNOWN, 0 }, + /* c1 */ { Cache_UNKNOWN, 0 }, + /* c2 */ { Cache_UNKNOWN, 0 }, + /* c3 */ { Cache_UNKNOWN, 0 }, + /* c4 */ { Cache_UNKNOWN, 0 }, + /* c5 */ { Cache_UNKNOWN, 0 }, + /* c6 */ { Cache_UNKNOWN, 0 }, + /* c7 */ { Cache_UNKNOWN, 0 }, + /* c8 */ { Cache_UNKNOWN, 0 }, + /* c9 */ { Cache_UNKNOWN, 0 }, + /* ca */ { Cache_UNKNOWN, 0 }, + /* cb */ { Cache_UNKNOWN, 0 }, + /* cc */ { Cache_UNKNOWN, 0 }, + /* cd */ { Cache_UNKNOWN, 0 }, + /* ce */ { Cache_UNKNOWN, 0 }, + /* cf */ { Cache_UNKNOWN, 0 }, + /* d0 */ { Cache_UNKNOWN, 0 }, + /* d1 */ { Cache_UNKNOWN, 0 }, + /* d2 */ { Cache_UNKNOWN, 0 }, + /* d3 */ { Cache_UNKNOWN, 0 }, + /* d4 */ { Cache_UNKNOWN, 0 }, + /* d5 */ { Cache_UNKNOWN, 0 }, + /* d6 */ { Cache_UNKNOWN, 0 }, + /* d7 */ { Cache_UNKNOWN, 0 }, + /* d8 */ { Cache_UNKNOWN, 0 }, + /* d9 */ { Cache_UNKNOWN, 0 }, + /* da */ { Cache_UNKNOWN, 0 }, + /* db */ { Cache_UNKNOWN, 0 }, + /* dc */ { Cache_UNKNOWN, 0 }, + /* dd */ { Cache_UNKNOWN, 0 }, + /* de */ { Cache_UNKNOWN, 0 }, + /* df */ { Cache_UNKNOWN, 0 }, + /* 
e0 */ { Cache_UNKNOWN, 0 }, + /* e1 */ { Cache_UNKNOWN, 0 }, + /* e2 */ { Cache_UNKNOWN, 0 }, + /* e3 */ { Cache_UNKNOWN, 0 }, + /* e4 */ { Cache_UNKNOWN, 0 }, + /* e5 */ { Cache_UNKNOWN, 0 }, + /* e6 */ { Cache_UNKNOWN, 0 }, + /* e7 */ { Cache_UNKNOWN, 0 }, + /* e8 */ { Cache_UNKNOWN, 0 }, + /* e9 */ { Cache_UNKNOWN, 0 }, + /* ea */ { Cache_UNKNOWN, 0 }, + /* eb */ { Cache_UNKNOWN, 0 }, + /* ec */ { Cache_UNKNOWN, 0 }, + /* ed */ { Cache_UNKNOWN, 0 }, + /* ee */ { Cache_UNKNOWN, 0 }, + /* ef */ { Cache_UNKNOWN, 0 }, + /* f0 */ { Cache_UNKNOWN, 0 }, + /* f1 */ { Cache_UNKNOWN, 0 }, + /* f2 */ { Cache_UNKNOWN, 0 }, + /* f3 */ { Cache_UNKNOWN, 0 }, + /* f4 */ { Cache_UNKNOWN, 0 }, + /* f5 */ { Cache_UNKNOWN, 0 }, + /* f6 */ { Cache_UNKNOWN, 0 }, + /* f7 */ { Cache_UNKNOWN, 0 }, + /* f8 */ { Cache_UNKNOWN, 0 }, + /* f9 */ { Cache_UNKNOWN, 0 }, + /* fa */ { Cache_UNKNOWN, 0 }, + /* fb */ { Cache_UNKNOWN, 0 }, + /* fc */ { Cache_UNKNOWN, 0 }, + /* fd */ { Cache_UNKNOWN, 0 }, + /* fe */ { Cache_UNKNOWN, 0 }, + /* ff */ { Cache_UNKNOWN, 0 } +}; + +/* + * use the above table to determine the CacheEntryLineSize. + */ +static void +getIntelCacheEntryLineSize(unsigned long val, int *level, + unsigned long *lineSize) +{ + CacheType type; + + type = CacheMap[val].type; + /* only interested in data caches */ + /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. + * this data check has the side effect of rejecting that entry. If + * that wasn't the case, we could have to reject it explicitly */ + if (CacheMap[val].lineSize == 0) { + return; + } + /* look at the caches, skip types we aren't interested in. 
+     * if we already have a value for a lower level cache, skip the
+     * current entry */
+    if ((type == Cache_L1) || (type == Cache_L1d)) {
+        *level = 1;
+        *lineSize = CacheMap[val].lineSize;
+    } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
+        *level = 2;
+        *lineSize = CacheMap[val].lineSize;
+    } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
+        *level = 3;
+        *lineSize = CacheMap[val].lineSize;
+    }
+    return;
+}
+
+static void
+getIntelRegisterCacheLineSize(unsigned long val,
+                              int *level, unsigned long *lineSize)
+{
+    /* one descriptor per byte of val, most significant byte first */
+    getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
+}
+
+/*
+ * returns '0' if no recognized cache is found, or if the cache
+ * information is not supported by this processor
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+    int level = 4; /* above L3, so the first L1/L2/L3 descriptor is accepted */
+    unsigned long lineSize = 0;
+    unsigned long eax, ebx, ecx, edx;
+    int repeat, count;
+
+    if (cpuidLevel < 2) { /* cpuid command '2' is not available */
+        return 0;
+    }
+
+    /* command '2' of the cpuid is intel's cache info call. Each byte of the
+     * 4 registers contain a potential descriptor for the cache. The CacheMap
+     * table maps the cache entry with the processor cache. Register 'al'
+     * contains a count value that cpuid '2' needs to be called in order to
+     * find all the cache descriptors. Only registers with the high bit set
+     * to 'zero' have valid descriptors. This code loops through all the
+     * required calls to cpuid '2' and passes any valid descriptors it finds
+     * to the getIntelRegisterCacheLineSize code, which breaks the registers
+     * down into their component descriptors. In the end the lineSize of the
+     * lowest level data cache is returned.
*/
+    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+    repeat = eax & 0xf;
+    for (count = 0; count < repeat; count++) {
+        if ((eax & 0x80000000) == 0) {
+            /* the low byte of eax is the call count, not a descriptor */
+            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
+        }
+        if ((ebx & 0x80000000) == 0) {
+            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
+        }
+        if ((ecx & 0x80000000) == 0) {
+            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
+        }
+        if ((edx & 0x80000000) == 0) {
+            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
+        }
+        if (count + 1 != repeat) {
+            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+        }
+    }
+    return lineSize;
+}
+
+/*
+ * returns '0' if the cache info is not supported by this processor.
+ * This is based on the AMD extended cache commands for cpuid.
+ * (see "AMD Processor Recognition Application Note" Publication 20734).
+ * Some other processors use the identical scheme.
+ * (see "Processor Recognition, Transmeta Corporation").
+ */
+static unsigned long
+getOtherCacheLineSize(unsigned long cpuidLevel)
+{
+    unsigned long lineSize = 0;
+    unsigned long eax, ebx, ecx, edx;
+
+    /* get the Extended CPUID level */
+    /* (the value passed in by the caller is replaced here, not consulted) */
+    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+
+    if (cpuidLevel >= 0x80000005) {
+        freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+        lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
+    }
+    return lineSize;
+}
+
+/* CPUID vendor strings.  Each #define is the index of the string that
+ * follows it, so the values must run 0..n-1 without gaps or repeats, and
+ * MAN_UNKNOWN must lie outside that range. */
+static const char *const manMap[] = {
+#define INTEL 0
+    "GenuineIntel",
+#define AMD 1
+    "AuthenticAMD",
+#define CYRIX 2
+    "CyrixInstead",
+#define CENTAUR 3
+    "CentaurHauls",
+#define NEXGEN 4
+    "NexGenDriven",
+#define TRANSMETA 5
+    "GenuineTMx86",
+#define RISE 6
+    "RiseRiseRise",
+#define UMC 7
+    "UMC UMC UMC ",
+#define SIS 8
+    "Sis Sis Sis ",
+#define NATIONAL 9
+    "Geode by NSC",
+};
+
+static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);
+
+#define MAN_UNKNOWN 10
+
+#if !defined(AMD_64)
+#define SSE2_FLAG (1 << 26)
+unsigned long
+s_mpi_is_sse2()
+{
+    unsigned long eax, ebx, ecx, edx;
+
+    if (is386() ||
is486()) {
+        return 0;
+    }
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+
+    /* has no SSE2 extensions */
+    if (eax == 0) {
+        return 0;
+    }
+
+    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+    return (edx & SSE2_FLAG) == SSE2_FLAG;
+}
+#endif
+
+/* Returns the cache line size in bytes for the running x86 processor:
+ * 0 for a 386, 32 for a 486, otherwise whatever the cpuid-based probes
+ * report, falling back to 32 when they report nothing. */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    unsigned long eax, ebx, ecx, edx;
+    PRUint32 cpuid[3];
+    unsigned long cpuidLevel;
+    unsigned long cacheLineSize = 0;
+    int manufacturer = MAN_UNKNOWN;
+    int i;
+    char string[13];
+
+#if !defined(AMD_64)
+    if (is386()) {
+        return 0; /* 386 had no cache */
+    }
+    if (is486()) {
+        return 32; /* really? need more info */
+    }
+#endif
+
+    /* Pentium, cpuid command is available */
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+    /* string holds the CPU's manufacturer ID string - a twelve
+     * character ASCII string stored in ebx, edx, ecx, and
+     * the 32-bit extended feature flags are in edx, ecx.
+     */
+    /* The words must be laid out in exactly that order (ebx, edx, ecx);
+     * any other order yields a string that matches no manMap entry. */
+    cpuid[0] = ebx;
+    cpuid[1] = edx;
+    cpuid[2] = ecx;
+    memcpy(string, cpuid, sizeof(cpuid));
+    string[12] = 0;
+
+    manufacturer = MAN_UNKNOWN;
+    for (i = 0; i < n_manufacturers; i++) {
+        if (strcmp(manMap[i], string) == 0) {
+            manufacturer = i;
+        }
+    }
+
+    if (manufacturer == INTEL) {
+        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+    } else {
+        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+    }
+    /* doesn't support cache info based on cpuid. This means
+     * an old pentium class processor, which have cache lines of
+     * 32. If we learn differently, we can use a switch based on
+     * the Manufacturer id */
+    if (cacheLineSize == 0) {
+        cacheLineSize = 32;
+    }
+    return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+#if defined(__ppc64__)
+/*
+ * Sigh, The PPC has some really nice features to help us determine cache
+ * size, since it had lots of direct control functions to do so. The POWER
+ * processor even has an instruction to do this, but it was dropped in
+ * PowerPC. Unfortunately most of them are not available in user mode.
+ *
+ * The dcbz function would be a great way to determine cache line size except
+ * 1) it only works on write-back memory (it throws an exception otherwise),
+ * and 2) because so many mac programs 'knew' the processor cache size was
+ * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
+ * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
+ * these programs happy. dcbzl works if 64 bit instructions are supported.
+ * If you know 64 bit instructions are supported, and that stack is
+ * write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array' */
+static inline void
+dcbzl(char *array)
+{
+    __asm__("dcbzl %0, %1"
+            : /*no result*/
+            : "b%"(array), "r"(0)
+            : "memory");
+}
+
+#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
+
+#define PPC_MAX_LINE_SIZE 256
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    char testArray[2 * PPC_MAX_LINE_SIZE + 1]; /* room to align inside it */
+    char *test;
+    int i;
+
+    /* align the array on a maximum line size boundary, so we
+     * know we are starting to clear from the first address */
+    test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
+    /* set all the values to 1's */
+    memset(test, 0xff, PPC_MAX_LINE_SIZE);
+    /* clear one cache block starting at 'test' */
+    dcbzl(test);
+
+    /* find the size of the cleared area, that's our block size */
+    for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) { /* 256, 128, ... 1 */
+        if (test[i - 1] == 0) { /* last byte of an i-byte block was cleared */
+            return i;
+        }
+    }
+    return 0; /* not even the first byte was cleared */
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+/*
+ * put other processor and platform specific cache code here
+ * return the smallest cache line size in bytes on the processor
+ * (usually the L1 cache). If the OS has a call, this would be
+ * a great place to put it.
+ *
+ * If there is no cache, return 0;
+ *
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
+ * + */ + +/* If no way to get the processor cache line size has been defined, assume + * it's 32 bytes (most common value, does not significantly impact performance) + */ +#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED +unsigned long +s_mpi_getProcessorLineSize() +{ + return 32; +} +#endif diff --git a/security/nss/lib/freebl/mpi/mpcpucache_amd64.s b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s new file mode 100644 index 0000000000..d493b4762f --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s @@ -0,0 +1,861 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .file "mpcpucache.c" +/ .section .rodata.str1.1,"aMS",@progbits,1 + .section .rodata +.LC0: + .string "GenuineIntel" +.LC1: + .string "AuthenticAMD" +.LC2: + .string "CyrixInstead" +.LC3: + .string "CentaurHauls" +.LC4: + .string "NexGenDriven" +.LC5: + .string "GenuineTMx86" +.LC6: + .string "RiseRiseRise" +.LC7: + .string "UMC UMC UMC " +.LC8: + .string "Sis Sis Sis " +.LC9: + .string "Geode by NSC" + .section .data.rel.ro.local,"aw",@progbits + .align 32 + .type manMap, @object + .size manMap, 80 +manMap: + .quad .LC0 + .quad .LC1 + .quad .LC2 + .quad .LC3 + .quad .LC4 + .quad .LC5 + .quad .LC6 + .quad .LC7 + .quad .LC8 + .quad .LC9 + .section .rodata + .align 32 + .type CacheMap, @object + .size CacheMap, 512 +CacheMap: + .byte 0 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .zero 1 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + 
.byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 7 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 
64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 
+ .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .text + .align 16 +.globl freebl_cpuid + .type freebl_cpuid, @function +freebl_cpuid: +.LFB2: + movq %rdx, %r10 + pushq %rbx +.LCFI0: + movq %rcx, %r11 + movq %rdi, %rax +/APP + cpuid + +/NO_APP + movq %rax, (%rsi) + movq %rbx, (%r10) + popq %rbx + movq %rcx, (%r11) + movq %rdx, (%r8) + ret +.LFE2: + .size freebl_cpuid, .-freebl_cpuid + .align 16 + .type getIntelCacheEntryLineSize, @function +getIntelCacheEntryLineSize: +.LFB3: + leaq CacheMap(%rip), %r9 + movq %rdx, %r10 + movzbl 1(%r9,%rdi,2), %ecx + movzbl (%r9,%rdi,2), %r8d + testb %cl, %cl + je .L2 + cmpl $6, %r8d + sete %dl + cmpl $8, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L4 + movl $1, (%rsi) +.L9: + movzbl %cl, %eax + movq %rax, (%r10) + ret + .align 16 +.L4: + movl (%rsi), %r11d + cmpl $1, %r11d + jg .L11 +.L6: + cmpl $2, %r11d + jle .L2 + cmpl $12, %r8d + sete %dl + cmpl $14, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L2 + movzbq 1(%r9,%rdi,2), %rax + movl $3, (%rsi) + movq %rax, (%r10) + .align 16 +.L2: + rep ; ret + .align 16 +.L11: + cmpl $9, %r8d + sete %dl + cmpl $11, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L6 + movl $2, (%rsi) + jmp .L9 +.LFE3: + .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize + .align 16 + .type getIntelRegisterCacheLineSize, @function +getIntelRegisterCacheLineSize: +.LFB4: + pushq %rbp +.LCFI1: + movq 
%rsp, %rbp +.LCFI2: + movq %rbx, -24(%rbp) +.LCFI3: + movq %rdi, %rbx + shrq $24, %rdi + movq %r12, -16(%rbp) +.LCFI4: + movq %r13, -8(%rbp) +.LCFI5: + andl $255, %edi + subq $24, %rsp +.LCFI6: + movq %rsi, %r13 + movq %rdx, %r12 + call getIntelCacheEntryLineSize + movq %rbx, %rdi + movq %r12, %rdx + movq %r13, %rsi + shrq $16, %rdi + andl $255, %edi + call getIntelCacheEntryLineSize + movq %rbx, %rdi + movq %r12, %rdx + movq %r13, %rsi + shrq $8, %rdi + andl $255, %ebx + andl $255, %edi + call getIntelCacheEntryLineSize + movq %r12, %rdx + movq %r13, %rsi + movq %rbx, %rdi + movq 8(%rsp), %r12 + movq (%rsp), %rbx + movq 16(%rsp), %r13 + leave + jmp getIntelCacheEntryLineSize +.LFE4: + .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize + .align 16 +.globl s_mpi_getProcessorLineSize + .type s_mpi_getProcessorLineSize, @function +s_mpi_getProcessorLineSize: +.LFB7: + pushq %rbp +.LCFI7: + xorl %edi, %edi + movq %rsp, %rbp +.LCFI8: + pushq %r15 +.LCFI9: + leaq -136(%rbp), %r8 + leaq -144(%rbp), %rcx + leaq -152(%rbp), %rdx + pushq %r14 +.LCFI10: + leaq -160(%rbp), %rsi + leaq -128(%rbp), %r14 + pushq %r13 +.LCFI11: + leaq manMap(%rip), %r13 + pushq %r12 +.LCFI12: + movl $9, %r12d + pushq %rbx +.LCFI13: + xorl %ebx, %ebx + subq $200, %rsp +.LCFI14: + call freebl_cpuid + movq -152(%rbp), %rax + movq -160(%rbp), %r15 + movb $0, -116(%rbp) + movl %eax, -128(%rbp) + movq -136(%rbp), %rax + movl %eax, -124(%rbp) + movq -144(%rbp), %rax + movl %eax, -120(%rbp) + .align 16 +.L18: + movslq %ebx,%rax + movq %r14, %rsi + movq (%r13,%rax,8), %rdi + call strcmp@PLT + testl %eax, %eax + cmove %ebx, %r12d + incl %ebx + cmpl $9, %ebx + jle .L18 + testl %r12d, %r12d + jne .L19 + xorl %eax, %eax + decl %r15d + movl $4, -204(%rbp) + movq $0, -200(%rbp) + jle .L21 + leaq -168(%rbp), %r8 + leaq -176(%rbp), %rcx + leaq -184(%rbp), %rdx + leaq -192(%rbp), %rsi + movl $2, %edi + xorl %ebx, %ebx + call freebl_cpuid + movq -192(%rbp), %rdi + movl %edi, %r12d + andl $15, %r12d 
+ cmpl %r12d, %ebx + jl .L30 + jmp .L38 + .align 16 +.L25: + movq -184(%rbp), %rdi + testl $2147483648, %edi + je .L40 +.L26: + movq -176(%rbp), %rdi + testl $2147483648, %edi + je .L41 +.L27: + movq -168(%rbp), %rdi + testl $2147483648, %edi + je .L42 +.L28: + incl %ebx + cmpl %r12d, %ebx + je .L24 + leaq -168(%rbp), %r8 + leaq -176(%rbp), %rcx + leaq -184(%rbp), %rdx + leaq -192(%rbp), %rsi + movl $2, %edi + call freebl_cpuid +.L24: + cmpl %r12d, %ebx + jge .L38 + movq -192(%rbp), %rdi +.L30: + testl $2147483648, %edi + jne .L25 + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + andl $4294967040, %edi + call getIntelRegisterCacheLineSize + movq -184(%rbp), %rdi + testl $2147483648, %edi + jne .L26 +.L40: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + movq -176(%rbp), %rdi + testl $2147483648, %edi + jne .L27 +.L41: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + movq -168(%rbp), %rdi + testl $2147483648, %edi + jne .L28 +.L42: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + jmp .L28 +.L38: + movq -200(%rbp), %rax +.L21: + movq %rax, %rdx + movl $32, %eax + testq %rdx, %rdx + cmoveq %rax, %rdx + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + leave + movq %rdx, %rax + ret +.L19: + leaq -216(%rbp), %r8 + leaq -224(%rbp), %rcx + leaq -232(%rbp), %rdx + leaq -240(%rbp), %rsi + movl $2147483648, %edi + xorl %ebx, %ebx + call freebl_cpuid + movl $2147483652, %eax + cmpq %rax, -240(%rbp) + ja .L43 +.L32: + movq %rbx, %rdx + movl $32, %eax + testq %rdx, %rdx + cmoveq %rax, %rdx + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + leave + movq %rdx, %rax + ret +.L43: + leaq -216(%rbp), %r8 + leaq -224(%rbp), %rcx + leaq -232(%rbp), %rdx + leaq -240(%rbp), %rsi + movl $2147483653, %edi + call freebl_cpuid + movzbq -224(%rbp), %rbx + jmp .L32 +.LFE7: + .size s_mpi_getProcessorLineSize, 
.-s_mpi_getProcessorLineSize diff --git a/security/nss/lib/freebl/mpi/mpcpucache_x86.s b/security/nss/lib/freebl/mpi/mpcpucache_x86.s new file mode 100644 index 0000000000..af17ebcb42 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache_x86.s @@ -0,0 +1,902 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .file "mpcpucache.c" +/ .section .rodata.str1.1,"aMS",@progbits,1 + .section .rodata +.LC0: + .string "GenuineIntel" +.LC1: + .string "AuthenticAMD" +.LC2: + .string "CyrixInstead" +.LC3: + .string "CentaurHauls" +.LC4: + .string "NexGenDriven" +.LC5: + .string "GenuineTMx86" +.LC6: + .string "RiseRiseRise" +.LC7: + .string "UMC UMC UMC " +.LC8: + .string "Sis Sis Sis " +.LC9: + .string "Geode by NSC" + .section .data.rel.ro.local,"aw",@progbits + .align 32 + .type manMap, @object + .size manMap, 40 +manMap: + .long .LC0 + .long .LC1 + .long .LC2 + .long .LC3 + .long .LC4 + .long .LC5 + .long .LC6 + .long .LC7 + .long .LC8 + .long .LC9 + .section .rodata + .align 32 + .type CacheMap, @object + .size CacheMap, 512 +CacheMap: + .byte 0 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .zero 1 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + 
.byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 7 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + 
.byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + 
.byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .text + .align 4 +.globl freebl_cpuid + .type freebl_cpuid, @function +freebl_cpuid: + pushl %ebp + pushl %edi + pushl %esi + subl $8, %esp + movl %edx, %ebp +/APP + pushl %ebx + xorl %ecx, %ecx + cpuid + mov %ebx,%esi + popl %ebx + +/NO_APP + movl %eax, (%ebp) + movl 24(%esp), %eax + movl %esi, (%eax) + movl 28(%esp), %eax + movl %ecx, (%eax) + movl 32(%esp), %eax + movl %edx, (%eax) + addl $8, %esp + popl %esi + popl %edi + popl %ebp + ret + .size freebl_cpuid, .-freebl_cpuid + .align 4 + .type changeFlag, @function +changeFlag: +/APP + pushfl + popl %edx + movl %edx,%ecx + xorl %eax,%edx + pushl %edx + popfl + pushfl + popl %edx + pushl %ecx + popfl + +/NO_APP + xorl %ecx, %edx + movl %edx, %eax + ret + .size changeFlag, .-changeFlag + .align 4 + .type getIntelCacheEntryLineSize, @function +getIntelCacheEntryLineSize: + pushl %edi + pushl %esi + pushl %ebx + call .L17 +.L17: + popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-.L17], %ebx + movzbl CacheMap@GOTOFF(%ebx,%eax,2), %ecx + movb 1+CacheMap@GOTOFF(%ebx,%eax,2), %al + testb %al, %al + movl 16(%esp), %edi + je .L3 + cmpl $6, %ecx + je .L6 + cmpl $8, %ecx + je .L6 + movl (%edx), %esi + cmpl $1, %esi + jg .L15 +.L8: + cmpl $2, %esi + jle .L3 + cmpl $12, %ecx + je .L12 + cmpl $14, %ecx + je .L12 + .align 4 +.L3: + popl %ebx + popl %esi + popl %edi + ret + .align 4 +.L6: + movzbl %al, %eax + movl $1, (%edx) + movl %eax, (%edi) +.L16: + popl %ebx + popl %esi + popl %edi + ret + .align 4 +.L15: + cmpl $9, %ecx + je .L9 + cmpl $11, %ecx + jne .L8 +.L9: + movzbl %al, %eax + movl $2, (%edx) + movl %eax, (%edi) + jmp .L16 +.L12: + movzbl %al, %eax + movl $3, 
(%edx) + movl %eax, (%edi) + jmp .L16 + .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize + .align 4 + .type getIntelRegisterCacheLineSize, @function +getIntelRegisterCacheLineSize: + pushl %ebp + movl %esp, %ebp + pushl %edi + pushl %esi + pushl %ecx + movl 8(%ebp), %edi + movl %eax, %esi + movl %edx, -12(%ebp) + shrl $24, %eax + pushl %edi + call getIntelCacheEntryLineSize + movl %esi, %eax + pushl %edi + shrl $16, %eax + movl -12(%ebp), %edx + andl $255, %eax + call getIntelCacheEntryLineSize + pushl %edi + movl %esi, %edx + movzbl %dh, %eax + movl -12(%ebp), %edx + call getIntelCacheEntryLineSize + andl $255, %esi + movl %edi, 8(%ebp) + movl -12(%ebp), %edx + addl $12, %esp + leal -8(%ebp), %esp + movl %esi, %eax + popl %esi + popl %edi + leave + jmp getIntelCacheEntryLineSize + .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize + .align 4 +.globl s_mpi_getProcessorLineSize + .type s_mpi_getProcessorLineSize, @function +s_mpi_getProcessorLineSize: + pushl %ebp + movl %esp, %ebp + pushl %edi + pushl %esi + pushl %ebx + subl $188, %esp + call .L52 +.L52: + popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-.L52], %ebx + movl $9, -168(%ebp) + movl $262144, %eax + call changeFlag + xorl %edx, %edx + testl %eax, %eax + jne .L50 +.L19: + leal -12(%ebp), %esp + popl %ebx + popl %esi + movl %edx, %eax + popl %edi + leave + ret + .align 4 +.L50: + movl $2097152, %eax + call changeFlag + testl %eax, %eax + movl $32, %edx + je .L19 + leal -108(%ebp), %eax + pushl %eax + leal -112(%ebp), %eax + pushl %eax + leal -116(%ebp), %eax + pushl %eax + leal -120(%ebp), %edx + xorl %eax, %eax + call freebl_cpuid + movl -120(%ebp), %eax + movl %eax, -164(%ebp) + movl -116(%ebp), %eax + movl %eax, -104(%ebp) + movl -108(%ebp), %eax + movl %eax, -100(%ebp) + movl -112(%ebp), %eax + movl %eax, -96(%ebp) + movb $0, -92(%ebp) + xorl %esi, %esi + addl $12, %esp + leal -104(%ebp), %edi + .align 4 +.L28: + subl $8, %esp + pushl %edi + pushl manMap@GOTOFF(%ebx,%esi,4) + 
call strcmp@PLT + addl $16, %esp + testl %eax, %eax + jne .L26 + movl %esi, -168(%ebp) +.L26: + incl %esi + cmpl $9, %esi + jle .L28 + movl -168(%ebp), %eax + testl %eax, %eax + jne .L29 + xorl %eax, %eax + cmpl $1, -164(%ebp) + movl $4, -144(%ebp) + movl $0, -140(%ebp) + jle .L41 + leal -124(%ebp), %edx + movl %edx, -188(%ebp) + leal -128(%ebp), %eax + pushl %edx + movl %eax, -184(%ebp) + leal -132(%ebp), %edx + pushl %eax + movl %edx, -180(%ebp) + movl $2, %eax + pushl %edx + leal -136(%ebp), %edx + call freebl_cpuid + movl -136(%ebp), %eax + movl %eax, %edi + andl $15, %edi + xorl %esi, %esi + addl $12, %esp + leal -140(%ebp), %edx + cmpl %edi, %esi + movl %edx, -176(%ebp) + jl .L40 + jmp .L48 + .align 4 +.L49: + movl -136(%ebp), %eax +.L40: + testl %eax, %eax + js .L35 + xorb %al, %al + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L35: + movl -132(%ebp), %eax + testl %eax, %eax + js .L36 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L36: + movl -128(%ebp), %eax + testl %eax, %eax + js .L37 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L37: + movl -124(%ebp), %eax + testl %eax, %eax + js .L38 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L38: + incl %esi + cmpl %edi, %esi + je .L34 + pushl -188(%ebp) + pushl -184(%ebp) + pushl -180(%ebp) + leal -136(%ebp), %edx + movl $2, %eax + call freebl_cpuid + addl $12, %esp +.L34: + cmpl %edi, %esi + jl .L49 +.L48: + movl -140(%ebp), %eax +.L41: + testl %eax, %eax + jne .L44 + movb $32, %al +.L44: + leal -12(%ebp), %esp + popl %ebx + popl %esi + movl %eax, %edx + movl %edx, %eax + popl %edi + leave + ret +.L29: + leal -148(%ebp), %eax + movl %eax, -192(%ebp) + movl $0, -172(%ebp) + leal -152(%ebp), %edi + pushl %eax + pushl %edi + leal -156(%ebp), %esi + pushl %esi + leal -160(%ebp), %edx + movl $-2147483648, %eax + call freebl_cpuid + 
addl $12, %esp + cmpl $-2147483644, -160(%ebp) + ja .L51 +.L42: + movl -172(%ebp), %eax + jmp .L41 +.L51: + pushl -192(%ebp) + pushl %edi + pushl %esi + leal -160(%ebp), %edx + movl $-2147483643, %eax + call freebl_cpuid + movzbl -152(%ebp), %edx + addl $12, %esp + movl %edx, -172(%ebp) + jmp .L42 + .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h new file mode 100644 index 0000000000..0cc868a14b --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi-config.h @@ -0,0 +1,56 @@ +/* Default configuration for MPI library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MPI_CONFIG_H_ +#define MPI_CONFIG_H_ + +/* + For boolean options, + 0 = no + 1 = yes + + Other options are documented individually. + + */ + +#ifndef MP_IOFUNC +#define MP_IOFUNC 0 /* include mp_print() ? */ +#endif + +#ifndef MP_MODARITH +#define MP_MODARITH 1 /* include modular arithmetic ? */ +#endif + +#ifndef MP_LOGTAB +#define MP_LOGTAB 1 /* use table of logs instead of log()? */ +#endif + +#ifndef MP_ARGCHK +/* + 0 = no parameter checks + 1 = runtime checks, continue execution and return an error to caller + 2 = assertions; dump core on parameter errors + */ +#ifdef DEBUG +#define MP_ARGCHK 2 /* how to check input arguments */ +#else +#define MP_ARGCHK 1 /* how to check input arguments */ +#endif +#endif + +#ifndef MP_DEBUG +#define MP_DEBUG 0 /* print diagnostic output? */ +#endif + +#ifndef MP_DEFPREC +#define MP_DEFPREC 64 /* default precision, in digits */ +#endif + +#ifndef MP_SQUARE +#define MP_SQUARE 1 /* use separate squaring code? 
*/
+#endif
+
+#endif /* ifndef MPI_CONFIG_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h
new file mode 100644
index 0000000000..9447a818f3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-priv.h
@@ -0,0 +1,243 @@
+/*
+ *  mpi-priv.h  - Private header file for MPI
+ *  Arbitrary precision integer arithmetic library
+ *
+ *  NOTE WELL: the content of this header file is NOT part of the "public"
+ *  API for the MPI library, and may change at any time.
+ *  Application programs that use libmpi should NOT include this header file.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef _MPI_PRIV_H_
+#define _MPI_PRIV_H_ 1
+
+#include "mpi.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if MP_DEBUG
+#include <stdio.h>
+
+#define DIAG(T, V)           \
+    {                        \
+        fprintf(stderr, T);  \
+        mp_print(V, stderr); \
+        fputc('\n', stderr); \
+    }
+#else
+#define DIAG(T, V)
+#endif
+
+/* If we aren't using a wired-in logarithm table, we need to include
+   the math library to get the log() function
+ */
+
+/* {{{ s_logv_2[] - log table for 2 in various bases */
+
+#if MP_LOGTAB
+/*
+  A table of the logs of 2 for various bases (the 0 and 1 entries of
+  this table are meaningless and should not be referenced).
+
+  This table is used to compute output lengths for the mp_toradix()
+  function.  Since a number n in radix r takes up about log_r(n)
+  digits, we estimate the output size by taking the least integer
+  greater than log_r(n), where:
+
+  log_r(n) = log_2(n) * log_r(2)
+
+  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+  which are the output bases supported.
+ */
+
+extern const float s_logv_2[];
+#define LOG_V_2(R) s_logv_2[(R)]
+
+#else
+
+/*
+  If MP_LOGTAB is not defined, use the math library to compute the
+  logarithms on the fly.  Otherwise, use the table.
+  Pick which works best for your system.
+ */
+
+#include <math.h>
+#define LOG_V_2(R) (log(2.0) / log(R))
+
+#endif /* if MP_LOGTAB */
+
+/* }}} */
+
+/* {{{ Digit arithmetic macros */
+
+/*
+  When adding and multiplying digits, the results can be larger than
+  can be contained in an mp_digit.  Thus, an mp_word is used.  These
+  macros mask off the upper and lower digits of the mp_word (the
+  mp_word may be more than 2 mp_digits wide, but we only concern
+  ourselves with the low-order 2 mp_digits)
+ */
+
+#define CARRYOUT(W) (mp_digit)((W) >> DIGIT_BIT)
+#define ACCUM(W) (mp_digit)(W)
+
+#define MP_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MP_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MP_HOWMANY(a, b) (((a) + (b)-1) / (b))
+#define MP_ROUNDUP(a, b) (MP_HOWMANY(a, b) * (b))
+
+/* }}} */
+
+/* {{{ Comparison constants */
+
+#define MP_LT (-1)
+#define MP_EQ 0
+#define MP_GT 1
+
+/* }}} */
+
+/* {{{ private function declarations */
+
+void s_mp_setz(mp_digit *dp, mp_size count);                     /* zero digits */
+void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */
+void *s_mp_alloc(size_t nb, size_t ni);                          /* general allocator    */
+void s_mp_free(void *ptr);                                       /* general free function */
+
+mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */
+mp_err s_mp_pad(mp_int *mp, mp_size min);  /* left pad with zeroes    */
+
+void s_mp_clamp(mp_int *mp); /* clip leading zeroes     */
+
+void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place   */
+
+mp_err s_mp_lshd(mp_int *mp, mp_size p);    /* left-shift by p digits  */
+void s_mp_rshd(mp_int *mp, mp_size p);      /* right-shift by p digits */
+mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */
+void s_mp_div_2d(mp_int *mp, mp_digit d);   /* divide by 2^d in place  */
+void s_mp_mod_2d(mp_int *mp, mp_digit d);   /* modulo 2^d in place     */
+void s_mp_div_2(mp_int *mp);                /* divide by 2 in place    */
+mp_err s_mp_mul_2(mp_int *mp);              /* multiply by 2 in place  */
+mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd);
+/* normalize for division  */
+mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */
+mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */
+mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */
+mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
+/* unsigned digit divide   */
+mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
+/* Barrett reduction       */
+mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition      */
+mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract      */
+mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
+/* a += b * RADIX^offset   */
+mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply      */
+#if MP_SQUARE
+mp_err s_mp_sqr(mp_int *a); /* magnitude square        */
+#else
+#define s_mp_sqr(a) s_mp_mul(a, a)
+#endif
+mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */
+mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err s_mp_2expt(mp_int *a, mp_digit k);       /* a = 2^k                 */
+int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */
+int s_mp_cmp_d(const mp_int *a, mp_digit d);    /* magnitude digit compare */
+int s_mp_ispow2(const mp_int *v);               /* is v a power of 2?      */
+int s_mp_ispow2d(mp_digit d);                   /* is d a power of 2?
*/
+
+int s_mp_tovalue(char ch, int r);                /* convert ch to value    */
+char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
+int s_mp_outlen(int bits, int r);                /* output length in bytes */
+mp_digit s_mp_invmod_radix(mp_digit P);          /* returns (P ** -1) mod RADIX */
+mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c);
+mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
+
+#ifdef NSS_USE_COMBA
+PR_STATIC_ASSERT(sizeof(mp_digit) == 8);
+#define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1)))
+
+void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C);
+
+void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_16(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+#endif /* end NSS_USE_COMBA */
+
+/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
+#if defined(__OS2__) && defined(__IBMC__)
+#define MPI_ASM_DECL __cdecl
+#else
+#define MPI_ASM_DECL
+#endif
+
+#ifdef MPI_AMD64
+
+mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit *, mp_digit *, mp_size, mp_digit);
+mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit *, const mp_digit *, mp_size, mp_digit);
+
+/* c = a * b */
+#define s_mpv_mul_d(a, a_len, b, c) \
+    ((mp_digit *)(c))[(a_len)] = s_mpv_mul_set_vec64(c, a, a_len, b)
+
+/* c += a * b */
+#define s_mpv_mul_d_add(a, a_len, b, c) \
+    ((mp_digit *)(c))[(a_len)] = s_mpv_mul_add_vec64(c, a, a_len, b)
+
+#else
+
+void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+                              mp_digit b, mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+                                  mp_digit b, mp_digit *c);
+
+#endif
+
+void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
+                                       mp_size a_len, mp_digit b,
+                                       mp_digit *c);
+void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
+                                     mp_size a_len,
+                                     mp_digit *sqrs);
+
+mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
+                                    mp_digit divisor, mp_digit *quot, mp_digit *rem);
+
+/* c += a * b * (MP_RADIX ** offset);  */
+/* Callers of this macro should be aware that the return type might vary;
+ * it should be treated as a void function. */
+#define s_mp_mul_d_add_offset(a, b, c, off) \
+    s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + (off))
+
+typedef struct {
+    mp_int N;         /* modulus N */
+    mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */
+} mp_mont_modulus;
+
+mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+                     mp_mont_modulus *mmm);
+mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm);
+
+/*
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * see mpcpucache.c for the implementation.
+ */
+unsigned long s_mpi_getProcessorLineSize(void);
+
+/* }}} */
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
new file mode 100644
index 0000000000..2e6cd84664
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -0,0 +1,4975 @@
+/*
+ *  mpi.c
+ *
+ *  Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+#if defined(__arm__) && \
+    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
+/* 16-bit thumb or ARM v3 doesn't work with the inlined assembler version */
+#undef MP_ASSEMBLY_MULTIPLY
+#undef MP_ASSEMBLY_SQUARE
+#endif
+
+#if MP_LOGTAB
+/*
+  A table of the logs of 2 for various bases (the 0 and 1 entries of
+  this table are meaningless and should not be referenced).
+
+  This table is used to compute output lengths for the mp_toradix()
+  function.  Since a number n in radix r takes up about log_r(n)
+  digits, we estimate the output size by taking the least integer
+  greater than log_r(n), where:
+
+  log_r(n) = log_2(n) * log_r(2)
+
+  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+  which are the output bases supported.
+ */
+#include "logtab.h"
+#endif
+
+#ifdef CT_VERIF
+#include <valgrind/memcheck.h>
+#endif
+
+/* {{{ Constant strings */
+
+/* Constant strings returned by mp_strerror() */
+static const char *mp_err_string[] = {
+    "unknown result code",     /* say what?            */
+    "boolean true",            /* MP_OKAY, MP_YES      */
+    "boolean false",           /* MP_NO                */
+    "out of memory",           /* MP_MEM               */
+    "argument out of range",   /* MP_RANGE             */
+    "invalid input parameter", /* MP_BADARG            */
+    "result is undefined"      /* MP_UNDEF             */
+};
+
+/* Value to digit maps for radix conversion   */
+
+/* s_dmap_1 - standard digits and letters */
+static const char *s_dmap_1 =
+    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+
+/* }}} */
+
+/* {{{ Default precision manipulation */
+
+/* Default precision for newly created mp_int's      */
+static mp_size s_mp_defprec = MP_DEFPREC;
+/* Report the default precision currently in effect. */
+mp_size
+mp_get_prec(void)
+{
+    return s_mp_defprec;
+
+} /* end mp_get_prec() */
+/* Change the default precision; passing 0 restores MP_DEFPREC. */
+void
+mp_set_prec(mp_size prec)
+{
+    if (prec == 0)
+        s_mp_defprec = MP_DEFPREC;
+    else
+        s_mp_defprec = prec;
+
+} /* end mp_set_prec() */
+
+/* }}} */
+
+#ifdef CT_VERIF /* memcheck helpers: flag each used digit of mp as undefined */
+void
+mp_taint(mp_int *mp)
+{
+    size_t i;
+    for (i = 0; i < mp->used; ++i) {
+        VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
+    }
+}
+/* Counterpart of mp_taint(): flag each used digit of mp as defined. */
+void
+mp_untaint(mp_int *mp)
+{
+    size_t i;
+    for (i = 0; i < mp->used; ++i) {
+        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
+    }
+}
+#endif
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_init(mp) */
+
+/*
+  mp_init(mp)
+
+  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
+  MP_MEM if memory could not be allocated for the structure.
+ */
+
+mp_err
+mp_init(mp_int *mp)
+{
+    return mp_init_size(mp, s_mp_defprec);
+
+} /* end mp_init() */
+
+/* }}} */
+
+/* {{{ mp_init_size(mp, prec) */
+
+/*
+  mp_init_size(mp, prec)
+
+  Initialize a new zero-valued mp_int with at least the given
+  precision; returns MP_OKAY if successful, or MP_MEM if memory could
+  not be allocated for the structure.
+ */
+
+mp_err
+mp_init_size(mp_int *mp, mp_size prec)
+{
+    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
+
+    /* Allocate in whole multiples of the default precision. */
+    prec = MP_ROUNDUP(prec, s_mp_defprec);
+    DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit));
+    if (DIGITS(mp) == NULL)
+        return MP_MEM;
+
+    ALLOC(mp) = prec;
+    USED(mp) = 1;
+    SIGN(mp) = ZPOS;
+    return MP_OKAY;
+} /* end mp_init_size() */
+
+/* }}} */
+
+/* {{{ mp_init_copy(mp, from) */
+
+/*
+  mp_init_copy(mp, from)
+
+  Make mp a fresh duplicate of from (digits, sign, used and allocated
+  sizes).  MP_OKAY on success, MP_MEM when the digit array cannot be
+  allocated; when mp and from are the same object it is left as is.
+ */
+
+mp_err
+mp_init_copy(mp_int *mp, const mp_int *from)
+{
+    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
+
+    if (mp != from) {
+        DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit));
+        if (DIGITS(mp) == NULL)
+            return MP_MEM;
+
+        /* Copy the digits in use, then mirror the bookkeeping fields. */
+        s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
+        SIGN(mp) = SIGN(from);
+        ALLOC(mp) = ALLOC(from);
+        USED(mp) = USED(from);
+    }
+
+    return MP_OKAY;
+} /* end mp_init_copy() */
+
+/* }}} */
+
+/* {{{ mp_copy(from, to) */
+
+/*
+  mp_copy(from, to)
+
+  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
+  'to' has already been initialized (if not, use mp_init_copy()
+  instead). If 'from' and 'to' are identical, nothing happens.
+ */
+
+mp_err
+mp_copy(const mp_int *from, mp_int *to)
+{
+    mp_digit *fresh;
+
+    ARGCHK(from != NULL && to != NULL, MP_BADARG);
+
+    if (from == to)
+        return MP_OKAY;
+
+    if (ALLOC(to) < USED(from)) {
+        /*
+          The destination buffer cannot hold every used digit of
+          'from', so build the copy in a newly allocated buffer sized
+          like the source's allocation.
+         */
+        fresh = s_mp_alloc(ALLOC(from), sizeof(mp_digit));
+        if (fresh == NULL)
+            return MP_MEM;
+
+        s_mp_copy(DIGITS(from), fresh, USED(from));
+        /* Zero the old digits before handing them back. */
+        if (DIGITS(to) != NULL) {
+            s_mp_setz(DIGITS(to), ALLOC(to));
+            s_mp_free(DIGITS(to));
+        }
+
+        DIGITS(to) = fresh;
+        ALLOC(to) = ALLOC(from);
+    } else {
+        /*
+          Enough room already: reuse the destination buffer rather
+          than going back to the allocator.  The digits above the
+          copied range are zeroed first.
+         */
+        s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
+        s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
+    }
+
+    /* The used count and the sign follow the source. */
+    USED(to) = USED(from);
+    SIGN(to) = SIGN(from);
+
+    return MP_OKAY;
+} /* end mp_copy() */
+
+/* }}} */
+
+/* {{{ mp_exch(mp1, mp2) */
+
+/*
+  mp_exch(mp1, mp2)
+
+  Swap the contents of mp1 and mp2 in place via s_mp_exch().  With
+  MP_ARGCHK == 2 a NULL argument trips an assertion; otherwise the
+  call returns without doing anything.  There is no error return.
+ */
+
+void
+mp_exch(mp_int *mp1, mp_int *mp2)
+{
+#if MP_ARGCHK == 2
+    assert(mp1 != NULL && mp2 != NULL);
+    s_mp_exch(mp1, mp2);
+#else
+    /* Quietly ignore the request when either operand is missing. */
+    if (mp1 != NULL && mp2 != NULL)
+        s_mp_exch(mp1, mp2);
+#endif
+
+} /* end mp_exch() */
+
+/* }}} */
+
+/* {{{ mp_clear(mp) */
+
+/*
+  mp_clear(mp)
+
+  Release the storage used by an mp_int, and void its fields so that
+  if someone calls mp_clear() again for the same int later, we won't
+  get tollchocked.
+ */
+
+void
+mp_clear(mp_int *mp)
+{
+    if (mp == NULL)
+        return;
+
+    if (DIGITS(mp) != NULL) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        s_mp_free(DIGITS(mp));
+        DIGITS(mp) = NULL;
+    }
+
+    USED(mp) = 0;
+    ALLOC(mp) = 0;
+
+} /* end mp_clear() */
+
+/* }}} */
+
+/* {{{ mp_zero(mp) */
+
+/*
+  mp_zero(mp)
+
+  Set mp to zero.  Does not change the allocated size of the structure,
+  and therefore cannot fail (except on a bad argument, which we ignore)
+ */
+void
+mp_zero(mp_int *mp)
+{
+    if (mp == NULL)
+        return;
+
+    s_mp_setz(DIGITS(mp), ALLOC(mp));
+    USED(mp) = 1;
+    SIGN(mp) = ZPOS;
+
+} /* end mp_zero() */
+
+/* }}} */
+
+/* {{{ mp_set(mp, d) */
+/* Set mp to the single-digit value d (zeroes mp first); NULL mp is ignored. */
+void
+mp_set(mp_int *mp, mp_digit d)
+{
+    if (mp == NULL)
+        return;
+
+    mp_zero(mp);
+    DIGIT(mp, 0) = d;
+
+} /* end mp_set() */
+
+/* }}} */
+
+/* {{{ mp_set_int(mp, z) */
+/* Set mp to the signed value z: magnitude via mp_set_ulong(), then the sign. */
+mp_err
+mp_set_int(mp_int *mp, long z)
+{
+    /* Magnitude via unsigned negation; labs(LONG_MIN) is undefined. */
+    unsigned long v = (z < 0) ? (0UL - (unsigned long)z) : (unsigned long)z;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */
+    if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */
+        return res;
+    }
+    if (z < 0) {
+        SIGN(mp) = NEG;
+    }
+
+    return MP_OKAY;
+} /* end mp_set_int() */
+
+/* }}} */
+
+/* {{{ mp_set_ulong(mp, z) */
+/* Set mp to z; fed in a byte at a time when z is wider than one mp_digit. */
+mp_err
+mp_set_ulong(mp_int *mp, unsigned long z)
+{
+    int ix;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    mp_zero(mp);
+    if (z == 0)
+        return MP_OKAY; /* shortcut for zero */
+
+    if (sizeof z <= sizeof(mp_digit)) {
+        DIGIT(mp, 0) = z;
+    } else {
+        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
+            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
+                return res;
+
+            res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
+            if (res != MP_OKAY)
+                return res;
+        }
+    }
+    return MP_OKAY;
+} /* end mp_set_ulong() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Digit arithmetic */
+
+/* {{{ mp_add_d(a, d, b) */
+
+/*
+  mp_add_d(a, d, b)
+
+  Compute the sum b = a + d, for a single
digit d. Respects the sign of + its primary addend (single digits are unsigned anyway). + */ + +mp_err +mp_add_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_int tmp; + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + + if (SIGN(&tmp) == ZPOS) { + if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else if (s_mp_cmp_d(&tmp, d) >= 0) { + if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else { + mp_neg(&tmp, &tmp); + + DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0); + } + + if (s_mp_cmp_d(&tmp, 0) == 0) + SIGN(&tmp) = ZPOS; + + s_mp_exch(&tmp, b); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_add_d() */ + +/* }}} */ + +/* {{{ mp_sub_d(a, d, b) */ + +/* + mp_sub_d(a, d, b) + + Compute the difference b = a - d, for a single digit d. Respects the + sign of its subtrahend (single digits are unsigned anyway). + */ + +mp_err +mp_sub_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_int tmp; + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + + if (SIGN(&tmp) == NEG) { + if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else if (s_mp_cmp_d(&tmp, d) >= 0) { + if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else { + mp_neg(&tmp, &tmp); + + DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0); + SIGN(&tmp) = NEG; + } + + if (s_mp_cmp_d(&tmp, 0) == 0) + SIGN(&tmp) = ZPOS; + + s_mp_exch(&tmp, b); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_sub_d() */ + +/* }}} */ + +/* {{{ mp_mul_d(a, d, b) */ + +/* + mp_mul_d(a, d, b) + + Compute the product b = a * d, for a single digit d. 
Respects the sign + of its multiplicand (single digits are unsigned anyway) + */ + +mp_err +mp_mul_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if (d == 0) { + mp_zero(b); + return MP_OKAY; + } + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + res = s_mp_mul_d(b, d); + + return res; + +} /* end mp_mul_d() */ + +/* }}} */ + +/* {{{ mp_mul_2(a, c) */ + +mp_err +mp_mul_2(const mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + return s_mp_mul_2(c); + +} /* end mp_mul_2() */ + +/* }}} */ + +/* {{{ mp_div_d(a, d, q, r) */ + +/* + mp_div_d(a, d, q, r) + + Compute the quotient q = a / d and remainder r = a mod d, for a + single digit d. Respects the sign of its divisor (single digits are + unsigned anyway). + */ + +mp_err +mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r) +{ + mp_err res; + mp_int qp; + mp_digit rem = 0; + int pow; + + ARGCHK(a != NULL, MP_BADARG); + + if (d == 0) + return MP_RANGE; + + /* Shortcut for powers of two ... */ + if ((pow = s_mp_ispow2d(d)) >= 0) { + mp_digit mask; + + mask = ((mp_digit)1 << pow) - 1; + rem = DIGIT(a, 0) & mask; + + if (q) { + if ((res = mp_copy(a, q)) != MP_OKAY) { + return res; + } + s_mp_div_2d(q, pow); + } + + if (r) + *r = rem; + + return MP_OKAY; + } + + if ((res = mp_init_copy(&qp, a)) != MP_OKAY) + return res; + + res = s_mp_div_d(&qp, d, &rem); + + if (s_mp_cmp_d(&qp, 0) == 0) + SIGN(q) = ZPOS; + + if (r) { + *r = rem; + } + + if (q) + s_mp_exch(&qp, q); + + mp_clear(&qp); + return res; + +} /* end mp_div_d() */ + +/* }}} */ + +/* {{{ mp_div_2(a, c) */ + +/* + mp_div_2(a, c) + + Compute c = a / 2, disregarding the remainder. 
+ */ + +mp_err +mp_div_2(const mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + s_mp_div_2(c); + + return MP_OKAY; + +} /* end mp_div_2() */ + +/* }}} */ + +/* {{{ mp_expt_d(a, d, b) */ + +mp_err +mp_expt_d(const mp_int *a, mp_digit d, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + DIGIT(&s, 0) = 1; + + while (d != 0) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d /= 2; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt_d() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Full arithmetic */ + +/* {{{ mp_abs(a, b) */ + +/* + mp_abs(a, b) + + Compute b = |a|. 'a' and 'b' may be identical. + */ + +mp_err +mp_abs(const mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + SIGN(b) = ZPOS; + + return MP_OKAY; + +} /* end mp_abs() */ + +/* }}} */ + +/* {{{ mp_neg(a, b) */ + +/* + mp_neg(a, b) + + Compute b = -a. 'a' and 'b' may be identical. + */ + +mp_err +mp_neg(const mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + if (s_mp_cmp_d(b, 0) == MP_EQ) + SIGN(b) = ZPOS; + else + SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG; + + return MP_OKAY; + +} /* end mp_neg() */ + +/* }}} */ + +/* {{{ mp_add(a, b, c) */ + +/* + mp_add(a, b, c) + + Compute c = a + b. All parameters may be identical. 
+ */ + +mp_err +mp_add(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */ + MP_CHECKOK(s_mp_add_3arg(a, b, c)); + } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b| */ + MP_CHECKOK(s_mp_sub_3arg(a, b, c)); + } else { /* different sign: |a| < |b| */ + MP_CHECKOK(s_mp_sub_3arg(b, a, c)); + } + + if (s_mp_cmp_d(c, 0) == MP_EQ) + SIGN(c) = ZPOS; + +CLEANUP: + return res; + +} /* end mp_add() */ + +/* }}} */ + +/* {{{ mp_sub(a, b, c) */ + +/* + mp_sub(a, b, c) + + Compute c = a - b. All parameters may be identical. + */ + +mp_err +mp_sub(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_err res; + int magDiff; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (a == b) { + mp_zero(c); + return MP_OKAY; + } + + if (MP_SIGN(a) != MP_SIGN(b)) { + MP_CHECKOK(s_mp_add_3arg(a, b, c)); + } else if (!(magDiff = s_mp_cmp(a, b))) { + mp_zero(c); + res = MP_OKAY; + } else if (magDiff > 0) { + MP_CHECKOK(s_mp_sub_3arg(a, b, c)); + } else { + MP_CHECKOK(s_mp_sub_3arg(b, a, c)); + MP_SIGN(c) = !MP_SIGN(a); + } + + if (s_mp_cmp_d(c, 0) == MP_EQ) + MP_SIGN(c) = MP_ZPOS; + +CLEANUP: + return res; + +} /* end mp_sub() */ + +/* }}} */ + +/* {{{ mp_mul(a, b, c) */ + +/* + mp_mul(a, b, c) + + Compute c = a * b. All parameters may be identical. 
+ */ +mp_err +mp_mul(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pb; + mp_int tmp; + mp_err res; + mp_size ib; + mp_size useda, usedb; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (a == c) { + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + if (a == b) + b = &tmp; + a = &tmp; + } else if (b == c) { + if ((res = mp_init_copy(&tmp, b)) != MP_OKAY) + return res; + b = &tmp; + } else { + MP_DIGITS(&tmp) = 0; + } + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b, to do fewer outer loops */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY) + goto CLEANUP; + +#ifdef NSS_USE_COMBA + if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) { + if (MP_USED(a) == 4) { + s_mp_mul_comba_4(a, b, c); + goto CLEANUP; + } + if (MP_USED(a) == 8) { + s_mp_mul_comba_8(a, b, c); + goto CLEANUP; + } + if (MP_USED(a) == 16) { + s_mp_mul_comba_16(a, b, c); + goto CLEANUP; + } + if (MP_USED(a) == 32) { + s_mp_mul_comba_32(a, b, c); + goto CLEANUP; + } + } +#endif + + pb = MP_DIGITS(b); + s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c)); + + /* Outer loop: Digits of b */ + useda = MP_USED(a); + usedb = MP_USED(b); + for (ib = 1; ib < usedb; ib++) { + mp_digit b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); + else + MP_DIGIT(c, ib + useda) = b_i; + } + + s_mp_clamp(c); + + if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ) + SIGN(c) = ZPOS; + else + SIGN(c) = NEG; + +CLEANUP: + mp_clear(&tmp); + return res; +} /* end mp_mul() */ + +/* }}} */ + +/* {{{ mp_sqr(a, sqr) */ + +#if MP_SQUARE +/* + Computes the square of a. This can be done more + efficiently than a general multiplication, because many of the + computation steps are redundant when squaring. 
The inner product + step is a bit more complicated, but we save a fair number of + iterations of the multiplication loop. + */ + +/* sqr = a^2; Caller provides both a and tmp; */ +mp_err +mp_sqr(const mp_int *a, mp_int *sqr) +{ + mp_digit *pa; + mp_digit d; + mp_err res; + mp_size ix; + mp_int tmp; + int count; + + ARGCHK(a != NULL && sqr != NULL, MP_BADARG); + + if (a == sqr) { + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + a = &tmp; + } else { + DIGITS(&tmp) = 0; + res = MP_OKAY; + } + + ix = 2 * MP_USED(a); + if (ix > MP_ALLOC(sqr)) { + MP_USED(sqr) = 1; + MP_CHECKOK(s_mp_grow(sqr, ix)); + } + MP_USED(sqr) = ix; + MP_DIGIT(sqr, 0) = 0; + +#ifdef NSS_USE_COMBA + if (IS_POWER_OF_2(MP_USED(a))) { + if (MP_USED(a) == 4) { + s_mp_sqr_comba_4(a, sqr); + goto CLEANUP; + } + if (MP_USED(a) == 8) { + s_mp_sqr_comba_8(a, sqr); + goto CLEANUP; + } + if (MP_USED(a) == 16) { + s_mp_sqr_comba_16(a, sqr); + goto CLEANUP; + } + if (MP_USED(a) == 32) { + s_mp_sqr_comba_32(a, sqr); + goto CLEANUP; + } + } +#endif + + pa = MP_DIGITS(a); + count = MP_USED(a) - 1; + if (count > 0) { + d = *pa++; + s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1); + for (ix = 3; --count > 0; ix += 2) { + d = *pa++; + s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix); + } /* for(ix ...) */ + MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */ + + /* now sqr *= 2 */ + s_mp_mul_2(sqr); + } else { + MP_DIGIT(sqr, 1) = 0; + } + + /* now add the squares of the digits of a to sqr. */ + s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr)); + + SIGN(sqr) = ZPOS; + s_mp_clamp(sqr); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_sqr() */ +#endif + +/* }}} */ + +/* {{{ mp_div(a, b, q, r) */ + +/* + mp_div(a, b, q, r) + + Compute q = a / b and r = a mod b. Input parameters may be re-used + as output parameters. 
If q or r is NULL, that portion of the + computation will be discarded (although it will still be computed) + */ +mp_err +mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r) +{ + mp_err res; + mp_int *pQ, *pR; + mp_int qtmp, rtmp, btmp; + int cmp; + mp_sign signA; + mp_sign signB; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + signA = MP_SIGN(a); + signB = MP_SIGN(b); + + if (mp_cmp_z(b) == MP_EQ) + return MP_RANGE; + + DIGITS(&qtmp) = 0; + DIGITS(&rtmp) = 0; + DIGITS(&btmp) = 0; + + /* Set up some temporaries... */ + if (!r || r == a || r == b) { + MP_CHECKOK(mp_init_copy(&rtmp, a)); + pR = &rtmp; + } else { + MP_CHECKOK(mp_copy(a, r)); + pR = r; + } + + if (!q || q == a || q == b) { + MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a))); + pQ = &qtmp; + } else { + MP_CHECKOK(s_mp_pad(q, MP_USED(a))); + pQ = q; + mp_zero(pQ); + } + + /* + If |a| <= |b|, we can compute the solution without division; + otherwise, we actually do the work required. + */ + if ((cmp = s_mp_cmp(a, b)) <= 0) { + if (cmp) { + /* r was set to a above. */ + mp_zero(pQ); + } else { + mp_set(pQ, 1); + mp_zero(pR); + } + } else { + MP_CHECKOK(mp_init_copy(&btmp, b)); + MP_CHECKOK(s_mp_div(pR, &btmp, pQ)); + } + + /* Compute the signs for the output */ + MP_SIGN(pR) = signA; /* Sr = Sa */ + /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */ + MP_SIGN(pQ) = (signA == signB) ? 
ZPOS : NEG; + + if (s_mp_cmp_d(pQ, 0) == MP_EQ) + SIGN(pQ) = ZPOS; + if (s_mp_cmp_d(pR, 0) == MP_EQ) + SIGN(pR) = ZPOS; + + /* Copy output, if it is needed */ + if (q && q != pQ) + s_mp_exch(pQ, q); + + if (r && r != pR) + s_mp_exch(pR, r); + +CLEANUP: + mp_clear(&btmp); + mp_clear(&rtmp); + mp_clear(&qtmp); + + return res; + +} /* end mp_div() */ + +/* }}} */ + +/* {{{ mp_div_2d(a, d, q, r) */ + +mp_err +mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r) +{ + mp_err res; + + ARGCHK(a != NULL, MP_BADARG); + + if (q) { + if ((res = mp_copy(a, q)) != MP_OKAY) + return res; + } + if (r) { + if ((res = mp_copy(a, r)) != MP_OKAY) + return res; + } + if (q) { + s_mp_div_2d(q, d); + } + if (r) { + s_mp_mod_2d(r, d); + } + + return MP_OKAY; + +} /* end mp_div_2d() */ + +/* }}} */ + +/* {{{ mp_expt(a, b, c) */ + +/* + mp_expt(a, b, c) + + Compute c = a ** b, that is, raise a to the b power. Uses a + standard iterative square-and-multiply technique. + */ + +mp_err +mp_expt(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int s, x; + mp_err res; + mp_digit d; + unsigned int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (mp_cmp_z(b) < 0) + return MP_RANGE; + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + + mp_set(&s, 1); + + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + /* Loop over low-order digits in ascending order */ + for (dig = 0; dig < (USED(b) - 1); dig++) { + d = DIGIT(b, dig); + + /* Loop over bits of each non-maximal digit */ + for (bit = 0; bit < DIGIT_BIT; bit++) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Consider now the last digit... 
*/ + d = DIGIT(b, dig); + + while (d) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + if (mp_iseven(b)) + SIGN(&s) = SIGN(a); + + res = mp_copy(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt() */ + +/* }}} */ + +/* {{{ mp_2expt(a, k) */ + +/* Compute a = 2^k */ + +mp_err +mp_2expt(mp_int *a, mp_digit k) +{ + ARGCHK(a != NULL, MP_BADARG); + + return s_mp_2expt(a, k); + +} /* end mp_2expt() */ + +/* }}} */ + +/* {{{ mp_mod(a, m, c) */ + +/* + mp_mod(a, m, c) + + Compute c = a (mod m). Result will always be 0 <= c < m. + */ + +mp_err +mp_mod(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + int mag; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if (SIGN(m) == NEG) + return MP_RANGE; + + /* + If |a| > m, we need to divide to get the remainder and take the + absolute value. + + If |a| < m, we don't need to do any division, just copy and adjust + the sign (if a is negative). + + If |a| == m, we can simply set the result to zero. + + This order is intended to minimize the average path length of the + comparison chain on common workloads -- the most frequent cases are + that |a| != m, so we do those first. + */ + if ((mag = s_mp_cmp(a, m)) > 0) { + if ((res = mp_div(a, m, NULL, c)) != MP_OKAY) + return res; + + if (SIGN(c) == NEG) { + if ((res = mp_add(c, m, c)) != MP_OKAY) + return res; + } + + } else if (mag < 0) { + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + if (mp_cmp_z(a) < 0) { + if ((res = mp_add(c, m, c)) != MP_OKAY) + return res; + } + + } else { + mp_zero(c); + } + + return MP_OKAY; + +} /* end mp_mod() */ + +/* }}} */ + +/* {{{ mp_mod_d(a, d, c) */ + +/* + mp_mod_d(a, d, c) + + Compute c = a (mod d). 
Result will always be 0 <= c < d + */ +mp_err +mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c) +{ + mp_err res; + mp_digit rem; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if (s_mp_cmp_d(a, d) > 0) { + if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY) + return res; + + } else { + if (SIGN(a) == NEG) + rem = d - DIGIT(a, 0); + else + rem = DIGIT(a, 0); + } + + if (c) + *c = rem; + + return MP_OKAY; + +} /* end mp_mod_d() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Modular arithmetic */ + +#if MP_MODARITH +/* {{{ mp_addmod(a, b, m, c) */ + +/* + mp_addmod(a, b, m, c) + + Compute c = (a + b) mod m + */ + +mp_err +mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_add(a, b, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + +/* {{{ mp_submod(a, b, m, c) */ + +/* + mp_submod(a, b, m, c) + + Compute c = (a - b) mod m + */ + +mp_err +mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_sub(a, b, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + +/* {{{ mp_mulmod(a, b, m, c) */ + +/* + mp_mulmod(a, b, m, c) + + Compute c = (a * b) mod m + */ + +mp_err +mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_mul(a, b, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + +/* {{{ mp_sqrmod(a, m, c) */ + +#if MP_SQUARE +mp_err +mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + + 
ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_sqr(a, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; + +} /* end mp_sqrmod() */ +#endif + +/* }}} */ + +/* {{{ s_mp_exptmod(a, b, m, c) */ + +/* + s_mp_exptmod(a, b, m, c) + + Compute c = (a ** b) mod m. Uses a standard square-and-multiply + method with modular reductions at each step. (This is basically the + same code as mp_expt(), except for the addition of the reductions) + + The modular reductions are done using Barrett's algorithm (see + s_mp_reduce() below for details) + */ + +mp_err +s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_int s, x, mu; + mp_err res; + mp_digit d; + unsigned int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG); + + if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0) + return MP_RANGE; + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY || + (res = mp_mod(&x, m, &x)) != MP_OKAY) + goto X; + if ((res = mp_init(&mu)) != MP_OKAY) + goto MU; + + mp_set(&s, 1); + + /* mu = b^2k / m */ + if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY) + goto CLEANUP; + if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY) + goto CLEANUP; + + /* Loop over digits of b in ascending order, except highest order */ + for (dig = 0; dig < (USED(b) - 1); dig++) { + d = DIGIT(b, dig); + + /* Loop over the bits of the lower-order digits */ + for (bit = 0; bit < DIGIT_BIT; bit++) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Now do the last digit... 
*/ + d = DIGIT(b, dig); + + while (d) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&mu); +MU: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end s_mp_exptmod() */ + +/* }}} */ + +/* {{{ mp_exptmod_d(a, d, m, c) */ + +mp_err +mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG); + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + mp_set(&s, 1); + + while (d != 0) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY || + (res = mp_mod(&s, m, &s)) != MP_OKAY) + goto CLEANUP; + } + + d /= 2; + + if ((res = s_mp_sqr(&x)) != MP_OKAY || + (res = mp_mod(&x, m, &x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_exptmod_d() */ + +/* }}} */ +#endif /* if MP_MODARITH */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Comparison functions */ + +/* {{{ mp_cmp_z(a) */ + +/* + mp_cmp_z(a) + + Compare a <=> 0. Returns <0 if a<0, 0 if a=0, >0 if a>0. + */ + +int +mp_cmp_z(const mp_int *a) +{ + ARGMPCHK(a != NULL); + + if (SIGN(a) == NEG) + return MP_LT; + else if (USED(a) == 1 && DIGIT(a, 0) == 0) + return MP_EQ; + else + return MP_GT; + +} /* end mp_cmp_z() */ + +/* }}} */ + +/* {{{ mp_cmp_d(a, d) */ + +/* + mp_cmp_d(a, d) + + Compare a <=> d. 
Returns <0 if a0 if a>d + */ + +int +mp_cmp_d(const mp_int *a, mp_digit d) +{ + ARGCHK(a != NULL, MP_EQ); + + if (SIGN(a) == NEG) + return MP_LT; + + return s_mp_cmp_d(a, d); + +} /* end mp_cmp_d() */ + +/* }}} */ + +/* {{{ mp_cmp(a, b) */ + +int +mp_cmp(const mp_int *a, const mp_int *b) +{ + ARGCHK(a != NULL && b != NULL, MP_EQ); + + if (SIGN(a) == SIGN(b)) { + int mag; + + if ((mag = s_mp_cmp(a, b)) == MP_EQ) + return MP_EQ; + + if (SIGN(a) == ZPOS) + return mag; + else + return -mag; + + } else if (SIGN(a) == ZPOS) { + return MP_GT; + } else { + return MP_LT; + } + +} /* end mp_cmp() */ + +/* }}} */ + +/* {{{ mp_cmp_mag(a, b) */ + +/* + mp_cmp_mag(a, b) + + Compares |a| <=> |b|, and returns an appropriate comparison result + */ + +int +mp_cmp_mag(const mp_int *a, const mp_int *b) +{ + ARGCHK(a != NULL && b != NULL, MP_EQ); + + return s_mp_cmp(a, b); + +} /* end mp_cmp_mag() */ + +/* }}} */ + +/* {{{ mp_isodd(a) */ + +/* + mp_isodd(a) + + Returns a true (non-zero) value if a is odd, false (zero) otherwise. + */ +int +mp_isodd(const mp_int *a) +{ + ARGMPCHK(a != NULL); + + return (int)(DIGIT(a, 0) & 1); + +} /* end mp_isodd() */ + +/* }}} */ + +/* {{{ mp_iseven(a) */ + +int +mp_iseven(const mp_int *a) +{ + return !mp_isodd(a); + +} /* end mp_iseven() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Number theoretic functions */ + +/* {{{ mp_gcd(a, b, c) */ + +/* + Computes the GCD using the constant-time algorithm + by Bernstein and Yang (https://eprint.iacr.org/2019/266) + "Fast constant-time gcd computation and modular inversion" + */ +mp_err +mp_gcd(mp_int *a, mp_int *b, mp_int *c) +{ + mp_err res; + mp_digit cond = 0, mask = 0; + mp_int g, temp, f; + int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1; + mp_size top, flen, glen; + mp_int *clear[3]; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + /* + Early exit if either of the inputs is zero. 
+ Caller is responsible for the proper handling of inputs. + */ + if (mp_cmp_z(a) == MP_EQ) { + res = mp_copy(b, c); + SIGN(c) = ZPOS; + return res; + } else if (mp_cmp_z(b) == MP_EQ) { + res = mp_copy(a, c); + SIGN(c) = ZPOS; + return res; + } + + MP_CHECKOK(mp_init(&temp)); + clear[++last] = &temp; + MP_CHECKOK(mp_init_copy(&g, a)); + clear[++last] = &g; + MP_CHECKOK(mp_init_copy(&f, b)); + clear[++last] = &f; + + /* + For even case compute the number of + shared powers of 2 in f and g. + */ + for (i = 0; i < USED(&f) && i < USED(&g); i++) { + mask = ~(DIGIT(&f, i) | DIGIT(&g, i)); + for (j = 0; j < MP_DIGIT_BIT; j++) { + bit &= mask; + shifts += bit; + mask >>= 1; + } + } + /* Reduce to the odd case by removing the powers of 2. */ + s_mp_div_2d(&f, shifts); + s_mp_div_2d(&g, shifts); + + /* Allocate to the size of largest mp_int. */ + top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g)); + MP_CHECKOK(s_mp_grow(&f, top)); + MP_CHECKOK(s_mp_grow(&g, top)); + MP_CHECKOK(s_mp_grow(&temp, top)); + + /* Make sure f contains the odd value. */ + MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top)); + + /* Upper bound for the total iterations. */ + flen = mpl_significant_bits(&f); + glen = mpl_significant_bits(&g); + m = 4 + 3 * ((flen >= glen) ? flen : glen); + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit +#endif + + for (i = 0; i < m; i++) { + /* Step 1: conditional swap. */ + /* Set cond if delta > 0 and g is odd. */ + cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1; + /* If cond is set replace (delta,f) with (-delta,-f). */ + delta = (-cond & -delta) | ((cond - 1) & delta); + SIGN(&f) ^= cond; + /* If cond is set swap f with g. */ + MP_CHECKOK(mp_cswap(cond, &f, &g, top)); + + /* Step 2: elemination. */ + /* Update delta. */ + delta++; + /* If g is odd, right shift (g+f) else right shift g. 
*/ + MP_CHECKOK(mp_add(&g, &f, &temp)); + MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top)); + s_mp_div_2(&g); + } + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + /* GCD is in f, take the absolute value. */ + SIGN(&f) = ZPOS; + + /* Add back the removed powers of 2. */ + MP_CHECKOK(s_mp_mul_2d(&f, shifts)); + + MP_CHECKOK(mp_copy(&f, c)); + +CLEANUP: + while (last >= 0) + mp_clear(clear[last--]); + return res; +} /* end mp_gcd() */ + +/* }}} */ + +/* {{{ mp_lcm(a, b, c) */ + +/* We compute the least common multiple using the rule: + + ab = [a, b](a, b) + + ... by computing the product, and dividing out the gcd. + */ + +mp_err +mp_lcm(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int gcd, prod; + mp_err res; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + /* Set up temporaries */ + if ((res = mp_init(&gcd)) != MP_OKAY) + return res; + if ((res = mp_init(&prod)) != MP_OKAY) + goto GCD; + + if ((res = mp_mul(a, b, &prod)) != MP_OKAY) + goto CLEANUP; + if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY) + goto CLEANUP; + + res = mp_div(&prod, &gcd, c, NULL); + +CLEANUP: + mp_clear(&prod); +GCD: + mp_clear(&gcd); + + return res; + +} /* end mp_lcm() */ + +/* }}} */ + +/* {{{ mp_xgcd(a, b, g, x, y) */ + +/* + mp_xgcd(a, b, g, x, y) + + Compute g = (a, b) and values x and y satisfying Bezout's identity + (that is, ax + by = g). This uses the binary extended GCD algorithm + based on the Stein algorithm used for mp_gcd() + See algorithm 14.61 in Handbook of Applied Cryptogrpahy. 
+ */ + +mp_err +mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y) +{ + mp_int gx, xc, yc, u, v, A, B, C, D; + mp_int *clean[9]; + mp_err res; + int last = -1; + + if (mp_cmp_z(b) == 0) + return MP_RANGE; + + /* Initialize all these variables we need */ + MP_CHECKOK(mp_init(&u)); + clean[++last] = &u; + MP_CHECKOK(mp_init(&v)); + clean[++last] = &v; + MP_CHECKOK(mp_init(&gx)); + clean[++last] = &gx; + MP_CHECKOK(mp_init(&A)); + clean[++last] = &A; + MP_CHECKOK(mp_init(&B)); + clean[++last] = &B; + MP_CHECKOK(mp_init(&C)); + clean[++last] = &C; + MP_CHECKOK(mp_init(&D)); + clean[++last] = &D; + MP_CHECKOK(mp_init_copy(&xc, a)); + clean[++last] = &xc; + mp_abs(&xc, &xc); + MP_CHECKOK(mp_init_copy(&yc, b)); + clean[++last] = &yc; + mp_abs(&yc, &yc); + + mp_set(&gx, 1); + + /* Divide by two until at least one of them is odd */ + while (mp_iseven(&xc) && mp_iseven(&yc)) { + mp_size nx = mp_trailing_zeros(&xc); + mp_size ny = mp_trailing_zeros(&yc); + mp_size n = MP_MIN(nx, ny); + s_mp_div_2d(&xc, n); + s_mp_div_2d(&yc, n); + MP_CHECKOK(s_mp_mul_2d(&gx, n)); + } + + MP_CHECKOK(mp_copy(&xc, &u)); + MP_CHECKOK(mp_copy(&yc, &v)); + mp_set(&A, 1); + mp_set(&D, 1); + + /* Loop through binary GCD algorithm */ + do { + while (mp_iseven(&u)) { + s_mp_div_2(&u); + + if (mp_iseven(&A) && mp_iseven(&B)) { + s_mp_div_2(&A); + s_mp_div_2(&B); + } else { + MP_CHECKOK(mp_add(&A, &yc, &A)); + s_mp_div_2(&A); + MP_CHECKOK(mp_sub(&B, &xc, &B)); + s_mp_div_2(&B); + } + } + + while (mp_iseven(&v)) { + s_mp_div_2(&v); + + if (mp_iseven(&C) && mp_iseven(&D)) { + s_mp_div_2(&C); + s_mp_div_2(&D); + } else { + MP_CHECKOK(mp_add(&C, &yc, &C)); + s_mp_div_2(&C); + MP_CHECKOK(mp_sub(&D, &xc, &D)); + s_mp_div_2(&D); + } + } + + if (mp_cmp(&u, &v) >= 0) { + MP_CHECKOK(mp_sub(&u, &v, &u)); + MP_CHECKOK(mp_sub(&A, &C, &A)); + MP_CHECKOK(mp_sub(&B, &D, &B)); + } else { + MP_CHECKOK(mp_sub(&v, &u, &v)); + MP_CHECKOK(mp_sub(&C, &A, &C)); + MP_CHECKOK(mp_sub(&D, &B, &D)); + } + } 
while (mp_cmp_z(&u) != 0); + + /* copy results to output */ + if (x) + MP_CHECKOK(mp_copy(&C, x)); + + if (y) + MP_CHECKOK(mp_copy(&D, y)); + + if (g) + MP_CHECKOK(mp_mul(&gx, &v, g)); + +CLEANUP: + while (last >= 0) + mp_clear(clean[last--]); + + return res; + +} /* end mp_xgcd() */ + +/* }}} */ + +mp_size +mp_trailing_zeros(const mp_int *mp) +{ + mp_digit d; + mp_size n = 0; + unsigned int ix; + + if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp)) + return n; + + for (ix = 0; !(d = MP_DIGIT(mp, ix)) && (ix < MP_USED(mp)); ++ix) + n += MP_DIGIT_BIT; + if (!d) + return 0; /* shouldn't happen, but ... */ +#if !defined(MP_USE_UINT_DIGIT) + if (!(d & 0xffffffffU)) { + d >>= 32; + n += 32; + } +#endif + if (!(d & 0xffffU)) { + d >>= 16; + n += 16; + } + if (!(d & 0xffU)) { + d >>= 8; + n += 8; + } + if (!(d & 0xfU)) { + d >>= 4; + n += 4; + } + if (!(d & 0x3U)) { + d >>= 2; + n += 2; + } + if (!(d & 0x1U)) { + d >>= 1; + n += 1; + } +#if MP_ARGCHK == 2 + assert(0 != (d & 1)); +#endif + return n; +} + +/* Given a and prime p, computes c and k such that a*c == 2**k (mod p). +** Returns k (positive) or error (negative). +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). 
+*/ +mp_err +s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c) +{ + mp_err res; + mp_err k = 0; + mp_int d, f, g; + + ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG); + + MP_DIGITS(&d) = 0; + MP_DIGITS(&f) = 0; + MP_DIGITS(&g) = 0; + MP_CHECKOK(mp_init(&d)); + MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */ + MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */ + + mp_set(c, 1); + mp_zero(&d); + + if (mp_cmp_z(&f) == 0) { + res = MP_UNDEF; + } else + for (;;) { + int diff_sign; + while (mp_iseven(&f)) { + mp_size n = mp_trailing_zeros(&f); + if (!n) { + res = MP_UNDEF; + goto CLEANUP; + } + s_mp_div_2d(&f, n); + MP_CHECKOK(s_mp_mul_2d(&d, n)); + k += n; + } + if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */ + res = k; + break; + } + diff_sign = mp_cmp(&f, &g); + if (diff_sign < 0) { /* f < g */ + s_mp_exch(&f, &g); + s_mp_exch(c, &d); + } else if (diff_sign == 0) { /* f == g */ + res = MP_UNDEF; /* a and p are not relatively prime */ + break; + } + if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) { + MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */ + MP_CHECKOK(mp_sub(c, &d, c)); /* c = c - d */ + } else { + MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */ + MP_CHECKOK(mp_add(c, &d, c)); /* c = c + d */ + } + } + if (res >= 0) { + if (mp_cmp_mag(c, p) >= 0) { + MP_CHECKOK(mp_div(c, p, NULL, c)); + } + if (MP_SIGN(c) != MP_ZPOS) { + MP_CHECKOK(mp_add(c, p, c)); + } + res = k; + } + +CLEANUP: + mp_clear(&d); + mp_clear(&f); + mp_clear(&g); + return res; +} + +/* Compute T = (P ** -1) mod MP_RADIX. Also works for 16-bit mp_digits. +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). 
+*/
+mp_digit
+s_mp_invmod_radix(mp_digit P)
+{
+    mp_digit T = P;   /* for odd P, P * P == 1 (mod 8), so T is already right in its low 3 bits */
+    T *= 2 - (P * T); /* each round doubles the number of correct low-order bits */
+    T *= 2 - (P * T);
+    T *= 2 - (P * T);
+    T *= 2 - (P * T);
+#if !defined(MP_USE_UINT_DIGIT) /* two more rounds for the other digit type (presumably the wider one -- confirm) */
+    T *= 2 - (P * T);
+    T *= 2 - (P * T);
+#endif
+    return T;
+}
+
+/* Given c, k, and prime p, where a*c == 2**k (mod p),
+** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+*/
+mp_err
+s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
+{
+    int k_orig = k; /* the loop below consumes k; keep the original for the final shift */
+    mp_digit r;
+    mp_size ix;
+    mp_err res;
+
+    if (mp_cmp_z(c) < 0) {           /* c < 0 */
+        MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
+    } else {
+        MP_CHECKOK(mp_copy(c, x)); /* x = c */
+    }
+
+    /* make sure x is large enough */
+    ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
+    ix = MP_MAX(ix, MP_USED(x));
+    MP_CHECKOK(s_mp_pad(x, ix));
+
+    r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0)); /* r = -(p ** -1) mod RADIX */
+
+    for (ix = 0; k > 0; ix++) {
+        int j = MP_MIN(k, MP_DIGIT_BIT); /* bits handled this round: a whole digit, or the last k bits */
+        mp_digit v = r * MP_DIGIT(x, ix);
+        if (j < MP_DIGIT_BIT) {
+            v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
+        }
+        s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
+        k -= j;
+    }
+    s_mp_clamp(x);
+    s_mp_div_2d(x, k_orig); /* shift x right by the original k bits */
+    res = MP_OKAY;
+
+CLEANUP:
+    return res;
+}
+
+/*
+  Computes the modular inverse using the constant-time algorithm
+  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
+  "Fast constant-time gcd computation and modular inversion"
+
+  On success c receives v mod m, where v holds the inverse of a.
+  Returns MP_RANGE when a is zero or m < 2, and MP_UNDEF when a and m
+  are the same object, m is even, or the computed gcd is not 1.
+ */
+mp_err
+s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+    mp_digit cond = 0;
+    mp_int g, f, v, r, temp;
+    int i, its, delta = 1, last = -1; /* last indexes the most recently initialized entry of clear[] */
+    mp_size top, flen, glen;
+    mp_int *clear[6]; /* temporaries to release at CLEANUP; five slots are used */
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+    /* Check for invalid inputs. */
+    if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
+        return MP_RANGE;
+
+    if (a == m || mp_iseven(m))
+        return MP_UNDEF;
+
+    MP_CHECKOK(mp_init(&temp));
+    clear[++last] = &temp;
+    MP_CHECKOK(mp_init(&v));
+    clear[++last] = &v;
+    MP_CHECKOK(mp_init(&r));
+    clear[++last] = &r;
+    MP_CHECKOK(mp_init_copy(&g, a));
+    clear[++last] = &g;
+    MP_CHECKOK(mp_init_copy(&f, m));
+    clear[++last] = &f;
+
+    mp_set(&v, 0);
+    mp_set(&r, 1);
+
+    /* Allocate to the size of largest mp_int. */
+    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
+    MP_CHECKOK(s_mp_grow(&f, top));
+    MP_CHECKOK(s_mp_grow(&g, top));
+    MP_CHECKOK(s_mp_grow(&temp, top));
+    MP_CHECKOK(s_mp_grow(&v, top));
+    MP_CHECKOK(s_mp_grow(&r, top));
+
+    /* Upper bound for the total iterations. */
+    flen = mpl_significant_bits(&f);
+    glen = mpl_significant_bits(&g);
+    its = 4 + 3 * ((flen >= glen) ? flen : glen); /* depends only on the operand bit lengths */
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know we're negating an unsigned mp_digit
+#endif
+
+    for (i = 0; i < its; i++) {
+        /* Step 1: conditional swap. */
+        /* Set cond if delta > 0 and g is odd. */
+        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
+        /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
+        delta = (-cond & -delta) | ((cond - 1) & delta);
+        SIGN(&f) ^= cond;
+        SIGN(&v) ^= cond;
+        /* If cond is set swap (f,v) with (g,r). */
+        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
+        MP_CHECKOK(mp_cswap(cond, &v, &r, top));
+
+        /* Step 2: elimination. */
+        /* Update delta */
+        delta++;
+        /* If g is odd replace r with (r+v). */
+        MP_CHECKOK(mp_add(&r, &v, &temp));
+        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
+        /* If g is odd, right shift (g+f) else right shift g. */
+        MP_CHECKOK(mp_add(&g, &f, &temp));
+        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
+        s_mp_div_2(&g);
+        /*
+         If r is even, right shift it.
+         If r is odd, right shift (r+m) which is even because m is odd.
+         We want the result modulo m, so the added multiples of m vanish.
+         */
+        MP_CHECKOK(mp_add(&r, m, &temp));
+        MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
+        s_mp_div_2(&r);
+    }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+    /* We have the inverse in v, propagate sign from f. */
+    SIGN(&v) ^= SIGN(&f);
+    /* GCD is in f, take the absolute value. */
+    SIGN(&f) = ZPOS;
+
+    /* If gcd != 1, not invertible. */
+    if (mp_cmp_d(&f, 1) != MP_EQ) {
+        res = MP_UNDEF;
+        goto CLEANUP;
+    }
+
+    /* Return inverse modulo m. */
+    MP_CHECKOK(mp_mod(&v, m, c));
+
+CLEANUP:
+    while (last >= 0) /* release only what was actually initialized, newest first */
+        mp_clear(clear[last--]);
+    return res;
+}
+
+/* Known good algorithm for computing modular inverse.  But slow.
+ * Built on mp_xgcd.  Returns MP_RANGE when a or m is zero and MP_UNDEF
+ * when the gcd reported by mp_xgcd is not 1. */
+mp_err
+mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_int g, x;
+    mp_err res;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_CHECKOK(mp_init(&x));
+    MP_CHECKOK(mp_init(&g));
+
+    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
+
+    if (mp_cmp_d(&g, 1) != MP_EQ) {
+        res = MP_UNDEF;
+        goto CLEANUP;
+    }
+
+    res = mp_mod(&x, m, c);
+    SIGN(c) = SIGN(a); /* the result is given the sign of a */
+
+CLEANUP:
+    mp_clear(&x);
+    mp_clear(&g);
+
+    return res;
+}
+
+/* modular inverse where modulus is 2**k. 
*/ +/* c = a**-1 mod 2**k */ +mp_err +s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c) +{ + mp_err res; + mp_size ix = k + 4; + mp_int t0, t1, val, tmp, two2k; + + static const mp_digit d2 = 2; + static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 }; + + if (mp_iseven(a)) + return MP_UNDEF; + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit +#endif + if (k <= MP_DIGIT_BIT) { + mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0)); + /* propagate the sign from mp_int */ + i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a); + if (k < MP_DIGIT_BIT) + i &= ((mp_digit)1 << k) - (mp_digit)1; + mp_set(c, i); + return MP_OKAY; + } +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + MP_DIGITS(&t0) = 0; + MP_DIGITS(&t1) = 0; + MP_DIGITS(&val) = 0; + MP_DIGITS(&tmp) = 0; + MP_DIGITS(&two2k) = 0; + MP_CHECKOK(mp_init_copy(&val, a)); + s_mp_mod_2d(&val, k); + MP_CHECKOK(mp_init_copy(&t0, &val)); + MP_CHECKOK(mp_init_copy(&t1, &t0)); + MP_CHECKOK(mp_init(&tmp)); + MP_CHECKOK(mp_init(&two2k)); + MP_CHECKOK(s_mp_2expt(&two2k, k)); + do { + MP_CHECKOK(mp_mul(&val, &t1, &tmp)); + MP_CHECKOK(mp_sub(&two, &tmp, &tmp)); + MP_CHECKOK(mp_mul(&t1, &tmp, &t1)); + s_mp_mod_2d(&t1, k); + while (MP_SIGN(&t1) != MP_ZPOS) { + MP_CHECKOK(mp_add(&t1, &two2k, &t1)); + } + if (mp_cmp(&t1, &t0) == MP_EQ) + break; + MP_CHECKOK(mp_copy(&t1, &t0)); + } while (--ix > 0); + if (!ix) { + res = MP_UNDEF; + } else { + mp_exch(c, &t1); + } + +CLEANUP: + mp_clear(&t0); + mp_clear(&t1); + mp_clear(&val); + mp_clear(&tmp); + mp_clear(&two2k); + return res; +} + +mp_err +s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + mp_size k; + mp_int oddFactor, evenFactor; /* factors of the modulus */ + mp_int oddPart, evenPart; /* parts to combine via CRT. 
*/ + mp_int C2, tmp1, tmp2; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + /*static const mp_digit d1 = 1; */ + /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */ + + if ((res = s_mp_ispow2(m)) >= 0) { + k = res; + return s_mp_invmod_2d(a, k, c); + } + MP_DIGITS(&oddFactor) = 0; + MP_DIGITS(&evenFactor) = 0; + MP_DIGITS(&oddPart) = 0; + MP_DIGITS(&evenPart) = 0; + MP_DIGITS(&C2) = 0; + MP_DIGITS(&tmp1) = 0; + MP_DIGITS(&tmp2) = 0; + + MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */ + MP_CHECKOK(mp_init(&evenFactor)); + MP_CHECKOK(mp_init(&oddPart)); + MP_CHECKOK(mp_init(&evenPart)); + MP_CHECKOK(mp_init(&C2)); + MP_CHECKOK(mp_init(&tmp1)); + MP_CHECKOK(mp_init(&tmp2)); + + k = mp_trailing_zeros(m); + s_mp_div_2d(&oddFactor, k); + MP_CHECKOK(s_mp_2expt(&evenFactor, k)); + + /* compute a**-1 mod oddFactor. */ + MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart)); + /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */ + MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart)); + + /* Use Chinese Remainer theorem to compute a**-1 mod m. */ + /* let m1 = oddFactor, v1 = oddPart, + * let m2 = evenFactor, v2 = evenPart. + */ + + /* Compute C2 = m1**-1 mod m2. */ + MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2)); + + /* compute u = (v2 - v1)*C2 mod m2 */ + MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1)); + MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2)); + s_mp_mod_2d(&tmp2, k); + while (MP_SIGN(&tmp2) != MP_ZPOS) { + MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2)); + } + + /* compute answer = v1 + u*m1 */ + MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c)); + MP_CHECKOK(mp_add(&oddPart, c, c)); + /* not sure this is necessary, but it's low cost if not. 
*/ + MP_CHECKOK(mp_mod(c, m, c)); + +CLEANUP: + mp_clear(&oddFactor); + mp_clear(&evenFactor); + mp_clear(&oddPart); + mp_clear(&evenPart); + mp_clear(&C2); + mp_clear(&tmp1); + mp_clear(&tmp2); + return res; +} + +/* {{{ mp_invmod(a, m, c) */ + +/* + mp_invmod(a, m, c) + + Compute c = a^-1 (mod m), if there is an inverse for a (mod m). + This is equivalent to the question of whether (a, m) = 1. If not, + MP_UNDEF is returned, and there is no inverse. + */ + +mp_err +mp_invmod(const mp_int *a, const mp_int *m, mp_int *c) +{ + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0) + return MP_RANGE; + + if (mp_isodd(m)) { + return s_mp_invmod_odd_m(a, m, c); + } + if (mp_iseven(a)) + return MP_UNDEF; /* not invertable */ + + return s_mp_invmod_even_m(a, m, c); + +} /* end mp_invmod() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ mp_print(mp, ofp) */ + +#if MP_IOFUNC +/* + mp_print(mp, ofp) + + Print a textual representation of the given mp_int on the output + stream 'ofp'. Output is generated using the internal radix. + */ + +void +mp_print(mp_int *mp, FILE *ofp) +{ + int ix; + + if (mp == NULL || ofp == NULL) + return; + + fputc((SIGN(mp) == NEG) ? 
'-' : '+', ofp); + + for (ix = USED(mp) - 1; ix >= 0; ix--) { + fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix)); + } + +} /* end mp_print() */ + +#endif /* if MP_IOFUNC */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ More I/O Functions */ + +/* {{{ mp_read_raw(mp, str, len) */ + +/* + mp_read_raw(mp, str, len) + + Read in a raw value (base 256) into the given mp_int + */ + +mp_err +mp_read_raw(mp_int *mp, char *str, int len) +{ + int ix; + mp_err res; + unsigned char *ustr = (unsigned char *)str; + + ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); + + mp_zero(mp); + + /* Read the rest of the digits */ + for (ix = 1; ix < len; ix++) { + if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY) + return res; + if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY) + return res; + } + + /* Get sign from first byte */ + if (ustr[0]) + SIGN(mp) = NEG; + else + SIGN(mp) = ZPOS; + + return MP_OKAY; + +} /* end mp_read_raw() */ + +/* }}} */ + +/* {{{ mp_raw_size(mp) */ + +int +mp_raw_size(mp_int *mp) +{ + ARGCHK(mp != NULL, 0); + + return (USED(mp) * sizeof(mp_digit)) + 1; + +} /* end mp_raw_size() */ + +/* }}} */ + +/* {{{ mp_toraw(mp, str) */ + +mp_err +mp_toraw(mp_int *mp, char *str) +{ + int ix, jx, pos = 1; + + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + + str[0] = (char)SIGN(mp); + + /* Iterate over each digit... */ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + + /* Unpack digit bytes, high order first */ + for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) { + str[pos++] = (char)(d >> (jx * CHAR_BIT)); + } + } + + return MP_OKAY; + +} /* end mp_toraw() */ + +/* }}} */ + +/* {{{ mp_read_radix(mp, str, radix) */ + +/* + mp_read_radix(mp, str, radix) + + Read an integer from the given string, and set mp to the resulting + value. The input is presumed to be in base 10. Leading non-digit + characters are ignored, and the function reads until a non-digit + character or the end of the string. 
+ */ + +mp_err +mp_read_radix(mp_int *mp, const char *str, int radix) +{ + int ix = 0, val = 0; + mp_err res; + mp_sign sig = ZPOS; + + ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, + MP_BADARG); + + mp_zero(mp); + + /* Skip leading non-digit characters until a digit or '-' or '+' */ + while (str[ix] && + (s_mp_tovalue(str[ix], radix) < 0) && + str[ix] != '-' && + str[ix] != '+') { + ++ix; + } + + if (str[ix] == '-') { + sig = NEG; + ++ix; + } else if (str[ix] == '+') { + sig = ZPOS; /* this is the default anyway... */ + ++ix; + } + + while ((val = s_mp_tovalue(str[ix], radix)) >= 0) { + if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY) + return res; + if ((res = s_mp_add_d(mp, val)) != MP_OKAY) + return res; + ++ix; + } + + if (s_mp_cmp_d(mp, 0) == MP_EQ) + SIGN(mp) = ZPOS; + else + SIGN(mp) = sig; + + return MP_OKAY; + +} /* end mp_read_radix() */ + +mp_err +mp_read_variable_radix(mp_int *a, const char *str, int default_radix) +{ + int radix = default_radix; + int cx; + mp_sign sig = ZPOS; + mp_err res; + + /* Skip leading non-digit characters until a digit or '-' or '+' */ + while ((cx = *str) != 0 && + (s_mp_tovalue(cx, radix) < 0) && + cx != '-' && + cx != '+') { + ++str; + } + + if (cx == '-') { + sig = NEG; + ++str; + } else if (cx == '+') { + sig = ZPOS; /* this is the default anyway... */ + ++str; + } + + if (str[0] == '0') { + if ((str[1] | 0x20) == 'x') { + radix = 16; + str += 2; + } else { + radix = 8; + str++; + } + } + res = mp_read_radix(a, str, radix); + if (res == MP_OKAY) { + MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? 
ZPOS : sig; + } + return res; +} + +/* }}} */ + +/* {{{ mp_radix_size(mp, radix) */ + +int +mp_radix_size(mp_int *mp, int radix) +{ + int bits; + + if (!mp || radix < 2 || radix > MAX_RADIX) + return 0; + + bits = USED(mp) * DIGIT_BIT - 1; + + return SIGN(mp) + s_mp_outlen(bits, radix); + +} /* end mp_radix_size() */ + +/* }}} */ + +/* {{{ mp_toradix(mp, str, radix) */ + +mp_err +mp_toradix(mp_int *mp, char *str, int radix) +{ + int ix, pos = 0; + + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE); + + if (mp_cmp_z(mp) == MP_EQ) { + str[0] = '0'; + str[1] = '\0'; + } else { + mp_err res; + mp_int tmp; + mp_sign sgn; + mp_digit rem, rdx = (mp_digit)radix; + char ch; + + if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY) + return res; + + /* Save sign for later, and take absolute value */ + sgn = SIGN(&tmp); + SIGN(&tmp) = ZPOS; + + /* Generate output digits in reverse order */ + while (mp_cmp_z(&tmp) != 0) { + if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + /* Generate digits, use capital letters */ + ch = s_mp_todigit(rem, radix, 0); + + str[pos++] = ch; + } + + /* Add - sign if original value was negative */ + if (sgn == NEG) + str[pos++] = '-'; + + /* Add trailing NUL to end the string */ + str[pos--] = '\0'; + + /* Reverse the digits and sign indicator */ + ix = 0; + while (ix < pos) { + char tmpc = str[ix]; + + str[ix] = str[pos]; + str[pos] = tmpc; + ++ix; + --pos; + } + + mp_clear(&tmp); + } + + return MP_OKAY; + +} /* end mp_toradix() */ + +/* }}} */ + +/* {{{ mp_tovalue(ch, r) */ + +int +mp_tovalue(char ch, int r) +{ + return s_mp_tovalue(ch, r); + +} /* end mp_tovalue() */ + +/* }}} */ + +/* }}} */ + +/* {{{ mp_strerror(ec) */ + +/* + mp_strerror(ec) + + Return a string describing the meaning of error code 'ec'. The + string returned is allocated in static memory, so the caller should + not attempt to modify or free the memory associated with this + string. 
+ */
+const char *
+mp_strerror(mp_err ec)
+{
+    /* Code values are negative, so the valid window runs from
+       MP_LAST_CODE up to MP_OKAY; anything outside it is unknown. */
+    if (ec > MP_OKAY || ec < MP_LAST_CODE)
+        return mp_err_string[0]; /* unknown error code */
+
+    /* The message for a known code sits at |ec| + 1 */
+    return mp_err_string[((ec < 0) ? -ec : ec) + 1];
+
+} /* end mp_strerror() */
+
+/* }}} */
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static function definitions (internal use only)                        */
+
+/* {{{ Memory management */
+
+/* {{{ s_mp_grow(mp, min) */
+
+/* Ensure mp has storage for at least 'min' digits.  When it must grow,
+   the request is rounded up with MP_ROUNDUP(min, s_mp_defprec), the used
+   digits are copied into the new buffer, and the old buffer is zeroed
+   before it is freed.  Returns MP_MEM if the allocation fails. */
+mp_err
+s_mp_grow(mp_int *mp, mp_size min)
+{
+    mp_digit *fresh;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    /* Current allocation already suffices */
+    if (min <= ALLOC(mp))
+        return MP_OKAY;
+
+    min = MP_ROUNDUP(min, s_mp_defprec);
+
+    fresh = s_mp_alloc(min, sizeof(mp_digit));
+    if (fresh == NULL)
+        return MP_MEM;
+
+    /* Move the live digits, then wipe and release the old storage */
+    s_mp_copy(DIGITS(mp), fresh, USED(mp));
+    s_mp_setz(DIGITS(mp), ALLOC(mp));
+    s_mp_free(DIGITS(mp));
+
+    DIGITS(mp) = fresh;
+    ALLOC(mp) = min;
+
+    return MP_OKAY;
+
+} /* end s_mp_grow() */
+
+/* }}} */
+
+/* {{{ s_mp_pad(mp, min) */
+
+/* Raise the used size of mp to at least 'min' digits, growing the
+   storage when needed.  The newly exposed digits read as zero. */
+mp_err
+s_mp_pad(mp_int *mp, mp_size min)
+{
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    /* Already long enough */
+    if (min <= USED(mp))
+        return MP_OKAY;
+
+    if (min <= ALLOC(mp)) {
+        /* Room is available: clear the digits about to come into use */
+        s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
+    } else {
+        /* Not enough room: get a larger buffer */
+        mp_err res = s_mp_grow(mp, min);
+
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    /* Increase precision; the new digits should already be 0-filled */
+    USED(mp) = min;
+
+    return MP_OKAY;
+
+} /* end s_mp_pad() */
+
+/* }}} */
+
+/* {{{ s_mp_setz(dp, count) */
+
+/* Zero 'count' digits starting at dp */
+void
+s_mp_setz(mp_digit *dp, mp_size count)
+{
+    const size_t nbytes = count * sizeof(mp_digit);
+
+    memset(dp, 0, nbytes);
+} /* end s_mp_setz() */
+
+/* }}} */
+
+/* {{{ s_mp_copy(sp, dp, count) */ + +/* Copy 'count' digits from sp to dp */ +void +s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count) +{ + memcpy(dp, sp, count * sizeof(mp_digit)); +} /* end s_mp_copy() */ + +/* }}} */ + +/* {{{ s_mp_alloc(nb, ni) */ + +/* Allocate ni records of nb bytes each, and return a pointer to that */ +void * +s_mp_alloc(size_t nb, size_t ni) +{ + return calloc(nb, ni); + +} /* end s_mp_alloc() */ + +/* }}} */ + +/* {{{ s_mp_free(ptr) */ + +/* Free the memory pointed to by ptr */ +void +s_mp_free(void *ptr) +{ + if (ptr) { + free(ptr); + } +} /* end s_mp_free() */ + +/* }}} */ + +/* {{{ s_mp_clamp(mp) */ + +/* Remove leading zeroes from the given value */ +void +s_mp_clamp(mp_int *mp) +{ + mp_size used = MP_USED(mp); + while (used > 1 && DIGIT(mp, used - 1) == 0) + --used; + MP_USED(mp) = used; + if (used == 1 && DIGIT(mp, 0) == 0) + MP_SIGN(mp) = ZPOS; +} /* end s_mp_clamp() */ + +/* }}} */ + +/* {{{ s_mp_exch(a, b) */ + +/* Exchange the data for a and b; (b, a) = (a, b) */ +void +s_mp_exch(mp_int *a, mp_int *b) +{ + mp_int tmp; + if (!a || !b) { + return; + } + + tmp = *a; + *a = *b; + *b = tmp; + +} /* end s_mp_exch() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Arithmetic helpers */ + +/* {{{ s_mp_lshd(mp, p) */ + +/* + Shift mp leftward by p digits, growing if needed, and zero-filling + the in-shifted digits at the right end. 
This is a convenient
+   alternative to multiplication by powers of the radix.  A shift by
+   p == 0, or of a single zero digit, returns at once without touching
+   mp; a failure from s_mp_pad is passed back to the caller.
+ */
+
+mp_err
+s_mp_lshd(mp_int *mp, mp_size p)
+{
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    if (p == 0)
+        return MP_OKAY;
+
+    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0) /* a single zero digit: nothing to shift */
+        return MP_OKAY;
+
+    if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
+        return res;
+
+    /* Shift all the significant figures over as needed; runs from the
+       top down so no digit is overwritten before it has been moved */
+    for (ix = USED(mp) - p; ix-- > 0;) {
+        DIGIT(mp, ix + p) = DIGIT(mp, ix);
+    }
+
+    /* Fill the bottom digits with zeroes */
+    for (ix = 0; (mp_size)ix < p; ix++)
+        DIGIT(mp, ix) = 0;
+
+    return MP_OKAY;
+
+} /* end s_mp_lshd() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2d(mp, d) */
+
+/*
+  Multiply the integer by 2^d, where d is a number of bits.  This
+  amounts to a bitwise shift of the value: whole digits are moved with
+  s_mp_lshd and the remaining d mod MP_DIGIT_BIT bits are shifted digit
+  by digit.  Failures from s_mp_pad or s_mp_lshd are passed back.
+ */
+mp_err
+s_mp_mul_2d(mp_int *mp, mp_digit d)
+{
+    mp_err res;
+    mp_digit dshift, rshift, mask, x, prev = 0;
+    mp_digit *pa = NULL;
+    int i;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    dshift = d / MP_DIGIT_BIT; /* whole digits to shift */
+    d %= MP_DIGIT_BIT;         /* leftover bit shift, now below MP_DIGIT_BIT */
+    /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */
+    /* mod and corresponding mask logic avoid that when d = 0 */
+    rshift = MP_DIGIT_BIT - d;
+    rshift %= MP_DIGIT_BIT;
+    /* mask = (2**d - 1) * 2**(w-d) mod 2**w */
+    mask = (DIGIT_MAX << rshift) + 1; /* with d == 0 this is DIGIT_MAX + 1, which wraps to 0 if DIGIT_MAX is all ones (presumably -- confirm) */
+    mask &= DIGIT_MAX - 1;            /* drop bit 0 that the + 1 set, leaving only the top d bits */
+    /* bits to be shifted out of the top word */
+    x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask;
+
+    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0)))) /* room for the digit shift, plus one more digit when bits spill out of the top */
+        return res;
+
+    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
+        return res;
+
+    pa = MP_DIGITS(mp) + dshift; /* digits below dshift stay as they are */
+
+    for (i = MP_USED(mp) - dshift; i > 0; i--) {
+        x = *pa;
+        *pa++ = (x << d) | prev;
+        prev = (x & mask) >> rshift; /* top d bits of this digit, carried into the next one up */
+    }
+
+    s_mp_clamp(mp);
+    return MP_OKAY;
+} /* end s_mp_mul_2d() */
+
+/* {{{ s_mp_rshd(mp, p) */
+
+/*
+  Shift mp rightward by p digits.  Maintains the invariant that
+  digits above the precision are all zero. 
Digits shifted off the + end are lost. Cannot fail. + */ + +void +s_mp_rshd(mp_int *mp, mp_size p) +{ + mp_size ix; + mp_digit *src, *dst; + + if (p == 0) + return; + + /* Shortcut when all digits are to be shifted off */ + if (p >= USED(mp)) { + s_mp_setz(DIGITS(mp), ALLOC(mp)); + USED(mp) = 1; + SIGN(mp) = ZPOS; + return; + } + + /* Shift all the significant figures over as needed */ + dst = MP_DIGITS(mp); + src = dst + p; + for (ix = USED(mp) - p; ix > 0; ix--) + *dst++ = *src++; + + MP_USED(mp) -= p; + /* Fill the top digits with zeroes */ + while (p-- > 0) + *dst++ = 0; + +} /* end s_mp_rshd() */ + +/* }}} */ + +/* {{{ s_mp_div_2(mp) */ + +/* Divide by two -- take advantage of radix properties to do it fast */ +void +s_mp_div_2(mp_int *mp) +{ + s_mp_div_2d(mp, 1); + +} /* end s_mp_div_2() */ + +/* }}} */ + +/* {{{ s_mp_mul_2(mp) */ + +mp_err +s_mp_mul_2(mp_int *mp) +{ + mp_digit *pd; + unsigned int ix, used; + mp_digit kin = 0; + + ARGCHK(mp != NULL, MP_BADARG); + + /* Shift digits leftward by 1 bit */ + used = MP_USED(mp); + pd = MP_DIGITS(mp); + for (ix = 0; ix < used; ix++) { + mp_digit d = *pd; + *pd++ = (d << 1) | kin; + kin = (d >> (DIGIT_BIT - 1)); + } + + /* Deal with rollover from last digit */ + if (kin) { + if (ix >= ALLOC(mp)) { + mp_err res; + if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY) + return res; + } + + DIGIT(mp, ix) = kin; + USED(mp) += 1; + } + + return MP_OKAY; + +} /* end s_mp_mul_2() */ + +/* }}} */ + +/* {{{ s_mp_mod_2d(mp, d) */ + +/* + Remainder the integer by 2^d, where d is a number of bits. 
This
+  is a bitwise AND of the value with 2^d - 1, so the full division code
+  is not needed.  A value with no digit at or above bit d is left
+  untouched.
+ */
+void
+s_mp_mod_2d(mp_int *mp, mp_digit d)
+{
+    const mp_size top = (d / DIGIT_BIT); /* index of the digit holding bit d */
+    const mp_size bit = (d % DIGIT_BIT); /* position of bit d inside that digit */
+    mp_size pos;
+
+    if (top >= USED(mp))
+        return;
+
+    /* Keep only the bits below 2^d in the boundary digit */
+    DIGIT(mp, top) &= ((mp_digit)1 << bit) - 1;
+
+    /* Clear every digit above the boundary digit */
+    pos = top;
+    while (++pos < USED(mp))
+        DIGIT(mp, pos) = 0;
+
+    s_mp_clamp(mp);
+
+} /* end s_mp_mod_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2d(mp, d) */
+
+/*
+  Divide the integer by 2^d, where d is a number of bits.  Whole digits
+  are dropped with s_mp_rshd; the remaining d mod DIGIT_BIT bits are
+  shifted out digit by digit, so the full division code is not needed
+  (used in Barrett reduction, see below)
+ */
+void
+s_mp_div_2d(mp_int *mp, mp_digit d)
+{
+    mp_digit low_mask, up_shift, carry_in;
+    int pos;
+
+    s_mp_rshd(mp, d / DIGIT_BIT);
+    d %= DIGIT_BIT;
+
+    /* mp_digit << up_shift is undefined behavior for up_shift >= DIGIT_BIT, */
+    /* so the d = 0 case is folded to a shift by 0; low_mask is 0 then */
+    up_shift = DIGIT_BIT - d;
+    up_shift %= DIGIT_BIT;
+    low_mask = ((mp_digit)1 << d) - 1;
+
+    /* Walk from the top digit down, handing the bits that fall off each
+       digit to the top of the digit below it */
+    carry_in = 0;
+    pos = USED(mp);
+    while (--pos >= 0) {
+        const mp_digit cur = DIGIT(mp, pos);
+
+        DIGIT(mp, pos) = (carry_in << up_shift) | (cur >> d);
+        carry_in = cur & low_mask;
+    }
+    s_mp_clamp(mp);
+
+} /* end s_mp_div_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_norm(a, b, *d) */
+
+/*
+  s_mp_norm(a, b, *d)
+
+  Normalize a and b for division, where b is the divisor.  In order
+  that we might make good guesses for quotient digits, we want the
+  leading digit of b to be at least half the radix, which we
+  accomplish by multiplying a and b by a power of 2.  The exponent
+  (shift count) is placed in *pd, so that the remainder can be shifted
+  back at the end of the division process. 
+ */ + +mp_err +s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd) +{ + mp_digit d; + mp_digit mask; + mp_digit b_msd; + mp_err res = MP_OKAY; + + ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG); + + d = 0; + mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */ + b_msd = DIGIT(b, USED(b) - 1); + while (!(b_msd & mask)) { + b_msd <<= 1; + ++d; + } + + if (d) { + MP_CHECKOK(s_mp_mul_2d(a, d)); + MP_CHECKOK(s_mp_mul_2d(b, d)); + } + + *pd = d; +CLEANUP: + return res; + +} /* end s_mp_norm() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive digit arithmetic */ + +/* {{{ s_mp_add_d(mp, d) */ + +/* Add d to |mp| in place */ +mp_err +s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w, k = 0; + mp_size ix = 1; + + w = (mp_word)DIGIT(mp, 0) + d; + DIGIT(mp, 0) = ACCUM(w); + k = CARRYOUT(w); + + while (ix < USED(mp) && k) { + w = (mp_word)DIGIT(mp, ix) + k; + DIGIT(mp, ix) = ACCUM(w); + k = CARRYOUT(w); + ++ix; + } + + if (k != 0) { + mp_err res; + + if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY) + return res; + + DIGIT(mp, ix) = (mp_digit)k; + } + + return MP_OKAY; +#else + mp_digit *pmp = MP_DIGITS(mp); + mp_digit sum, mp_i, carry = 0; + mp_err res = MP_OKAY; + int used = (int)MP_USED(mp); + + mp_i = *pmp; + *pmp++ = sum = d + mp_i; + carry = (sum < d); + while (carry && --used > 0) { + mp_i = *pmp; + *pmp++ = sum = carry + mp_i; + carry = !sum; + } + if (carry && !used) { + /* mp is growing */ + used = MP_USED(mp); + MP_CHECKOK(s_mp_pad(mp, used + 1)); + MP_DIGIT(mp, used) = carry; + } +CLEANUP: + return res; +#endif +} /* end s_mp_add_d() */ + +/* }}} */ + +/* {{{ s_mp_sub_d(mp, d) */ + +/* Subtract d from |mp| in place, assumes |mp| > d */ +mp_err +s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + mp_word w, b = 0; + mp_size ix = 1; + + /* Compute initial subtraction */ + w = (RADIX + 
(mp_word)DIGIT(mp, 0)) - d; + b = CARRYOUT(w) ? 0 : 1; + DIGIT(mp, 0) = ACCUM(w); + + /* Propagate borrows leftward */ + while (b && ix < USED(mp)) { + w = (RADIX + (mp_word)DIGIT(mp, ix)) - b; + b = CARRYOUT(w) ? 0 : 1; + DIGIT(mp, ix) = ACCUM(w); + ++ix; + } + + /* Remove leading zeroes */ + s_mp_clamp(mp); + + /* If we have a borrow out, it's a violation of the input invariant */ + if (b) + return MP_RANGE; + else + return MP_OKAY; +#else + mp_digit *pmp = MP_DIGITS(mp); + mp_digit mp_i, diff, borrow; + mp_size used = MP_USED(mp); + + mp_i = *pmp; + *pmp++ = diff = mp_i - d; + borrow = (diff > mp_i); + while (borrow && --used) { + mp_i = *pmp; + *pmp++ = diff = mp_i - borrow; + borrow = (diff > mp_i); + } + s_mp_clamp(mp); + return (borrow && !used) ? MP_RANGE : MP_OKAY; +#endif +} /* end s_mp_sub_d() */ + +/* }}} */ + +/* {{{ s_mp_mul_d(a, d) */ + +/* Compute a = a * d, single digit multiplication */ +mp_err +s_mp_mul_d(mp_int *a, mp_digit d) +{ + mp_err res; + mp_size used; + int pow; + + if (!d) { + mp_zero(a); + return MP_OKAY; + } + if (d == 1) + return MP_OKAY; + if (0 <= (pow = s_mp_ispow2d(d))) { + return s_mp_mul_2d(a, (mp_digit)pow); + } + + used = MP_USED(a); + MP_CHECKOK(s_mp_pad(a, used + 1)); + + s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a)); + + s_mp_clamp(a); + +CLEANUP: + return res; + +} /* end s_mp_mul_d() */ + +/* }}} */ + +/* {{{ s_mp_div_d(mp, d, r) */ + +/* + s_mp_div_d(mp, d, r) + + Compute the quotient mp = mp / d and remainder r = mp mod d, for a + single digit d. If r is null, the remainder will be discarded. + */ + +mp_err +s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD) + mp_word w = 0, q; +#else + mp_digit w = 0, q; +#endif + int ix; + mp_err res; + mp_int quot; + mp_int rem; + + if (d == 0) + return MP_RANGE; + if (d == 1) { + if (r) + *r = 0; + return MP_OKAY; + } + /* could check for power of 2 here, but mp_div_d does that. 
*/ + if (MP_USED(mp) == 1) { + mp_digit n = MP_DIGIT(mp, 0); + mp_digit remdig; + + q = n / d; + remdig = n % d; + MP_DIGIT(mp, 0) = q; + if (r) { + *r = remdig; + } + return MP_OKAY; + } + + MP_DIGITS(&rem) = 0; + MP_DIGITS(&quot) = 0; + /* Make room for the quotient */ + MP_CHECKOK(mp_init_size(&quot, USED(mp))); + +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD) + for (ix = USED(mp) - 1; ix >= 0; ix--) { + w = (w << DIGIT_BIT) | DIGIT(mp, ix); + + if (w >= d) { + q = w / d; + w = w % d; + } else { + q = 0; + } + + s_mp_lshd(&quot, 1); + DIGIT(&quot, 0) = (mp_digit)q; + } +#else + { + mp_digit p; +#if !defined(MP_ASSEMBLY_DIV_2DX1D) + mp_digit norm; +#endif + + MP_CHECKOK(mp_init_copy(&rem, mp)); + +#if !defined(MP_ASSEMBLY_DIV_2DX1D) + MP_DIGIT(&quot, 0) = d; + MP_CHECKOK(s_mp_norm(&rem, &quot, &norm)); + if (norm) + d <<= norm; + MP_DIGIT(&quot, 0) = 0; +#endif + + p = 0; + for (ix = USED(&rem) - 1; ix >= 0; ix--) { + w = DIGIT(&rem, ix); + + if (p) { + MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w)); + } else if (w >= d) { + q = w / d; + w = w % d; + } else { + q = 0; + } + + MP_CHECKOK(s_mp_lshd(&quot, 1)); + DIGIT(&quot, 0) = q; + p = w; + } +#if !defined(MP_ASSEMBLY_DIV_2DX1D) + if (norm) + w >>= norm; +#endif + } +#endif + + /* Deliver the remainder, if desired */ + if (r) { + *r = (mp_digit)w; + } + + s_mp_clamp(&quot); + mp_exch(&quot, mp); +CLEANUP: + mp_clear(&quot); + mp_clear(&rem); + + return res; +} /* end s_mp_div_d() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive full arithmetic */ + +/* {{{ s_mp_add(a, b) */ + +/* Compute a = |a| + |b| */ +mp_err +s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w = 0; +#else + mp_digit d, sum, carry = 0; +#endif + mp_digit *pa, *pb; + mp_size ix; + mp_size used; + mp_err res; + + /* Make sure a has enough precision for the output value */ + if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY) + return res; + + /* + Add up all digits up to the precision of 
b. If b had initially + the same precision as a, or greater, we took care of it by the + padding step above, so there is no problem. If b had initially + less precision, we'll have to make sure the carry out is duly + propagated upward among the higher-order digits of the sum. + */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + used = MP_USED(b); + for (ix = 0; ix < used; ix++) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = w + *pa + *pb++; + *pa++ = ACCUM(w); + w = CARRYOUT(w); +#else + d = *pa; + sum = d + *pb++; + d = (sum < d); /* detect overflow */ + *pa++ = sum += carry; + carry = d + (sum < carry); /* detect overflow */ +#endif + } + + /* If we run out of 'b' digits before we're actually done, make + sure the carries get propagated upward... + */ + used = MP_USED(a); +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + while (w && ix < used) { + w = w + *pa; + *pa++ = ACCUM(w); + w = CARRYOUT(w); + ++ix; + } +#else + while (carry && ix < used) { + sum = carry + *pa; + *pa++ = sum; + carry = !sum; + ++ix; + } +#endif + +/* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? 
 */
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
    if (w) {
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
            return res;

        DIGIT(a, ix) = (mp_digit)w;
    }
#else
    if (carry) {
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
            return res;

        DIGIT(a, used) = carry;
    }
#endif

    return MP_OKAY;
} /* end s_mp_add() */

/* }}} */

/* Compute c = |a| + |b| */ /* magnitude addition */
/*
  The sign of c is copied from a before the operands are exchanged; after
  the exchange, a names the operand with at least as many used digits.
  Returns MP_OKAY, or the error reported by s_mp_pad() when c cannot be
  extended.  MP_USED(c) is set to the number of digits written.
 */
mp_err
s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
{
    mp_digit *pa, *pb, *pc;
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
    mp_word w = 0;
#else
    mp_digit sum, carry = 0, d;
#endif
    mp_size ix;
    mp_size used;
    mp_err res;

    MP_SIGN(c) = MP_SIGN(a);
    if (MP_USED(a) < MP_USED(b)) {
        const mp_int *xch = a;
        a = b;
        b = xch;
    }

    /* Make sure c has enough precision for the output value */
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
        return res;

    /*
      Add up all digits up to the precision of b.  If b had initially
      the same precision as a, or greater, we took care of it by the
      exchange step above, so there is no problem.  If b had initially
      less precision, we'll have to make sure the carry out is duly
      propagated upward among the higher-order digits of the sum.
     */
    pa = MP_DIGITS(a);
    pb = MP_DIGITS(b);
    pc = MP_DIGITS(c);
    used = MP_USED(b);
    for (ix = 0; ix < used; ix++) {
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
        w = w + *pa++ + *pb++;
        *pc++ = ACCUM(w);
        w = CARRYOUT(w);
#else
        d = *pa++;
        sum = d + *pb++;
        d = (sum < d); /* detect overflow */
        *pc++ = sum += carry;
        carry = d + (sum < carry); /* detect overflow */
#endif
    }

    /* If we run out of 'b' digits before we're actually done, make
       sure the carries get propagated upward...
     */
    for (used = MP_USED(a); ix < used; ++ix) {
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
        w = w + *pa++;
        *pc++ = ACCUM(w);
        w = CARRYOUT(w);
#else
        *pc++ = sum = carry + *pa++;
        carry = (sum < carry);
#endif
    }

/* If there's an overall carry out, increase precision and include
   it.  We could have done this initially, but why touch the memory
   allocator unless we're sure we have to?
 */
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
    if (w) {
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
            return res;

        DIGIT(c, used) = (mp_digit)w;
        ++used;
    }
#else
    if (carry) {
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
            return res;

        DIGIT(c, used) = carry;
        ++used;
    }
#endif
    MP_USED(c) = used;
    return MP_OKAY;
}
/* {{{ s_mp_add_offset(a, b, offset) */

/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */
/*
  b is added into a starting at digit index 'offset'.  a is padded as
  needed and clamped before returning.  Returns MP_OKAY, or the error
  reported by s_mp_pad().
 */
mp_err
s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
{
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
    mp_word w, k = 0;
#else
    mp_digit d, sum, carry = 0;
#endif
    mp_size ib;
    mp_size ia;
    mp_size lim;
    mp_err res;

    /* Make sure a has enough precision for the output value */
    lim = MP_USED(b) + offset;
    if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
        return res;

    /*
      Add up all digits up to the precision of b.  If b had initially
      the same precision as a, or greater, we took care of it by the
      padding step above, so there is no problem.  If b had initially
      less precision, we'll have to make sure the carry out is duly
      propagated upward among the higher-order digits of the sum.
     */
    lim = USED(b);
    for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
        w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
        DIGIT(a, ia) = ACCUM(w);
        k = CARRYOUT(w);
#else
        d = MP_DIGIT(a, ia);
        sum = d + MP_DIGIT(b, ib);
        d = (sum < d);
        MP_DIGIT(a, ia) = sum += carry;
        carry = d + (sum < carry);
#endif
    }

/* If we run out of 'b' digits before we're actually done, make
   sure the carries get propagated upward...
 */
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
    for (lim = MP_USED(a); k && (ia < lim); ++ia) {
        w = (mp_word)DIGIT(a, ia) + k;
        DIGIT(a, ia) = ACCUM(w);
        k = CARRYOUT(w);
    }
#else
    for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
        d = MP_DIGIT(a, ia);
        MP_DIGIT(a, ia) = sum = d + carry;
        carry = (sum < d);
    }
#endif

/* If there's an overall carry out, increase precision and include
   it.  We could have done this initially, but why touch the memory
   allocator unless we're sure we have to?
 */
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
    if (k) {
        if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
            return res;

        DIGIT(a, ia) = (mp_digit)k;
    }
#else
    if (carry) {
        if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
            return res;

        DIGIT(a, lim) = carry;
    }
#endif
    s_mp_clamp(a);

    return MP_OKAY;

} /* end s_mp_add_offset() */

/* }}} */

/* {{{ s_mp_sub(a, b) */

/* Compute a = |a| - |b|, assumes |a| >= |b| */
/*
  Returns MP_RANGE when a borrow is left over after the last digit of a
  (the subtraction has already been written into a by then), MP_OKAY
  otherwise.
 */
mp_err
s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
{
    mp_digit *pa, *pb, *limit;
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
    mp_sword w = 0;
#else
    mp_digit d, diff, borrow = 0;
#endif

    /*
      Subtract and propagate borrow.  Up to the precision of b, this
      accounts for the digits of b; after that, we just make sure the
      carries get to the right place.  This saves having to pad b out to
      the precision of a just to make the loops work right...
     */
    pa = MP_DIGITS(a);
    pb = MP_DIGITS(b);
    limit = pb + MP_USED(b);
    while (pb < limit) {
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
        w = w + *pa - *pb++;
        *pa++ = ACCUM(w);
        w >>= MP_DIGIT_BIT;
#else
        d = *pa;
        diff = d - *pb++;
        d = (diff > d); /* detect borrow */
        if (borrow && --diff == MP_DIGIT_MAX)
            ++d;
        *pa++ = diff;
        borrow = d;
#endif
    }
    limit = MP_DIGITS(a) + MP_USED(a);
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
    while (w && pa < limit) {
        w = w + *pa;
        *pa++ = ACCUM(w);
        w >>= MP_DIGIT_BIT;
    }
#else
    while (borrow && pa < limit) {
        d = *pa;
        *pa++ = diff = d - borrow;
        borrow = (diff > d);
    }
#endif

    /* Clobber any leading zeroes we created */
    s_mp_clamp(a);

/*
   If there was a borrow out, then |b| > |a| in violation
   of our input invariant.  We've already done the work,
   but we'll at least complain about it...
 */
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
    return w ? MP_RANGE : MP_OKAY;
#else
    return borrow ? MP_RANGE : MP_OKAY;
#endif
} /* end s_mp_sub() */

/* }}} */

/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */
/*
  Three-operand form of s_mp_sub(): the difference is written to c, whose
  sign is copied from a.  Returns the s_mp_pad() error if c cannot be
  extended, MP_RANGE on a leftover borrow, MP_OKAY otherwise.
 */
mp_err
s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
{
    mp_digit *pa, *pb, *pc;
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
    mp_sword w = 0;
#else
    mp_digit d, diff, borrow = 0;
#endif
    int ix, limit;
    mp_err res;

    MP_SIGN(c) = MP_SIGN(a);

    /* Make sure c has enough precision for the output value */
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
        return res;

    /*
      Subtract and propagate borrow.  Up to the precision of b, this
      accounts for the digits of b; after that, we just make sure the
      carries get to the right place.  This saves having to pad b out to
      the precision of a just to make the loops work right...
     */
    pa = MP_DIGITS(a);
    pb = MP_DIGITS(b);
    pc = MP_DIGITS(c);
    limit = MP_USED(b);
    for (ix = 0; ix < limit; ++ix) {
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
        w = w + *pa++ - *pb++;
        *pc++ = ACCUM(w);
        w >>= MP_DIGIT_BIT;
#else
        d = *pa++;
        diff = d - *pb++;
        d = (diff > d);
        if (borrow && --diff == MP_DIGIT_MAX)
            ++d;
        *pc++ = diff;
        borrow = d;
#endif
    }
    for (limit = MP_USED(a); ix < limit; ++ix) {
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
        w = w + *pa++;
        *pc++ = ACCUM(w);
        w >>= MP_DIGIT_BIT;
#else
        d = *pa++;
        *pc++ = diff = d - borrow;
        borrow = (diff > d);
#endif
    }

    /* Clobber any leading zeroes we created */
    MP_USED(c) = ix;
    s_mp_clamp(c);

/*
   If there was a borrow out, then |b| > |a| in violation
   of our input invariant.  We've already done the work,
   but we'll at least complain about it...
 */
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
    return w ? MP_RANGE : MP_OKAY;
#else
    return borrow ? MP_RANGE : MP_OKAY;
#endif
}
/* {{{ s_mp_mul(a, b) */

/* Compute a = |a| * |b| */
/* Thin wrapper: forwards to mp_mul(a, b, a) and returns its result. */
mp_err
s_mp_mul(mp_int *a, const mp_int *b)
{
    return mp_mul(a, b, a);
} /* end s_mp_mul() */

/* }}} */

#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
/* This trick works on Sparc V8 CPUs with the Workshop compilers.
 */
/*
  MP_MUL_DxD(a, b, Phi, Plo): store the double-width product a * b as a
  high digit (Phi) and a low digit (Plo).  The arguments are expanded
  several times and without parentheses, so callers pass plain variables.
 */
#define MP_MUL_DxD(a, b, Phi, Plo) \
    { \
        unsigned long long product = (unsigned long long)a * b; \
        Plo = (mp_digit)product; \
        Phi = (mp_digit)(product >> MP_DIGIT_BIT); \
    }
#else
/* Portable variant: builds the product from four half-digit multiplies. */
#define MP_MUL_DxD(a, b, Phi, Plo) \
    { \
        mp_digit a0b1, a1b0; \
        Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
        Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
        a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
        a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
        a1b0 += a0b1; \
        Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
        if (a1b0 < a0b1) \
            Phi += MP_HALF_RADIX; \
        a1b0 <<= MP_HALF_DIGIT_BIT; \
        Plo += a1b0; \
        if (Plo < a1b0) \
            ++Phi; \
    }
#endif

#if !defined(MP_ASSEMBLY_MULTIPLY)
/* c = a * b */
/*
  Multiplies the a_len digits at a by the single digit b.  Writes a_len
  digits to c followed by the final carry, i.e. a_len + 1 digits in all.
 */
void
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
    mp_digit d = 0;

    /* Inner product:  Digits of a */
    while (a_len--) {
        mp_word w = ((mp_word)b * *a++) + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }
    *c = d;
#else
    mp_digit carry = 0;
    while (a_len--) {
        mp_digit a_i = *a++;
        mp_digit a0b0, a1b1;

        MP_MUL_DxD(a_i, b, a1b1, a0b0);

        a0b0 += carry;
        if (a0b0 < carry) /* low digit wrapped: carry into the high digit */
            ++a1b1;
        *c++ = a0b0;
        carry = a1b1;
    }
    *c = carry;
#endif
}

/* c += a * b */
/*
  Like s_mpv_mul_d(), but each product digit is added to the digit already
  in c.  The final carry is stored (not added) into c[a_len].
 */
void
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
                mp_digit *c)
{
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
    mp_digit d = 0;

    /* Inner product:  Digits of a */
    while (a_len--) {
        mp_word w = ((mp_word)b * *a++) + *c + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }
    *c = d;
#else
    mp_digit carry = 0;
    while (a_len--) {
        mp_digit a_i = *a++;
        mp_digit a0b0, a1b1;

        MP_MUL_DxD(a_i, b, a1b1, a0b0);

        a0b0 += carry;
        if (a0b0 < carry)
            ++a1b1;
        a0b0 += a_i = *c; /* a_i is reused to hold the old digit of c */
        if (a0b0 < a_i)
            ++a1b1;
        *c++ = a0b0;
        carry = a1b1;
    }
    *c = carry;
#endif
}

/* Presently, this is only used by the Montgomery arithmetic code.
 */
/* c += a * b */
/*
  Same accumulation as s_mpv_mul_d_add(), except that the final carry is
  added into the following digits of c and propagated for as long as it
  stays non-zero.  The propagation loop has no length bound of its own.
 */
void
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
    mp_digit d = 0;

    /* Inner product:  Digits of a */
    while (a_len--) {
        mp_word w = ((mp_word)b * *a++) + *c + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }

    while (d) {
        mp_word w = (mp_word)*c + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }
#else
    mp_digit carry = 0;
    while (a_len--) {
        mp_digit a_i = *a++;
        mp_digit a0b0, a1b1;

        MP_MUL_DxD(a_i, b, a1b1, a0b0);

        a0b0 += carry;
        if (a0b0 < carry)
            ++a1b1;

        a0b0 += a_i = *c; /* a_i is reused to hold the old digit of c */
        if (a0b0 < a_i)
            ++a1b1;

        *c++ = a0b0;
        carry = a1b1;
    }
    while (carry) {
        mp_digit c_i = *c;
        carry += c_i;
        *c++ = carry;
        carry = carry < c_i; /* 1 if the addition wrapped, else 0 */
    }
#endif
}
#endif

#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
/*
  MP_SQR_D(a, Phi, Plo): store the double-width square a * a as a high
  digit (Phi) and a low digit (Plo).  Arguments are expanded several times
  and without parentheses.
 */
#define MP_SQR_D(a, Phi, Plo) \
    { \
        unsigned long long square = (unsigned long long)a * a; \
        Plo = (mp_digit)square; \
        Phi = (mp_digit)(square >> MP_DIGIT_BIT); \
    }
#else
/* Portable variant: the cross term Pmid is computed once and doubled by
   shifting one extra bit. */
#define MP_SQR_D(a, Phi, Plo) \
    { \
        mp_digit Pmid; \
        Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX); \
        Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
        Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
        Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \
        Pmid <<= (MP_HALF_DIGIT_BIT + 1); \
        Plo += Pmid; \
        if (Plo < Pmid) \
            ++Phi; \
    }
#endif

#if !defined(MP_ASSEMBLY_SQUARE)
/* Add the squares of the digits of a to the digits of b.
 */
/*
  For each of the a_len digits pa[i], the double-width square pa[i]^2 is
  added into ps[2*i] and ps[2*i + 1].  Any carry left after the last digit
  is propagated into the following digits of ps until it is zero.
 */
void
s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
{
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
    mp_word w;
    mp_digit d;
    mp_size ix;

    w = 0;
/* One step of the accumulation; the trailing statement deliberately has no
   semicolon so that each use reads like a call. */
#define ADD_SQUARE(n) \
    d = pa[n]; \
    w += (d * (mp_word)d) + ps[2 * n]; \
    ps[2 * n] = ACCUM(w); \
    w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
    ps[2 * n + 1] = ACCUM(w); \
    w = (w >> DIGIT_BIT)

    /* Main loop handles four digits per iteration */
    for (ix = a_len; ix >= 4; ix -= 4) {
        ADD_SQUARE(0);
        ADD_SQUARE(1);
        ADD_SQUARE(2);
        ADD_SQUARE(3);
        pa += 4;
        ps += 8;
    }
    if (ix) {
        /* Advance past the 1..3 remaining digits, then address them with
           negative indices. */
        ps += 2 * ix;
        pa += ix;
        switch (ix) {
            case 3:
                ADD_SQUARE(-3); /* FALLTHRU */
            case 2:
                ADD_SQUARE(-2); /* FALLTHRU */
            case 1:
                ADD_SQUARE(-1); /* FALLTHRU */
            case 0:
                break;
        }
    }
    while (w) {
        w += *ps;
        *ps++ = ACCUM(w);
        w = (w >> DIGIT_BIT);
    }
#else
    mp_digit carry = 0;
    while (a_len--) {
        mp_digit a_i = *pa++;
        mp_digit a0a0, a1a1;

        MP_SQR_D(a_i, a1a1, a0a0);

        /* here a1a1 and a0a0 constitute a_i ** 2 */
        a0a0 += carry;
        if (a0a0 < carry)
            ++a1a1;

        /* now add to ps */
        a0a0 += a_i = *ps; /* a_i is reused to hold the old digit of ps */
        if (a0a0 < a_i)
            ++a1a1;
        *ps++ = a0a0;
        a1a1 += a_i = *ps;
        carry = (a1a1 < a_i);
        *ps++ = a1a1;
    }
    while (carry) {
        mp_digit s_i = *ps;
        carry += s_i;
        *ps++ = carry;
        carry = carry < s_i;
    }
#endif
}
#endif

#if !defined(MP_ASSEMBLY_DIV_2DX1D)
/*
** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
** so its high bit is 1.  This code is from NSPR.
+*/ +mp_err +s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + mp_digit *qp, mp_digit *rp) +{ + mp_digit d1, d0, q1, q0; + mp_digit r1, r0, m; + + d1 = divisor >> MP_HALF_DIGIT_BIT; + d0 = divisor & MP_HALF_DIGIT_MAX; + r1 = Nhi % d1; + q1 = Nhi / d1; + m = q1 * d0; + r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT); + if (r1 < m) { + q1--, r1 += divisor; + if (r1 >= divisor && r1 < m) { + q1--, r1 += divisor; + } + } + r1 -= m; + r0 = r1 % d1; + q0 = r1 / d1; + m = q0 * d0; + r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX); + if (r0 < m) { + q0--, r0 += divisor; + if (r0 >= divisor && r0 < m) { + q0--, r0 += divisor; + } + } + if (qp) + *qp = (q1 << MP_HALF_DIGIT_BIT) | q0; + if (rp) + *rp = r0 - m; + return MP_OKAY; +} +#endif + +#if MP_SQUARE +/* {{{ s_mp_sqr(a) */ + +mp_err +s_mp_sqr(mp_int *a) +{ + mp_err res; + mp_int tmp; + + if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY) + return res; + res = mp_sqr(a, &tmp); + if (res == MP_OKAY) { + s_mp_exch(&tmp, a); + } + mp_clear(&tmp); + return res; +} + +/* }}} */ +#endif + +/* {{{ s_mp_div(a, b) */ + +/* + s_mp_div(a, b) + + Compute a = a / b and b = a mod b. Assumes b > a. + */ + +mp_err +s_mp_div(mp_int *rem, /* i: dividend, o: remainder */ + mp_int *div, /* i: divisor */ + mp_int *quot) /* i: 0; o: quotient */ +{ + mp_int part, t; + mp_digit q_msd; + mp_err res; + mp_digit d; + mp_digit div_msd; + int ix; + + if (mp_cmp_z(div) == 0) + return MP_RANGE; + + DIGITS(&t) = 0; + /* Shortcut if divisor is power of two */ + if ((ix = s_mp_ispow2(div)) >= 0) { + MP_CHECKOK(mp_copy(rem, quot)); + s_mp_div_2d(quot, (mp_digit)ix); + s_mp_mod_2d(rem, (mp_digit)ix); + + return MP_OKAY; + } + + MP_SIGN(rem) = ZPOS; + MP_SIGN(div) = ZPOS; + MP_SIGN(&part) = ZPOS; + + /* A working temporary for division */ + MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem))); + + /* Normalize to optimize guessing */ + MP_CHECKOK(s_mp_norm(rem, div, &d)); + + /* Perform the division itself...woo! 
*/ + MP_USED(quot) = MP_ALLOC(quot); + + /* Find a partial substring of rem which is at least div */ + /* If we didn't find one, we're finished dividing */ + while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) { + int i; + int unusedRem; + int partExtended = 0; /* set to true if we need to extend part */ + + unusedRem = MP_USED(rem) - MP_USED(div); + MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem; + MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem; + MP_USED(&part) = MP_USED(div); + + /* We have now truncated the part of the remainder to the same length as + * the divisor. If part is smaller than div, extend part by one digit. */ + if (s_mp_cmp(&part, div) < 0) { + --unusedRem; +#if MP_ARGCHK == 2 + assert(unusedRem >= 0); +#endif + --MP_DIGITS(&part); + ++MP_USED(&part); + ++MP_ALLOC(&part); + partExtended = 1; + } + + /* Compute a guess for the next quotient digit */ + q_msd = MP_DIGIT(&part, MP_USED(&part) - 1); + div_msd = MP_DIGIT(div, MP_USED(div) - 1); + if (!partExtended) { + /* In this case, q_msd /= div_msd is always 1. First, since div_msd is + * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since + * we didn't extend part, q_msd >= div_msd. Therefore we know that + * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we + * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */ + q_msd = 1; + } else { + if (q_msd == div_msd) { + q_msd = MP_DIGIT_MAX; + } else { + mp_digit r; + MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2), + div_msd, &q_msd, &r)); + } + } +#if MP_ARGCHK == 2 + assert(q_msd > 0); /* This case should never occur any more. */ +#endif + if (q_msd <= 0) + break; + + /* See what that multiplies out to */ + mp_copy(div, &t); + MP_CHECKOK(s_mp_mul_d(&t, q_msd)); + + /* + If it's too big, back it off. We should not have to do this + more than once, or, in rare cases, twice. 
Knuth describes a + method by which this could be reduced to a maximum of once, but + I didn't implement that here. + When using s_mpv_div_2dx1d, we may have to do this 3 times. + */ + for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) { + --q_msd; + MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */ + } + if (i < 0) { + res = MP_RANGE; + goto CLEANUP; + } + + /* At this point, q_msd should be the right next digit */ + MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */ + s_mp_clamp(rem); + + /* + Include the digit in the quotient. We allocated enough memory + for any quotient we could ever possibly get, so we should not + have to check for failures here + */ + MP_DIGIT(quot, unusedRem) = q_msd; + } + + /* Denormalize remainder */ + if (d) { + s_mp_div_2d(rem, d); + } + + s_mp_clamp(quot); + +CLEANUP: + mp_clear(&t); + + return res; + +} /* end s_mp_div() */ + +/* }}} */ + +/* {{{ s_mp_2expt(a, k) */ + +mp_err +s_mp_2expt(mp_int *a, mp_digit k) +{ + mp_err res; + mp_size dig, bit; + + dig = k / DIGIT_BIT; + bit = k % DIGIT_BIT; + + mp_zero(a); + if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY) + return res; + + DIGIT(a, dig) |= ((mp_digit)1 << bit); + + return MP_OKAY; + +} /* end s_mp_2expt() */ + +/* }}} */ + +/* {{{ s_mp_reduce(x, m, mu) */ + +/* + Compute Barrett reduction, x (mod m), given a precomputed value for + mu = b^2k / m, where b = RADIX and k = #digits(m). This should be + faster than straight division, when many reductions by the same + value of m are required (such as in modular exponentiation). This + can nearly halve the time required to do modular exponentiation, + as compared to using the full integer divide to reduce. + + This algorithm was derived from the _Handbook of Applied + Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14, + pp. 603-604. 
+ */ + +mp_err +s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu) +{ + mp_int q; + mp_err res; + + if ((res = mp_init_copy(&q, x)) != MP_OKAY) + return res; + + s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1) */ + s_mp_mul(&q, mu); /* q2 = q1 * mu */ + s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */ + + /* x = x mod b^(k+1), quick (no division) */ + s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1)); + + /* q = q * m mod b^(k+1), quick (no division) */ + s_mp_mul(&q, m); + s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1)); + + /* x = x - q */ + if ((res = mp_sub(x, &q, x)) != MP_OKAY) + goto CLEANUP; + + /* If x < 0, add b^(k+1) to it */ + if (mp_cmp_z(x) < 0) { + mp_set(&q, 1); + if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY) + goto CLEANUP; + if ((res = mp_add(x, &q, x)) != MP_OKAY) + goto CLEANUP; + } + + /* Back off if it's too big */ + while (mp_cmp(x, m) >= 0) { + if ((res = s_mp_sub(x, m)) != MP_OKAY) + break; + } + +CLEANUP: + mp_clear(&q); + + return res; + +} /* end s_mp_reduce() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive comparisons */ + +/* {{{ s_mp_cmp(a, b) */ + +/* Compare |a| <=> |b|, return 0 if equal, <0 if a0 if a>b */ +int +s_mp_cmp(const mp_int *a, const mp_int *b) +{ + ARGMPCHK(a != NULL && b != NULL); + + mp_size used_a = MP_USED(a); + { + mp_size used_b = MP_USED(b); + + if (used_a > used_b) + goto IS_GT; + if (used_a < used_b) + goto IS_LT; + } + { + mp_digit *pa, *pb; + mp_digit da = 0, db = 0; + +#define CMP_AB(n) \ + if ((da = pa[n]) != (db = pb[n])) \ + goto done + + pa = MP_DIGITS(a) + used_a; + pb = MP_DIGITS(b) + used_a; + while (used_a >= 4) { + pa -= 4; + pb -= 4; + used_a -= 4; + CMP_AB(3); + CMP_AB(2); + CMP_AB(1); + CMP_AB(0); + } + while (used_a-- > 0 && ((da = *--pa) == (db = *--pb))) + /* do nothing */; + done: + if (da > db) + goto IS_GT; + if (da < db) + goto IS_LT; + } + return MP_EQ; +IS_LT: + return MP_LT; +IS_GT: + return MP_GT; +} /* end s_mp_cmp() */ + +/* }}} */ + +/* {{{ s_mp_cmp_d(a, d) */ + +/* Compare |a| 
<=> d, return 0 if equal, <0 if a<d, >0 if a>d */
/* Any a with more than one used digit is reported as greater than d;
   otherwise only digit 0 of a is compared with d. */
int
s_mp_cmp_d(const mp_int *a, mp_digit d)
{
    ARGMPCHK(a != NULL);

    if (USED(a) > 1)
        return MP_GT;

    if (DIGIT(a, 0) < d)
        return MP_LT;
    else if (DIGIT(a, 0) > d)
        return MP_GT;
    else
        return MP_EQ;

} /* end s_mp_cmp_d() */

/* }}} */

/* {{{ s_mp_ispow2(v) */

/*
  Returns -1 if the value is not a power of two; otherwise, it returns
  k such that v = 2^k, i.e. lg(v).

  The most significant digit must itself be a power of two and every lower
  digit must be zero; each lower digit contributes MP_DIGIT_BIT to k.
  The code reads digit MP_USED(v) - 1 unconditionally, so it relies on v
  having at least one used digit.
 */
int
s_mp_ispow2(const mp_int *v)
{
    mp_digit d;
    int extra = 0, ix;

    ARGMPCHK(v != NULL);

    ix = MP_USED(v) - 1;
    d = MP_DIGIT(v, ix); /* most significant digit of v */

    extra = s_mp_ispow2d(d);
    if (extra < 0 || ix == 0)
        return extra;

    while (--ix >= 0) {
        if (DIGIT(v, ix) != 0)
            return -1; /* not a power of two */
        extra += MP_DIGIT_BIT;
    }

    return extra;

} /* end s_mp_ispow2() */

/* }}} */

/* {{{ s_mp_ispow2d(d) */

/*
  Returns k such that d = 2^k, or -1 if d is zero or has more than one bit
  set.  Once d is known to have a single bit set, each mask test below
  contributes one bit of that bit's index.
 */
int
s_mp_ispow2d(mp_digit d)
{
    if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
        int pow = 0;
#if defined(MP_USE_UINT_DIGIT)
        if (d & 0xffff0000U)
            pow += 16;
        if (d & 0xff00ff00U)
            pow += 8;
        if (d & 0xf0f0f0f0U)
            pow += 4;
        if (d & 0xccccccccU)
            pow += 2;
        if (d & 0xaaaaaaaaU)
            pow += 1;
#elif defined(MP_USE_LONG_LONG_DIGIT)
        if (d & 0xffffffff00000000ULL)
            pow += 32;
        if (d & 0xffff0000ffff0000ULL)
            pow += 16;
        if (d & 0xff00ff00ff00ff00ULL)
            pow += 8;
        if (d & 0xf0f0f0f0f0f0f0f0ULL)
            pow += 4;
        if (d & 0xccccccccccccccccULL)
            pow += 2;
        if (d & 0xaaaaaaaaaaaaaaaaULL)
            pow += 1;
#elif defined(MP_USE_LONG_DIGIT)
        if (d & 0xffffffff00000000UL)
            pow += 32;
        if (d & 0xffff0000ffff0000UL)
            pow += 16;
        if (d & 0xff00ff00ff00ff00UL)
            pow += 8;
        if (d & 0xf0f0f0f0f0f0f0f0UL)
            pow += 4;
        if (d & 0xccccccccccccccccUL)
            pow += 2;
        if (d & 0xaaaaaaaaaaaaaaaaUL)
            pow += 1;
#else
#error "unknown type for mp_digit"
#endif
        return pow;
    }
    return -1;

} /* end s_mp_ispow2d() */

/* }}} */

/* }}} */

/* {{{ Primitive
I/O helpers */ + +/* {{{ s_mp_tovalue(ch, r) */ + +/* + Convert the given character to its digit value, in the given radix. + If the given character is not understood in the given radix, -1 is + returned. Otherwise the digit's numeric value is returned. + + The results will be odd if you use a radix < 2 or > 62, you are + expected to know what you're up to. + */ +int +s_mp_tovalue(char ch, int r) +{ + int val, xch; + + if (r > 36) + xch = ch; + else + xch = toupper(ch); + + if (isdigit(xch)) + val = xch - '0'; + else if (isupper(xch)) + val = xch - 'A' + 10; + else if (islower(xch)) + val = xch - 'a' + 36; + else if (xch == '+') + val = 62; + else if (xch == '/') + val = 63; + else + return -1; + + if (val < 0 || val >= r) + return -1; + + return val; + +} /* end s_mp_tovalue() */ + +/* }}} */ + +/* {{{ s_mp_todigit(val, r, low) */ + +/* + Convert val to a radix-r digit, if possible. If val is out of range + for r, returns zero. Otherwise, returns an ASCII character denoting + the value in the given radix. + + The results may be odd if you use a radix < 2 or > 64, you are + expected to know what you're doing. + */ + +char +s_mp_todigit(mp_digit val, int r, int low) +{ + char ch; + + if (val >= r) + return 0; + + ch = s_dmap_1[val]; + + if (r <= 36 && low) + ch = tolower(ch); + + return ch; + +} /* end s_mp_todigit() */ + +/* }}} */ + +/* {{{ s_mp_outlen(bits, radix) */ + +/* + Return an estimate for how long a string is needed to hold a radix + r representation of a number with 'bits' significant bits, plus an + extra for a zero terminator (assuming C style strings here) + */ +int +s_mp_outlen(int bits, int r) +{ + return (int)((double)bits * LOG_V_2(r) + 1.5) + 1; + +} /* end s_mp_outlen() */ + +/* }}} */ + +/* }}} */ + +/* {{{ mp_read_unsigned_octets(mp, str, len) */ +/* mp_read_unsigned_octets(mp, str, len) + Read in a raw value (base 256) into the given mp_int + No sign bit, number is positive. Leading zeros ignored. 
 */

/*
  str holds len big-endian octets.  The len % sizeof(mp_digit) leading
  octets form the first (partial) digit; the rest are consumed one whole
  digit at a time, shifting mp left by a digit before each new digit is
  stored.  Returns MP_OKAY or the error reported by s_mp_lshd().
 */
mp_err
mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
{
    int count;
    mp_err res;
    mp_digit d;

    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);

    mp_zero(mp);

    count = len % sizeof(mp_digit);
    if (count) {
        for (d = 0; count-- > 0; --len) {
            d = (d << 8) | *str++;
        }
        MP_DIGIT(mp, 0) = d;
    }

    /* Read the rest of the digits */
    for (; len > 0; len -= sizeof(mp_digit)) {
        for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
            d = (d << 8) | *str++;
        }
        if (MP_EQ == mp_cmp_z(mp)) {
            /* Still zero so far: an all-zero digit is a leading zero */
            if (!d)
                continue;
        } else {
            if ((res = s_mp_lshd(mp, 1)) != MP_OKAY)
                return res;
        }
        MP_DIGIT(mp, 0) = d;
    }
    return MP_OKAY;
} /* end mp_read_unsigned_octets() */
/* }}} */

/* {{{ mp_unsigned_octet_size(mp) */
/*
  Returns the number of octets needed for the magnitude of mp without
  leading zero octets; a zero value reports 1.  The ARGCHK failure paths
  return MP_BADARG through the unsigned return type.
 */
unsigned int
mp_unsigned_octet_size(const mp_int *mp)
{
    unsigned int bytes;
    int ix;
    mp_digit d = 0;

    ARGCHK(mp != NULL, MP_BADARG);
    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);

    bytes = (USED(mp) * sizeof(mp_digit));

    /* subtract leading zeros. */
    /* Iterate over each digit... */
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
        d = DIGIT(mp, ix);
        if (d)
            break;
        bytes -= sizeof(d);
    }
    if (!bytes)
        return 1;

    /* Have MSD, check digit bytes, high order first */
    for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
        unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
        if (x)
            break;
        --bytes;
    }
    return bytes;
} /* end mp_unsigned_octet_size() */
/* }}} */

/* {{{ mp_to_unsigned_octets(mp, str) */
/* output a buffer of big endian octets no longer than specified. */
/*
  On success the return value is the number of octets written to str (at
  least 1), not MP_OKAY.  MP_BADARG is returned through ARGCHK when the
  value needs more than maxlen octets.
 */
mp_err
mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
{
    int ix, pos = 0;
    unsigned int bytes;

    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);

    bytes = mp_unsigned_octet_size(mp);
    ARGCHK(bytes <= maxlen, MP_BADARG);

    /* Iterate over each digit... */
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
        mp_digit d = DIGIT(mp, ix);
        int jx;

        /* Unpack digit bytes, high order first */
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
            if (!pos && !x) /* suppress leading zeros */
                continue;
            str[pos++] = x;
        }
    }
    if (!pos)
        str[pos++] = 0;
    return pos;
} /* end mp_to_unsigned_octets() */
/* }}} */

/* {{{ mp_to_signed_octets(mp, str) */
/* output a buffer of big endian octets no longer than specified. */
/*
  Same as mp_to_unsigned_octets(), except that one zero octet is written
  first when the leading octet has its top bit set.  On success the return
  value is the number of octets written.
 */
mp_err
mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
{
    int ix, pos = 0;
    unsigned int bytes;

    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);

    bytes = mp_unsigned_octet_size(mp);
    ARGCHK(bytes <= maxlen, MP_BADARG);

    /* Iterate over each digit... */
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
        mp_digit d = DIGIT(mp, ix);
        int jx;

        /* Unpack digit bytes, high order first */
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
            if (!pos) {
                if (!x) /* suppress leading zeros */
                    continue;
                if (x & 0x80) { /* add one leading zero to make output positive.  */
                    ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
                    /* Explicit length test after the ARGCHK of the same
                       condition.  NOTE(review): presumably kept for builds
                       where ARGCHK expands to nothing -- confirm. */
                    if (bytes + 1 > maxlen)
                        return MP_BADARG;
                    str[pos++] = 0;
                }
            }
            str[pos++] = x;
        }
    }
    if (!pos)
        str[pos++] = 0;
    return pos;
} /* end mp_to_signed_octets() */
/* }}} */

/* {{{ mp_to_fixlen_octets(mp, str) */
/* output a buffer of big endian octets exactly as long as requested.
   constant time on the value of mp. */
/*
  Exactly 'length' octets are written.  When mp's used digits span fewer
  octets than length, the output is left-padded with zeros; when they span
  more, the excess high-order octets must all be zero or MP_BADARG is
  returned through ARGCHK.  Returns MP_OKAY on success.
 */
mp_err
mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
{
    int ix, jx;
    unsigned int bytes;

    ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG);

    /* Constant time on the value of mp.  Don't use mp_unsigned_octet_size. */
    bytes = USED(mp) * MP_DIGIT_SIZE;

    /* If the output is shorter than the native size of mp, then check that any
     * bytes not written have zero values.  This check isn't constant time on
     * the assumption that timing-sensitive callers can guarantee that mp fits
     * in the allocated space. */
    ix = USED(mp) - 1;
    if (bytes > length) {
        unsigned int zeros = bytes - length;

        /* Whole digits that will not be written must be zero */
        while (zeros >= MP_DIGIT_SIZE) {
            ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG);
            zeros -= MP_DIGIT_SIZE;
            ix--;
        }

        if (zeros > 0) {
            /* Partially written digit: m masks its 'zeros' high octets */
            mp_digit d = DIGIT(mp, ix);
            mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT);
            ARGCHK((d & m) == 0, MP_BADARG);
            for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) {
                *str++ = d >> (jx * CHAR_BIT);
            }
            ix--;
        }
    } else if (bytes < length) {
        /* Place any needed leading zeros. */
        unsigned int zeros = length - bytes;
        memset(str, 0, zeros);
        str += zeros;
    }

    /* Iterate over each whole digit... */
    for (; ix >= 0; ix--) {
        mp_digit d = DIGIT(mp, ix);

        /* Unpack digit bytes, high order first */
        for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) {
            *str++ = d >> (jx * CHAR_BIT);
        }
    }
    return MP_OKAY;
} /* end mp_to_fixlen_octets() */
/* }}} */

/* {{{ mp_cswap(condition, a, b, numdigits) */
/* performs a conditional swap between mp_int.
 */
/*
  Swaps the used count, the sign and the first numdigits digits of a and b
  when condition is non-zero, and leaves them unchanged when it is zero.
  condition is first turned into an all-ones or all-zero mask, and every
  field is exchanged with XOR under that mask, so the swap itself does not
  branch on condition.  Both operands are grown to numdigits digits if
  either has fewer allocated.  Returns 0, or the s_mp_grow() error.
 */
mp_err
mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits)
{
    mp_digit x;
    unsigned int i;
    mp_err res = 0;

    /* if pointers are equal return */
    if (a == b)
        return res;

    if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) {
        MP_CHECKOK(s_mp_grow(a, numdigits));
        MP_CHECKOK(s_mp_grow(b, numdigits));
    }

    /* condition == 0 -> 0; condition != 0 -> all bits set */
    condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1;

    x = (USED(a) ^ USED(b)) & condition;
    USED(a) ^= x;
    USED(b) ^= x;

    x = (SIGN(a) ^ SIGN(b)) & condition;
    SIGN(a) ^= x;
    SIGN(b) ^= x;

    for (i = 0; i < numdigits; i++) {
        x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition;
        DIGIT(a, i) ^= x;
        DIGIT(b, i) ^= x;
    }

CLEANUP:
    return res;
} /* end mp_cswap() */
/* }}} */

/*------------------------------------------------------------------------*/
/* HERE THERE BE DRAGONS                                                  */
diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h
new file mode 100644
index 0000000000..4ba9b6a4bf
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.h
@@ -0,0 +1,322 @@
/*
 *  mpi.h
 *
 *  Arbitrary precision integer arithmetic library
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef _H_MPI_ +#define _H_MPI_ + +#include "mpi-config.h" + +#include "seccomon.h" +SEC_BEGIN_PROTOS + +#if MP_DEBUG +#undef MP_IOFUNC +#define MP_IOFUNC 1 +#endif + +#if MP_IOFUNC +#include +#include +#endif + +#include + +#if defined(BSDI) +#undef ULLONG_MAX +#endif + +#include + +#define MP_NEG 1 +#define MP_ZPOS 0 + +#define MP_OKAY 0 /* no error, all is well */ +#define MP_YES 0 /* yes (boolean result) */ +#define MP_NO -1 /* no (boolean result) */ +#define MP_MEM -2 /* out of memory */ +#define MP_RANGE -3 /* argument out of range */ +#define MP_BADARG -4 /* invalid parameter */ +#define MP_UNDEF -5 /* answer is undefined */ +#define MP_LAST_CODE MP_UNDEF + +typedef unsigned int mp_sign; +typedef unsigned int mp_size; +typedef int mp_err; + +#define MP_32BIT_MAX 4294967295U + +#if !defined(ULONG_MAX) +#error "ULONG_MAX not defined" +#elif !defined(UINT_MAX) +#error "UINT_MAX not defined" +#elif !defined(USHRT_MAX) +#error "USHRT_MAX not defined" +#endif + +#if defined(ULLONG_MAX) /* C99, Solaris */ +#define MP_ULONG_LONG_MAX ULLONG_MAX +/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */ +#elif defined(ULONG_LONG_MAX) /* HPUX */ +#define MP_ULONG_LONG_MAX ULONG_LONG_MAX +#elif defined(ULONGLONG_MAX) /* AIX */ +#define MP_ULONG_LONG_MAX ULONGLONG_MAX +#endif + +/* We only use unsigned long for mp_digit iff long is more than 32 bits. 
 */
/*
 * Digit type selection.  Exactly one of MP_USE_LONG_DIGIT,
 * MP_USE_LONG_LONG_DIGIT and MP_USE_UINT_DIGIT is left defined.  In the two
 * wide-digit cases MP_NO_MP_WORD is forced to 1, i.e. no double-width
 * mp_word type is used with them.
 */
#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
typedef unsigned long mp_digit;
#define MP_DIGIT_MAX ULONG_MAX
#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */
#define MP_HALF_DIGIT_MAX UINT_MAX
#undef MP_NO_MP_WORD
#define MP_NO_MP_WORD 1
#undef MP_USE_LONG_DIGIT
#define MP_USE_LONG_DIGIT 1
#undef MP_USE_LONG_LONG_DIGIT

#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX)
typedef unsigned long long mp_digit;
#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */
#define MP_HALF_DIGIT_MAX UINT_MAX
#undef MP_NO_MP_WORD
#define MP_NO_MP_WORD 1
#undef MP_USE_LONG_LONG_DIGIT
#define MP_USE_LONG_LONG_DIGIT 1
#undef MP_USE_LONG_DIGIT

#else
typedef unsigned int mp_digit;
#define MP_DIGIT_MAX UINT_MAX
#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */
#define MP_HALF_DIGIT_MAX USHRT_MAX
#undef MP_USE_UINT_DIGIT
#define MP_USE_UINT_DIGIT 1
#undef MP_USE_LONG_LONG_DIGIT
#undef MP_USE_LONG_DIGIT
#endif

/*
 * Double-width word type.  mp_word / mp_sword are only defined for
 * unsigned int digits when a wider integer type exists; otherwise
 * MP_NO_MP_WORD is set.
 */
#if !defined(MP_NO_MP_WORD)
#if defined(MP_USE_UINT_DIGIT) && \
    (defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))

#if (ULONG_MAX > UINT_MAX)
typedef unsigned long mp_word;
typedef long mp_sword;
#define MP_WORD_MAX ULONG_MAX

#else
typedef unsigned long long mp_word;
typedef long long mp_sword;
#define MP_WORD_MAX MP_ULONG_LONG_MAX
#endif

#else
#define MP_NO_MP_WORD 1
#endif
#endif /* !defined(MP_NO_MP_WORD) */

/* Opt-in fallback: a word type no wider than unsigned int */
#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
typedef unsigned int mp_word;
typedef int mp_sword;
#define MP_WORD_MAX UINT_MAX
#endif

#define MP_DIGIT_SIZE sizeof(mp_digit)
#define MP_DIGIT_BIT (CHAR_BIT * MP_DIGIT_SIZE)
#define MP_WORD_BIT (CHAR_BIT * sizeof(mp_word))
#define MP_RADIX (1 + (mp_word)MP_DIGIT_MAX)

#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT / 2)
#define MP_HALF_RADIX (1 + (mp_digit)MP_HALF_DIGIT_MAX)
/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's
named
** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
** consistent with the other _HALF_ names.
*/

/* Macros for accessing the mp_int internals           */
#define MP_SIGN(MP) ((MP)->sign)
#define MP_USED(MP) ((MP)->used)
#define MP_ALLOC(MP) ((MP)->alloc)
#define MP_DIGITS(MP) ((MP)->dp)
#define MP_DIGIT(MP, N) (MP)->dp[(N)]

/* This defines the maximum I/O base (minimum is 2)   */
#define MP_MAX_RADIX 64

/* An arbitrary-precision integer: a sign plus an array of digits, with
   dp[0] the least significant digit. */
typedef struct {
    mp_sign sign;  /* sign of this quantity      */
    mp_size alloc; /* how many digits allocated  */
    mp_size used;  /* how many digits used       */
    mp_digit *dp;  /* the digits themselves      */
} mp_int;

/* Default precision       */
mp_size mp_get_prec(void);
void mp_set_prec(mp_size prec);

/* Memory management       */
mp_err mp_init(mp_int *mp);
mp_err mp_init_size(mp_int *mp, mp_size prec);
mp_err mp_init_copy(mp_int *mp, const mp_int *from);
mp_err mp_copy(const mp_int *from, mp_int *to);
void mp_exch(mp_int *mp1, mp_int *mp2);
void mp_clear(mp_int *mp);
void mp_zero(mp_int *mp);
void mp_set(mp_int *mp, mp_digit d);
mp_err mp_set_int(mp_int *mp, long z);
#define mp_set_long(mp, z) mp_set_int(mp, z)
mp_err mp_set_ulong(mp_int *mp, unsigned long z);

/* Single digit arithmetic */
mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
mp_err mp_mul_2(const mp_int *a, mp_int *c);
mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
mp_err mp_div_2(const mp_int *a, mp_int *c);
mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);

/* Sign manipulations      */
mp_err mp_abs(const mp_int *a, mp_int *b);
mp_err mp_neg(const mp_int *a, mp_int *b);

/* Full arithmetic         */
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
/* Without MP_SQUARE, squaring is expressed through mp_mul() */
#if MP_SQUARE
mp_err mp_sqr(const mp_int *a, mp_int *b);
#else
#define mp_sqr(a, b) mp_mul(a, a, b)
#endif
mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_2expt(mp_int *a, mp_digit k);

/* Modular arithmetic      */
#if MP_MODARITH
mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c);
mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c);
mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
#if MP_SQUARE
mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
#else
#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
#endif
mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
#endif /* MP_MODARITH */

/* Comparisons             */
int mp_cmp_z(const mp_int *a);
int mp_cmp_d(const mp_int *a, mp_digit d);
int mp_cmp(const mp_int *a, const mp_int *b);
int mp_cmp_mag(const mp_int *a, const mp_int *b);
int mp_isodd(const mp_int *a);
int mp_iseven(const mp_int *a);

/* Number theoretic        */
mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);

/* Input and output        */
#if MP_IOFUNC
void mp_print(mp_int *mp, FILE *ofp);
#endif /* end MP_IOFUNC */

/* Base conversion         */
mp_err mp_read_raw(mp_int *mp, char *str, int len);
int mp_raw_size(mp_int *mp);
mp_err mp_toraw(mp_int *mp, char *str);
mp_err mp_read_radix(mp_int *mp, const char *str, int radix);
mp_err mp_read_variable_radix(mp_int *a, const char *str, int default_radix);
int mp_radix_size(mp_int *mp, int radix);
mp_err mp_toradix(mp_int *mp, char *str, int radix);
int mp_tovalue(char ch, int r);

#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
#define mp_tohex(M, S) mp_toradix((M), (S), 16)

/* Error strings           */
const char *mp_strerror(mp_err ec);

/* Octet string conversion functions */
mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len);
unsigned int mp_unsigned_octet_size(const mp_int *mp);
mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);

/* Miscellaneous */
mp_size mp_trailing_zeros(const mp_int *mp);
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx);
mp_err mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits);

/*
 * MP_CHECKOK / MP_CHECKERR: evaluate x into a local named 'res' and jump to
 * a label named CLEANUP when the result is negative.  The enclosing function
 * must provide both.  Each expands to a bare 'if' statement, so neither
 * should be followed directly by an 'else'.
 */
#define MP_CHECKOK(x) \
    if (MP_OKAY > (res = (x))) \
    goto CLEANUP
#define MP_CHECKERR(x) \
    if (MP_OKAY > (res = (x))) \
    goto CLEANUP

/* Short aliases for the MP_-prefixed names above */
#define NEG MP_NEG
#define ZPOS MP_ZPOS
#define DIGIT_MAX MP_DIGIT_MAX
#define DIGIT_BIT MP_DIGIT_BIT
#define DIGIT_FMT MP_DIGIT_FMT
#define RADIX MP_RADIX
#define MAX_RADIX MP_MAX_RADIX
#define SIGN(MP) MP_SIGN(MP)
#define USED(MP) MP_USED(MP)
#define ALLOC(MP) MP_ALLOC(MP)
#define DIGITS(MP) MP_DIGITS(MP)
#define DIGIT(MP, N) MP_DIGIT(MP, N)

/* Functions which return an mp_err value will NULL-check their arguments via
 * ARGCHK(condition, return), where the caller is responsible for checking the
 * mp_err return code. For functions that return an integer type, the caller
 * has no way to tell if the value is an error code or a legitimate value.
+ * Therefore, ARGMPCHK(condition) will trigger an assertion failure on debug + * builds, but no-op in optimized builds. */ +#if MP_ARGCHK == 1 +#define ARGMPCHK(X) /* */ +#define ARGCHK(X, Y) \ + { \ + if (!(X)) { \ + return (Y); \ + } \ + } +#elif MP_ARGCHK == 2 +#include <assert.h> +#define ARGMPCHK(X) assert(X) +#define ARGCHK(X, Y) assert(X) +#else +#define ARGMPCHK(X) /* */ +#define ARGCHK(X, Y) /* */ +#endif + +#ifdef CT_VERIF +void mp_taint(mp_int *mp); +void mp_untaint(mp_int *mp); +#endif + +SEC_END_PROTOS + +#endif /* end _H_MPI_ */ diff --git a/security/nss/lib/freebl/mpi/mpi_amd64.c b/security/nss/lib/freebl/mpi/mpi_amd64.c new file mode 100644 index 0000000000..9e538bb6a1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64.c @@ -0,0 +1,32 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MPI_AMD64 +#error This file only works on AMD64 platforms. +#endif + +#include <mpi-priv.h> + +/* + * MPI glue + * + */ + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b */ +void MPI_ASM_DECL +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + mp_digit w; + mp_digit d; + + d = s_mpv_mul_add_vec64(c, a, a_len, b); + c += a_len; + while (d) { + w = c[0] + d; + d = (w < c[0] || w < d); + *c++ = w; + } +} diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_common.S b/security/nss/lib/freebl/mpi/mpi_amd64_common.S new file mode 100644 index 0000000000..4000f2066a --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_common.S @@ -0,0 +1,409 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + +# ------------------------------------------------------------------------ +# +# Implementation of s_mpv_mul_set_vec which exploits +# the 64X64->128 bit unsigned multiply instruction. +# +# ------------------------------------------------------------------------ + +# r = a * digit, r and a are vectors of length len +# returns the carry digit (0 when len is 0) +# r and a are 64 bit aligned. +# +# uint64_t +# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +# + +.text; .align 16; .globl s_mpv_mul_set_vec64; + +#ifdef DARWIN +#define s_mpv_mul_set_vec64 _s_mpv_mul_set_vec64 +.private_extern s_mpv_mul_set_vec64 +s_mpv_mul_set_vec64: +#else +.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64: +#endif + + xorq %r9, %r9 # cy = 0; .L17 returns %r9, so len == 0 yields 0 + testq %rdx, %rdx + jz .L17 + + movq %rdx, %r8 # Use r8 for len; %rdx is used by mul + + +.L15: + cmpq $8, %r8 # len - 8 + jb .L16 + movq 0(%rsi), %rax # rax = a[0] + movq 8(%rsi), %r11 # prefetch a[1] + mulq %rcx # p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 # prefetch a[2] + mulq %rcx # p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 # prefetch a[3] + mulq %rcx # p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 # prefetch a[4] + mulq %rcx # p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 # prefetch a[5] + mulq %rcx # p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 # prefetch a[6] + mulq
%rcx # p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 # prefetch a[7] + mulq %rcx # p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + mulq %rcx # p = a[7] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 56(%rdi) # r[7] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L17 + jmp .L15 + +.L16: + movq 0(%rsi), %rax + mulq %rcx # p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 8(%rsi), %rax + mulq %rcx # p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 16(%rsi), %rax + mulq %rcx # p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 24(%rsi), %rax + mulq %rcx # p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 32(%rsi), %rax + mulq %rcx # p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 40(%rsi), %rax + mulq %rcx # p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 48(%rsi), %rax + mulq %rcx # p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + +.L17: + movq %r9, %rax + ret + +#ifndef DARWIN +.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64 +#endif + +# 
------------------------------------------------------------------------ +# +# Implementation of s_mpv_mul_add_vec which exploits +# the 64X64->128 bit unsigned multiply instruction. +# +# ------------------------------------------------------------------------ + +# r += a * digit, r and a are vectors of length len +# returns the carry digit (0 when len is 0) +# r and a are 64 bit aligned. +# +# uint64_t +# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +# + +.text; .align 16; .globl s_mpv_mul_add_vec64; + +#ifdef DARWIN +#define s_mpv_mul_add_vec64 _s_mpv_mul_add_vec64 +.private_extern s_mpv_mul_add_vec64 +s_mpv_mul_add_vec64: +#else +.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64: +#endif + + xorq %r9, %r9 # cy = 0; .L27 returns %r9, so len == 0 yields 0 + testq %rdx, %rdx + jz .L27 + + movq %rdx, %r8 # Use r8 for len; %rdx is used by mul + + +.L25: + cmpq $8, %r8 # len - 8 + jb .L26 + movq 0(%rsi), %rax # rax = a[0] + movq 0(%rdi), %r10 # r10 = r[0] + movq 8(%rsi), %r11 # prefetch a[1] + mulq %rcx # p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[0] + movq 8(%rdi), %r10 # prefetch r[1] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 # prefetch a[2] + mulq %rcx # p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[1] + movq 16(%rdi), %r10 # prefetch r[2] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 # prefetch a[3] + mulq %rcx # p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[2] + movq 24(%rdi), %r10 # prefetch r[3] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 # prefetch a[4] + mulq %rcx # p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[3] + movq 32(%rdi), %r10 # prefetch r[4] + addq %r9,
%rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 # prefetch a[5] + mulq %rcx # p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[4] + movq 40(%rdi), %r10 # prefetch r[5] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 # prefetch a[6] + mulq %rcx # p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[5] + movq 48(%rdi), %r10 # prefetch r[6] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 # prefetch a[7] + mulq %rcx # p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[6] + movq 56(%rdi), %r10 # prefetch r[7] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + mulq %rcx # p = a[7] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[7] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 56(%rdi) # r[7] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L27 + jmp .L25 + +.L26: + movq 0(%rsi), %rax + movq 0(%rdi), %r10 + mulq %rcx # p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[0] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 8(%rsi), %rax + movq 8(%rdi), %r10 + mulq %rcx # p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[1] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 16(%rsi), %rax + movq 16(%rdi), %r10 + mulq %rcx # p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[2] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 
24(%rsi), %rax + movq 24(%rdi), %r10 + mulq %rcx # p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[3] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 32(%rsi), %rax + movq 32(%rdi), %r10 + mulq %rcx # p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[4] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 40(%rsi), %rax + movq 40(%rdi), %r10 + mulq %rcx # p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[5] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 48(%rsi), %rax + movq 48(%rdi), %r10 + mulq %rcx # p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[6] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + +.L27: + movq %r9, %rax + ret + +#ifndef DARWIN +.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64 + +# Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm new file mode 100644 index 0000000000..2120c18f9d --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm @@ -0,0 +1,388 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +; +; This code is converted from mpi_amd64_gas.asm for MASM for x64. +; + +; ------------------------------------------------------------------------ +; +; Implementation of s_mpv_mul_set_vec which exploits +; the 64X64->128 bit unsigned multiply instruction. 
+; +; ------------------------------------------------------------------------ + +; r = a * digit, r and a are vectors of length len +; returns the carry digit (0 when len is 0) +; r and a are 64 bit aligned. +; +; uint64_t +; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +; + +.CODE + +s_mpv_mul_set_vec64 PROC + + ; Parameter register compatibility + ; + ; GAS and MASM builds receive parameters in different registers; remap rcx, rdx, r8, r9 to rdi, rsi, rdx, rcx. + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov edx, r8d + mov rcx, r9 + + xor r9, r9 ; cy = 0; L17 returns r9, so len == 0 yields 0 + test rdx, rdx + jz L17 + mov r8, rdx + + +L15: + cmp r8, 8 + jb L16 + mov rax, [rsi] + mov r11, [8+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [0+rdi], rax + mov r9, rdx + mov rax,r11 + mov r11, [16+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [24+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [32+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [40+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [48+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [56+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [48+rdi],rax + mov r9,rdx + mov rax,r11 + mul rcx + add rax,r9 + adc rdx,0 + mov [56+rdi],rax + mov r9,rdx + add rsi, 64 + add rdi, 64 + sub r8, 8 + jz L17 + jmp L15 + +L16: + mov rax, [0+rsi] + mul rcx + add rax, r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + dec r8 + jz L17 + mov rax, [8+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [8+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [16+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [16+rdi],rax + mov r9,rdx + dec r8 + jz L17 + mov rax, [24+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [24+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [32+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [32+rdi],rax + mov r9, rdx
+ dec r8 + jz L17 + mov rax, [40+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [40+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [48+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [48+rdi], rax + mov r9, rdx + dec r8 + jz L17 + +L17: + mov rax, r9 + pop rsi + pop rdi + ret + +s_mpv_mul_set_vec64 ENDP + + +;------------------------------------------------------------------------ +; +; Implementation of s_mpv_mul_add_vec which exploits +; the 64X64->128 bit unsigned multiply instruction. +; +;------------------------------------------------------------------------ + +; r += a * digit, r and a are vectors of length len +; returns the carry digit (0 when len is 0) +; r and a are 64 bit aligned. +; +; uint64_t +; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +; + +s_mpv_mul_add_vec64 PROC + + ; Parameter register compatibility + ; + ; GAS and MASM builds receive parameters in different registers; remap rcx, rdx, r8, r9 to rdi, rsi, rdx, rcx. + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov edx, r8d + mov rcx, r9 + + xor r9, r9 ; cy = 0; L27 returns r9, so len == 0 yields 0 + test rdx, rdx + jz L27 + mov r8, rdx + + +L25: + cmp r8, 8 + jb L26 + mov rax, [0+rsi] + mov r10, [0+rdi] + mov r11, [8+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [8+rdi] + add rax,r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [16+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [16+rdi] + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [24+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [24+rdi] + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [32+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [32+rdi] + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [40+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [40+rdi] + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [48+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [48+rdi] + add rax,r9 + adc rdx,0 + mov
[40+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [56+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [56+rdi] + add rax,r9 + adc rdx,0 + mov [48+rdi],rax + mov r9,rdx + mov rax,r11 + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [56+rdi],rax + mov r9,rdx + add rsi,64 + add rdi,64 + sub r8, 8 + jz L27 + jmp L25 + +L26: + mov rax, [0+rsi] + mov r10, [0+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [8+rsi] + mov r10, [8+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [16+rsi] + mov r10, [16+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [24+rsi] + mov r10, [24+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [32+rsi] + mov r10, [32+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [40+rsi] + mov r10, [40+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [48+rsi] + mov r10, [48+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax, r9 + adc rdx, 0 + mov [48+rdi], rax + mov r9, rdx + dec r8 + jz L27 + +L27: + mov rax, r9 + + pop rsi + pop rdi + ret + +s_mpv_mul_add_vec64 ENDP + +END diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s new file mode 100644 index 0000000000..ddd5c40fda --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s @@ -0,0 +1,385 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +/ ------------------------------------------------------------------------ +/ +/ Implementation of s_mpv_mul_set_vec which exploits +/ the 64X64->128 bit unsigned multiply instruction. +/ +/ ------------------------------------------------------------------------ + +/ r = a * digit, r and a are vectors of length len +/ returns the carry digit (0 when len is 0) +/ r and a are 64 bit aligned. +/ +/ uint64_t +/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +/ + +.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64: + + xorq %r9, %r9 / cy = 0; .L17 returns %r9, so len == 0 yields 0 + testq %rdx, %rdx + jz .L17 + + movq %rdx, %r8 / Use r8 for len; %rdx is used by mul + + +.L15: + cmpq $8, %r8 / len - 8 + jb .L16 + movq 0(%rsi), %rax / rax = a[0] + movq 8(%rsi), %r11 / prefetch a[1] + mulq %rcx / p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 / prefetch a[2] + mulq %rcx / p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 / prefetch a[3] + mulq %rcx / p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 / prefetch a[4] + mulq %rcx / p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 / prefetch a[5] + mulq %rcx / p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 / prefetch a[6] + mulq %rcx / p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq
%r11, %rax + movq 56(%rsi), %r11 / prefetch a[7] + mulq %rcx / p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + mulq %rcx / p = a[7] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 56(%rdi) / r[7] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L17 + jmp .L15 + +.L16: + movq 0(%rsi), %rax + mulq %rcx / p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 8(%rsi), %rax + mulq %rcx / p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 16(%rsi), %rax + mulq %rcx / p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 24(%rsi), %rax + mulq %rcx / p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 32(%rsi), %rax + mulq %rcx / p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 40(%rsi), %rax + mulq %rcx / p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 48(%rsi), %rax + mulq %rcx / p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + +.L17: + movq %r9, %rax + ret + +.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64 + +/ ------------------------------------------------------------------------ +/ +/ Implementation of s_mpv_mul_add_vec which exploits +/ the 64X64->128 bit unsigned multiply 
instruction. +/ +/ ------------------------------------------------------------------------ + +/ r += a * digit, r and a are vectors of length len +/ returns the carry digit (0 when len is 0) +/ r and a are 64 bit aligned. +/ +/ uint64_t +/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +/ + +.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64: + + xorq %r9, %r9 / cy = 0; .L27 returns %r9, so len == 0 yields 0 + testq %rdx, %rdx + jz .L27 + + movq %rdx, %r8 / Use r8 for len; %rdx is used by mul + + +.L25: + cmpq $8, %r8 / len - 8 + jb .L26 + movq 0(%rsi), %rax / rax = a[0] + movq 0(%rdi), %r10 / r10 = r[0] + movq 8(%rsi), %r11 / prefetch a[1] + mulq %rcx / p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[0] + movq 8(%rdi), %r10 / prefetch r[1] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 / prefetch a[2] + mulq %rcx / p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[1] + movq 16(%rdi), %r10 / prefetch r[2] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 / prefetch a[3] + mulq %rcx / p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[2] + movq 24(%rdi), %r10 / prefetch r[3] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 / prefetch a[4] + mulq %rcx / p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[3] + movq 32(%rdi), %r10 / prefetch r[4] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 / prefetch a[5] + mulq %rcx / p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[4] + movq 40(%rdi), %r10 / prefetch r[5] + addq %r9, %rax + adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 / prefetch a[6] + mulq %rcx / p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[5] + movq 48(%rdi), %r10 / prefetch r[6] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 / prefetch a[7] + mulq %rcx / p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[6] + movq 56(%rdi), %r10 / prefetch r[7] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + mulq %rcx / p = a[7] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[7] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 56(%rdi) / r[7] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L27 + jmp .L25 + +.L26: + movq 0(%rsi), %rax + movq 0(%rdi), %r10 + mulq %rcx / p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[0] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 8(%rsi), %rax + movq 8(%rdi), %r10 + mulq %rcx / p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[1] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 16(%rsi), %rax + movq 16(%rdi), %r10 + mulq %rcx / p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[2] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 24(%rsi), %rax + movq 24(%rdi), %r10 + mulq %rcx / p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[3] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 32(%rsi), %rax + movq 32(%rdi), %r10 + mulq %rcx / p = a[4] 
* digit + addq %r10, %rax + adcq $0, %rdx / p += r[4] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 40(%rsi), %rax + movq 40(%rdi), %r10 + mulq %rcx / p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[5] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 48(%rsi), %rax + movq 48(%rdi), %r10 + mulq %rcx / p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[6] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + +.L27: + movq %r9, %rax + ret + +.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64 diff --git a/security/nss/lib/freebl/mpi/mpi_arm.c b/security/nss/lib/freebl/mpi/mpi_arm.c new file mode 100644 index 0000000000..27e4efdad1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_arm.c @@ -0,0 +1,175 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* This inlined version is for 32-bit ARM platform only */ + +#if !defined(__arm__) +#error "This is for ARM only" +#endif + +/* The inlined assembler version does not work in 16-bit Thumb mode */ +#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__) + +#include "mpi-priv.h" + +#ifdef MP_ASSEMBLY_MULTIPLY +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm__ __volatile__( + "mov r5, #0\n" +#ifdef __thumb2__ + "cbz %1, 2f\n" +#else + "cmp %1, r5\n" /* r5 is 0 now */ + "beq 2f\n" +#endif + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %3\n" + "str r5, [%2], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + + "2:\n" + "str r5, [%2]\n" + : "+r"(a), "+l"(a_len), "+r"(c) + : "r"(b) + : "memory", "cc", "%r4", "%r5", "%r6"); +} + +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm__ __volatile__( + "mov r5, #0\n" +#ifdef __thumb2__ + "cbz %1, 2f\n" +#else + "cmp %1, r5\n" /* r5 is 0 now */ + "beq 2f\n" +#endif + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%2]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %3\n" + "str r5, [%2], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + + "2:\n" + "str r5, [%2]\n" + : "+r"(a), "+l"(a_len), "+r"(c) + : "r"(b) + : "memory", "cc", "%r4", "%r5", "%r6"); +} + +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + if (!a_len) + return; + + __asm__ __volatile__( + "mov r5, #0\n" + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%2]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %3\n" + "str r5, [%2], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + +#ifdef __thumb2__ + "cbz r4, 3f\n" +#else + "cmp r4, #0\n" + "beq 3f\n" +#endif + + "2:\n" + "mov r4, #0\n" + "ldr r6, [%2]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + "str r5, [%2], #4\n" + "movs r5, r4\n" + "bne 2b\n" + + "3:\n" + : "+r"(a),
"+l"(a_len), "+r"(c) + : "r"(b) + : "memory", "cc", "%r4", "%r5", "%r6"); +} +#endif + +#ifdef MP_ASSEMBLY_SQUARE +void +s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps) +{ + if (!a_len) + return; + + __asm__ __volatile__( + "mov r3, #0\n" + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%0], #4\n" + "ldr r5, [%2]\n" + "adds r3, r5\n" + "adc r4, r4, #0\n" + "umlal r3, r4, r6, r6\n" /* w = r3:r4 */ + "str r3, [%2], #4\n" + + "ldr r5, [%2]\n" + "adds r3, r4, r5\n" + "mov r4, #0\n" + "adc r4, r4, #0\n" + "str r3, [%2], #4\n" + "mov r3, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + +#ifdef __thumb2__ + "cbz r3, 3f\n" +#else + "cmp r3, #0\n" + "beq 3f\n" +#endif + + "2:\n" + "mov r4, #0\n" + "ldr r5, [%2]\n" + "adds r3, r5\n" + "adc r4, r4, #0\n" + "str r3, [%2], #4\n" + "movs r3, r4\n" + "bne 2b\n" + + "3:" + : "+r"(pa), "+r"(a_len), "+r"(ps) + : + : "memory", "cc", "%r3", "%r4", "%r5", "%r6"); +} +#endif +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_hp.c b/security/nss/lib/freebl/mpi/mpi_hp.c new file mode 100644 index 0000000000..0cea7685d6 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_hp.c @@ -0,0 +1,81 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file contains routines that perform vector multiplication. */ + +#include "mpi-priv.h" +#include <unistd.h> + +#include <stddef.h> +/* #include <sys/systm.h> */ +#include <string.h> + +extern void multacc512( + int length, /* doublewords in multiplicand vector. */ + const mp_digit *scalaraddr, /* Address of scalar. */ + const mp_digit *multiplicand, /* The multiplicand vector. */ + mp_digit *result); /* Where to accumulate the result. */ + +extern void maxpy_little( + int length, /* doublewords in multiplicand vector. */ + const mp_digit *scalaraddr, /* Address of scalar. */ + const mp_digit *multiplicand, /* The multiplicand vector.
*/
+    mp_digit *result);            /* Where to accumulate the result. */
+
+extern void add_diag_little(
+    int length,           /* doublewords in input vector. */
+    const mp_digit *root, /* The vector to square. */
+    mp_digit *result);    /* Where to accumulate the result. */
+/* Thin wrapper: passes (a_len, pa, ps) to add_diag_little() as (length, root, result). */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+    add_diag_little(a_len, pa, ps);
+}
+
+#define MAX_STACK_DIGITS 258 /* size of the on-stack scratch buffer in s_mpv_mul_d */
+#define MULTACC512_LEN (512 / MP_DIGIT_BIT) /* vector length for which multacc512 is selected */
+#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little) /* reads the caller's local a_len */
+
+/* c = a * b. If a aliases c, a is copied to scratch first, since c is reset before the accumulate call. */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit x[MAX_STACK_DIGITS]; /* scratch copy of a, used only when a == c */
+    mp_digit *px = x;             /* stays == x unless a heap buffer is allocated below */
+    size_t xSize = 0;             /* byte size of the heap buffer; 0 when none was allocated */
+
+    if (a == c) {
+        if (a_len > MAX_STACK_DIGITS) { /* too long for the stack buffer */
+            xSize = sizeof(mp_digit) * (a_len + 2);
+            px = malloc(xSize);
+            if (!px)
+                return; /* allocation failed: c is left unmodified and no error is reported */
+        }
+        memcpy(px, a, a_len * sizeof(*a));
+        a = px;
+    }
+    s_mp_setz(c, a_len + 1); /* presumably zeroes a_len + 1 digits of c; s_mp_setz is defined elsewhere */
+    HP_MPY_ADD_FN(a_len, &b, a, c);
+    if (px != x && px) { /* only a heap buffer is wiped and released; the stack buffer is not cleared */
+        memset(px, 0, xSize);
+        free(px);
+    }
+}
+
+/* c += a * b, where a is a_len words long. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    c[a_len] = 0; /* so carry propagation stops here. */
+    HP_MPY_ADD_FN(a_len, &b, a, c);
+}
+
+/* c += a * b, where a is a_len words long; unlike s_mpv_mul_d_add, c[a_len] is not cleared first. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+                     mp_digit *c)
+{
+    HP_MPY_ADD_FN(a_len, &b, a, c);
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_i86pc.s b/security/nss/lib/freebl/mpi/mpi_i86pc.s
new file mode 100644
index 0000000000..f800396596
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_i86pc.s
@@ -0,0 +1,313 @@
+/
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +.text + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d +.type s_mpv_mul_d,@function +s_mpv_mul_d: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L2 / jmp if a_len == 0 + mov 8(%ebp),%esi / esi = a + cld +L1: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L1 / jmp if a_len != 0 +L2: + mov %ebx,0(%edi) / *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d_add +.type s_mpv_mul_d_add,@function +s_mpv_mul_d_add: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L4 / jmp if a_len == 0 + mov 8(%ebp),%esi / esi = a + cld +L3: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc 
$0,%edx + mov 0(%edi),%ebx / add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L3 / jmp if a_len != 0 +L4: + mov %ebx,0(%edi) / *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d_add_prop +.type s_mpv_mul_d_add_prop,@function +s_mpv_mul_d_add_prop: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L6 / jmp if a_len == 0 + cld + mov 8(%ebp),%esi / esi = a +L5: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx / add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L5 / jmp if a_len != 0 +L6: + cmp $0,%ebx / is carry zero? 
+ jz L8 + mov 0(%edi),%eax / add in current word from *c + add %ebx,%eax + stosl / [es:edi] = ax; edi += 4; + jnc L8 +L7: + mov 0(%edi),%eax / add in current word from *c + adc $0,%eax + stosl / [es:edi] = ax; edi += 4; + jc L7 +L8: + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 20: caller's esi + / ebp - 16: caller's edi + / ebp - 12: + / ebp - 8: carry + / ebp - 4: a_len local + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: pa argument + / ebp + 12: a_len argument + / ebp + 16: ps argument + / ebp + 20: + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr + +.globl s_mpv_sqr_add_prop +.type s_mpv_sqr_add_prop,@function +s_mpv_sqr_add_prop: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / a_len + mov 16(%ebp),%edi / edi = ps + cmp $0,%ecx + je L11 / jump if a_len == 0 + cld + mov 8(%ebp),%esi / esi = pa +L10: + lodsl / %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax / add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax / add low word from result + mov 4(%edi),%ebx + stosl / [es:di] = %eax; di += 4; + adc %ebx,%edx / add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl / [es:di] = %eax; di += 4; + dec %ecx / --a_len + jnz L10 / jmp if a_len != 0 +L11: + cmp $0,%ebx / is carry zero? + jz L14 + mov 0(%edi),%eax / add in current word from *c + add %ebx,%eax + stosl / [es:edi] = ax; edi += 4; + jnc L14 +L12: + mov 0(%edi),%eax / add in current word from *c + adc $0,%eax + stosl / [es:edi] = ax; edi += 4; + jc L12 +L14: + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / + / Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + / so its high bit is 1. This code is from NSPR. 
+ / + / mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + / mp_digit *qp, mp_digit *rp) + + / esp + 0: Caller's ebx + / esp + 4: return address + / esp + 8: Nhi argument + / esp + 12: Nlo argument + / esp + 16: divisor argument + / esp + 20: qp argument + / esp + 24: rp argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr + / + +.globl s_mpv_div_2dx1d +.type s_mpv_div_2dx1d,@function +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp),%edx + mov 12(%esp),%eax + mov 16(%esp),%ebx + div %ebx + mov 20(%esp),%ebx + mov %eax,0(%ebx) + mov 24(%esp),%ebx + mov %edx,0(%ebx) + xor %eax,%eax / return zero + pop %ebx + ret + nop + diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s new file mode 100644 index 0000000000..455792bbba --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_mips.s @@ -0,0 +1,472 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include + .set noreorder + .set noat + + .section .text, 1, 0x00000006, 4, 4 +.text: + .section .text + + .ent s_mpv_mul_d_add + .globl s_mpv_mul_d_add + +s_mpv_mul_d_add: + #/* c += a * b */ + #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.L.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. 
+ # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.L.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.L.3 + # a1 = a[1]; + lwu a5,4(a0) +.L.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.L.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.L.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.L.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .L.6 + addiu a3,a3,4 + # } else { +.L.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + b .L.6 + dsrl32 t2,t0,0 + # } + # } else { +.L.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.L.6: + # c[1] = cy; + jr ra + sw t2,4(a3) + # } +.L.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add + + .ent s_mpv_mul_d_add_prop + .globl s_mpv_mul_d_add_prop + +s_mpv_mul_d_add_prop: + #/* c += a * b */ + #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # 
mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.M.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.M.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.M.3 + # a1 = a[1]; + lwu a5,4(a0) +.M.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.M.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.M.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.M.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 2; + b .M.6 + addiu a3,a3,8 + # } else { +.M.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + b .M.6 + addiu a3,a3,4 + # } + # } else { +.M.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + addiu a3,a3,4 + # } +.M.6: + + # while (cy) { + beq t2,zero,.M.1 + nop +.M.7: + # 
mp_word w = (mp_word)*c + cy; + lwu a6,0(a3) + daddu t2,t2,a6 + # *c++ = ACCUM(w); + sw t2,0(a3) + # cy = CARRYOUT(w); + dsrl32 t2,t2,0 + bne t2,zero,.M.7 + addiu a3,a3,4 + + # } +.M.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add_prop + + .ent s_mpv_mul_d + .globl s_mpv_mul_d + +s_mpv_mul_d: + #/* c = a * b */ + #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.N.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.N.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.N.3 + # a1 = a[1]; + lwu a5,4(a0) +.N.4: + # a_len -= 2; + addiu a1,a1,-2 + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # w0 = (mp_word)b * a0; + dmultu a2,a4 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.N.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.N.3: + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.N.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .N.6 + addiu a3,a3,4 + # } else { +.N.5: + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + b .N.6 + dsrl32 t2,t0,0 + # } + # } else { +.N.2: + mflo t0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.N.6: + # c[1] = cy; + jr 
ra + sw t2,4(a3) + # } +.N.1: + jr ra + nop + #} + # + .end s_mpv_mul_d + + + .ent s_mpv_sqr_add_prop + .globl s_mpv_sqr_add_prop + #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs); + # registers + # a0 *a + # a1 a_len + # a2 *sqr + # a3 digit from *a, a_i + # a4 square of digit from a + # a5,a6 next 2 digits in sqr + # a7,t0 carry +s_mpv_sqr_add_prop: + move a7,zero + move t0,zero + lwu a3,0(a0) + addiu a1,a1,-1 # --a_len + dmultu a3,a3 + beq a1,zero,.P.3 # jump if we've already done the only sqr + addiu a0,a0,4 # ++a +.P.2: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + lwu a3,0(a0) + addiu a0,a0,4 # ++a + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + dmultu a3,a3 + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + addiu a1,a1,-1 # --a_len + dsrl32 a4,a4,0 + bne a1,zero,.P.2 # loop if a_len > 0 + sw a4,-4(a2) +.P.3: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + beq t0,zero,.P.9 # jump if no carry + dsrl32 a4,a4,0 +.P.8: + sw a4,-4(a2) + /* propagate final carry */ + lwu a5,0(a2) + daddu a6,a5,t0 + sltu t0,a6,a5 + bne t0,zero,.P.8 # loop if carry persists + addiu a2,a2,4 # sqrs++ +.P.9: + jr ra + sw a4,-4(a2) + + .end s_mpv_sqr_add_prop diff --git a/security/nss/lib/freebl/mpi/mpi_sparc.c b/security/nss/lib/freebl/mpi/mpi_sparc.c new file mode 100644 index 0000000000..1e88357af8 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_sparc.c @@ -0,0 +1,226 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Multiplication performance enhancements for sparc v8+vis CPUs. 
*/
+
+#include "mpi-priv.h"
+#include /* NOTE(review): the header name is missing on the bare #include lines here, presumably lost in extraction; restore from upstream */
+#include
+#include
+
+/* In the functions below, */
+/* vector y must be 8-byte aligned, and n must be even */
+/* returns carry out of high order word of result */
+/* maximum n is 256 */
+
+/* vector x += vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
+
+/* vector z = vector x + vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
+                        int n, mp_digit a);
+
+/* v8 versions of these functions run on any Sparc v8 CPU. */
+
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo)                              \
+    {                                                           \
+        unsigned long long product = (unsigned long long)a * b; \
+        Plo = (mp_digit)product;                                \
+        Phi = (mp_digit)(product >> MP_DIGIT_BIT);              \
+    }
+
+/* c = a * b: writes a_len product digits to c, followed by the final carry digit. */
+static void
+v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) /* path using the wider mp_word type for the product */
+    mp_digit d = 0; /* carry between digits */
+
+    /* Inner product: Digits of a */
+    while (a_len--) {
+        mp_word w = ((mp_word)b * *a++) + d;
+        *c++ = ACCUM(w);
+        d = CARRYOUT(w);
+    }
+    *c = d;
+#else /* no mp_word: MP_MUL_DxD supplies the high and low halves of the product */
+    mp_digit carry = 0;
+    while (a_len--) {
+        mp_digit a_i = *a++;
+        mp_digit a0b0, a1b1; /* low and high halves of a_i * b */
+
+        MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+        a0b0 += carry;
+        if (a0b0 < carry) /* the add wrapped (assumes unsigned mp_digit), so carry into the high half */
+            ++a1b1;
+        *c++ = a0b0;
+        carry = a1b1;
+    }
+    *c = carry;
+#endif
+}
+
+/* c += a * b: accumulates into c[0..a_len-1], then overwrites c[a_len] with the final carry. */
+static void
+v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) /* path using the wider mp_word type for the product */
+    mp_digit d = 0; /* carry between digits */
+
+    /* Inner product: Digits of a */
+    while (a_len--) {
+        mp_word w = ((mp_word)b * *a++) + *c + d;
+        *c++ = ACCUM(w);
+        d = CARRYOUT(w);
+    }
+    *c = d;
+#else /* no mp_word: MP_MUL_DxD supplies the high and low halves of the product */
+    mp_digit carry = 0;
+    while (a_len--) {
+        mp_digit a_i = *a++;
+        mp_digit a0b0, a1b1; /* low and high halves of a_i * b */
+
+        MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+        a0b0 += carry;
+        if (a0b0 < carry) /* the add wrapped (assumes unsigned mp_digit), so carry into the high half */
+            ++a1b1;
+        a0b0 += a_i = *c; /* a_i is reused to hold the current digit of c */
+        if (a0b0 < a_i)
+            ++a1b1;
+        *c++ = a0b0;
+        carry = a1b1;
+    }
+    *c = carry;
+#endif
+}
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b, with the final carry added into the digits of c above a_len until it dies out */
+static void
+v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) /* path using the wider mp_word type for the product */
+    mp_digit d = 0; /* carry between digits */
+
+    /* Inner product: Digits of a */
+    while (a_len--) {
+        mp_word w = ((mp_word)b * *a++) + *c + d;
+        *c++ = ACCUM(w);
+        d = CARRYOUT(w);
+    }
+    /* Ripple the remaining carry upward; the loop ends only when the carry is zero. */
+    while (d) {
+        mp_word w = (mp_word)*c + d;
+        *c++ = ACCUM(w);
+        d = CARRYOUT(w);
+    }
+#else /* no mp_word: MP_MUL_DxD supplies the high and low halves of the product */
+    mp_digit carry = 0;
+    while (a_len--) {
+        mp_digit a_i = *a++;
+        mp_digit a0b0, a1b1; /* low and high halves of a_i * b */
+
+        MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+        a0b0 += carry;
+        if (a0b0 < carry) /* the add wrapped (assumes unsigned mp_digit), so carry into the high half */
+            ++a1b1;
+
+        a0b0 += a_i = *c; /* a_i is reused to hold the current digit of c */
+        if (a0b0 < a_i)
+            ++a1b1;
+
+        *c++ = a0b0;
+        carry = a1b1;
+    }
+    while (carry) { /* ripple the remaining carry upward until it is zero */
+        mp_digit c_i = *c;
+        carry += c_i;
+        *c++ = carry;
+        carry = carry < c_i; /* 1 if the add wrapped (assumes unsigned mp_digit), else 0 */
+    }
+#endif
+}
+
+/* These functions run only on v8plus+vis or v9+vis CPUs. */
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit d; /* carry out returned by mul_add_inp */
+    mp_digit x[258]; /* scratch for a copy of a, with room for an alignment skip and a zero pad digit */
+    if (a_len <= 256) { /* within the documented maximum n for mul_add_inp */
+        if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { /* a aliases c, is not 8-byte aligned, or has odd length */
+            mp_digit *px;
+            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; /* skip one digit if x is not 8-byte aligned (assumes 4-byte mp_digit; TODO confirm) */
+            memcpy(px, a, a_len * sizeof(*a));
+            a = px;
+            if (a_len & 1) {
+                px[a_len] = 0; /* zero pad digit after an odd-length copy */
+            }
+        }
+        s_mp_setz(c, a_len + 1); /* presumably zeroes a_len + 1 digits of c; s_mp_setz is defined elsewhere */
+        d = mul_add_inp(c, a, a_len, b);
+        c[a_len] = d; /* store the carry out */
+    } else { /* longer than 256 digits: use the plain C version above */
+        v8_mpv_mul_d(a, a_len, b, c);
+    }
+}
+
+/* c += a * b, where a is a_len words long. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit d; /* carry out returned by mul_add_inp */
+    mp_digit x[258]; /* scratch for a copy of a, with room for an alignment skip and a zero pad digit */
+    if (a_len <= 256) { /* within the documented maximum n for mul_add_inp */
+        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { /* a is not 8-byte aligned or has odd length */
+            mp_digit *px;
+            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; /* skip one digit if x is not 8-byte aligned (assumes 4-byte mp_digit; TODO confirm) */
+            memcpy(px, a, a_len * sizeof(*a));
+            a = px;
+            if (a_len & 1) {
+                px[a_len] = 0; /* zero pad digit after an odd-length copy */
+            }
+        }
+        d = mul_add_inp(c, a, a_len, b);
+        c[a_len] = d; /* overwrite c[a_len] with the carry out */
+    } else { /* longer than 256 digits: use the plain C version above */
+        v8_mpv_mul_d_add(a, a_len, b, c);
+    }
+}
+
+/* c += a * b, where a is a_len words long.
*/
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit d; /* carry out returned by mul_add_inp */
+    mp_digit x[258]; /* scratch for a copy of a, with room for an alignment skip and a zero pad digit */
+    if (a_len <= 256) {
+        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { /* a is not 8-byte aligned or has odd length */
+            mp_digit *px;
+            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; /* skip one digit if x is not 8-byte aligned (assumes 4-byte mp_digit; TODO confirm) */
+            memcpy(px, a, a_len * sizeof(*a));
+            a = px;
+            if (a_len & 1) {
+                px[a_len] = 0; /* zero pad digit after an odd-length copy */
+            }
+        }
+        d = mul_add_inp(c, a, a_len, b);
+        if (d) { /* add the carry out into c[a_len] and ripple it upward */
+            c += a_len;
+            do {
+                mp_digit sum = d + *c;
+                *c++ = sum;
+                d = sum < d; /* 1 if the add wrapped (assumes unsigned mp_digit), else 0 */
+            } while (d);
+        }
+    } else { /* longer than 256 digits: use the plain C version */
+        v8_mpv_mul_d_add_prop(a, a_len, b, c);
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s new file mode 100644 index 0000000000..16a47019c3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_sse2.s @@ -0,0 +1,294 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifdef DARWIN +#define s_mpv_mul_d _s_mpv_mul_d +#define s_mpv_mul_d_add _s_mpv_mul_d_add +#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop +#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop +#define s_mpv_div_2dx1d _s_mpv_div_2dx1d +#define TYPE_FUNCTION(x) +#else +#define TYPE_FUNCTION(x) .type x, @function +#endif + +.text + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # ebx: + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d +.private_extern s_mpv_mul_d +TYPE_FUNCTION(s_mpv_mul_d) +s_mpv_mul_d: + push %ebp + mov %esp, %ebp + push %edi + push %esi + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + add $4, %esi + pmuludq %mm1, 
%mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # ebx: + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add +.private_extern s_mpv_mul_d_add +TYPE_FUNCTION(s_mpv_mul_d_add) +s_mpv_mul_d_add: + push %ebp + mov %esp, %ebp + push %edi + push %esi + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + movd 0(%edi), %mm0 + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add_prop +.private_extern s_mpv_mul_d_add_prop +TYPE_FUNCTION(s_mpv_mul_d_add_prop) +s_mpv_mul_d_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f 
# jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add *c++ + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? + jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_sqr_add_prop +.private_extern s_mpv_sqr_add_prop +TYPE_FUNCTION(s_mpv_sqr_add_prop) +s_mpv_sqr_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + mov 16(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm0, %mm0 # mm0 = sqr(a) + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add the low word + movd 4(%edi), %mm3 + movd %mm2, 0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3, %mm2 # add the high word + movd %mm2, 4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8, %edi + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? 
+ jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. + # + # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # +.globl s_mpv_div_2dx1d +.private_extern s_mpv_div_2dx1d +TYPE_FUNCTION(s_mpv_div_2dx1d) +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp), %edx + mov 12(%esp), %eax + mov 16(%esp), %ebx + div %ebx + mov 20(%esp), %ebx + mov %eax, 0(%ebx) + mov 24(%esp), %ebx + mov %edx, 0(%ebx) + xor %eax, %eax # return zero + pop %ebx + ret + nop + +#ifndef DARWIN + # Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_x86.s b/security/nss/lib/freebl/mpi/mpi_x86.s new file mode 100644 index 0000000000..8f7e2130c3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_x86.s @@ -0,0 +1,541 @@ +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +.data +.align 4 + # + # -1 means to call s_mpi_is_sse to determine if we support sse + # instructions. 
+ # 0 means to use x86 instructions + # 1 means to use sse2 instructions +.type is_sse,@object +.size is_sse,4 +is_sse: .long -1 + +# +# sigh, handle the difference between -fPIC and not PIC +# default to pic, since this file seems to be exclusively +# linux right now (solaris uses mpi_i86pc.s and windows uses +# mpi_x86_asm.c) +# +.ifndef NO_PIC +.macro GET var,reg + movl \var@GOTOFF(%ebx),\reg +.endm +.macro PUT reg,var + movl \reg,\var@GOTOFF(%ebx) +.endm +.else +.macro GET var,reg + movl \var,\reg +.endm +.macro PUT reg,var + movl \reg,\var +.endm +.endif + +.text + + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d +.type s_mpv_mul_d,@function +s_mpv_mul_d: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_x86 + jg s_mpv_mul_d_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_sse2 +s_mpv_mul_d_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +1: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 
16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 6f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +5: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 5b # jmp if a_len != 0 +6: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add +.type s_mpv_mul_d_add,@function +s_mpv_mul_d_add: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_add_x86 + jg s_mpv_mul_d_add_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_add_sse2 +s_mpv_mul_d_add_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 11f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +10: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 10b # jmp if a_len != 0 +11: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_add_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len 
+ movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 16f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +15: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd 0(%edi),%mm0 + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 15b # jmp if a_len != 0 +16: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add_prop +.type s_mpv_mul_d_add_prop,@function +s_mpv_mul_d_add_prop: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_add_prop_x86 + jg s_mpv_mul_d_add_prop_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_add_prop_sse2 +s_mpv_mul_d_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 21f # jmp if a_len == 0 + cld + mov 8(%ebp),%esi # esi = a +20: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 20b # jmp if a_len != 0 +21: + cmp $0,%ebx # is carry zero? 
+ jz 23f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 23f +22: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 22b +23: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 26f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +25: + movd 0(%esi),%mm0 # mm0 = *a++ + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add *c++ + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 25b # jmp if a_len != 0 +26: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 28f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 28f +27: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 27b +28: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + + # ebp - 20: caller's esi + # ebp - 16: caller's edi + # ebp - 12: + # ebp - 8: carry + # ebp - 4: a_len local + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # ebp + 20: + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + +.globl s_mpv_sqr_add_prop +.type s_mpv_sqr_add_prop,@function +s_mpv_sqr_add_prop: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_sqr_add_prop_x86 + jg s_mpv_sqr_add_prop_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_sqr_add_prop_sse2 +s_mpv_sqr_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # a_len + mov 16(%ebp),%edi # edi 
= ps + cmp $0,%ecx + je 31f # jump if a_len == 0 + cld + mov 8(%ebp),%esi # esi = pa +30: + lodsl # %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax # add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax # add low word from result + mov 4(%edi),%ebx + stosl # [es:di] = %eax; di += 4; + adc %ebx,%edx # add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl # [es:di] = %eax; di += 4; + dec %ecx # --a_len + jnz 30b # jmp if a_len != 0 +31: + cmp $0,%ebx # is carry zero? + jz 34f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 34f +32: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 32b +34: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_sqr_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 16(%ebp),%edi + cmp $0,%ecx + je 36f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +35: + movd 0(%esi),%mm0 # mm0 = *a + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm0,%mm0 # mm0 = sqr(a) + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add the low word + movd 4(%edi),%mm3 + movd %mm2,0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3,%mm2 # add the high word + movd %mm2,4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8,%edi + dec %ecx # --a_len + jnz 35b # jmp if a_len != 0 +36: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 38f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 38f +37: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 37b +38: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. 
+ # + # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # + +.globl s_mpv_div_2dx1d +.type s_mpv_div_2dx1d,@function +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp),%edx + mov 12(%esp),%eax + mov 16(%esp),%ebx + div %ebx + mov 20(%esp),%ebx + mov %eax,0(%ebx) + mov 24(%esp),%ebx + mov %edx,0(%ebx) + xor %eax,%eax # return zero + pop %ebx + ret + nop + + # Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c new file mode 100644 index 0000000000..4faeef30ca --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c @@ -0,0 +1,531 @@ +/* + * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi-priv.h" + +static int is_sse = -1; +extern unsigned long s_mpi_is_sse2(); + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_x86 + jg s_mpv_mul_d_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_sse2 +s_mpv_mul_d_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_2 ; jmp if a_len == 0 + mov esi,[ebp+8] ; esi = a + cld +L_1: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_1 ; jmp if a_len != 0 +L_2: + mov [edi],ebx ; *c = carry + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_sse2: + push ebp + mov ebp, esp + push edi + push esi + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_6 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_5: + movd mm0, [esi] ; mm0 = *a++ + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_5 ; jmp if a_len != 0 +L_6: + movd [edi], mm2 ; *c = carry + emms + pop esi + pop edi + leave + ret + nop + } 
+} + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_add_x86 + jg s_mpv_mul_d_add_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_add_sse2 +s_mpv_mul_d_add_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_11 ; jmp if a_len == 0 + mov esi,[ebp+8] ; esi = a + cld +L_10: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,[edi] ; add in current word from *c + add eax,ebx + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_10 ; jmp if a_len != 0 +L_11: + mov [edi],ebx ; *c = carry + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_add_sse2: + push ebp + mov ebp, esp + push edi + push esi + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_16 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_15: + movd mm0, [esi] ; mm0 = *a++ + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + movd mm0, [edi] + paddq mm2, mm0 ; add the carry + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_15 ; jmp if a_len != 0 +L_16: + movd [edi], mm2 
; *c = carry + emms + pop esi + pop edi + leave + ret + nop + } +} + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_add_prop_x86 + jg s_mpv_mul_d_add_prop_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_add_prop_sse2 +s_mpv_mul_d_add_prop_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_21 ; jmp if a_len == 0 + cld + mov esi,[ebp+8] ; esi = a +L_20: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,[edi] ; add in current word from *c + add eax,ebx + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_20 ; jmp if a_len != 0 +L_21: + cmp ebx,0 ; is carry zero? 
+ jz L_23 + mov eax,[edi] ; add in current word from *c + add eax,ebx + stosd ; [es:edi] = ax; edi += 4; + jnc L_23 +L_22: + mov eax,[edi] ; add in current word from *c + adc eax,0 + stosd ; [es:edi] = ax; edi += 4; + jc L_22 +L_23: + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_add_prop_sse2: + push ebp + mov ebp, esp + push edi + push esi + push ebx + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_26 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_25: + movd mm0, [esi] ; mm0 = *a++ + movd mm3, [edi] ; fetch the sum + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + paddq mm2, mm3 ; add *c++ + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_25 ; jmp if a_len != 0 +L_26: + movd ebx, mm2 + cmp ebx, 0 ; is carry zero? + jz L_28 + mov eax, [edi] + add eax, ebx + stosd + jnc L_28 +L_27: + mov eax, [edi] ; add in current word from *c + adc eax, 0 + stosd ; [es:edi] = ax; edi += 4; + jc L_27 +L_28: + emms + pop ebx + pop esi + pop edi + leave + ret + nop + } +} + +/* + * ebp - 20: caller's esi + * ebp - 16: caller's edi + * ebp - 12: + * ebp - 8: carry + * ebp - 4: a_len local + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: pa argument + * ebp + 12: a_len argument + * ebp + 16: ps argument + * ebp + 20: + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_sqr_add_prop_x86 + jg s_mpv_sqr_add_prop_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_sqr_add_prop_sse2 +s_mpv_sqr_add_prop_x86: + push ebp + mov ebp,esp + sub esp,12 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; a_len + mov edi,[ebp+16] ; edi = ps + 
cmp ecx,0 + je L_31 ; jump if a_len == 0 + cld + mov esi,[ebp+8] ; esi = pa +L_30: + lodsd ; eax = [ds:si]; si += 4; + mul eax + + add eax,ebx ; add "carry" + adc edx,0 + mov ebx,[edi] + add eax,ebx ; add low word from result + mov ebx,[edi+4] + stosd ; [es:di] = eax; di += 4; + adc edx,ebx ; add high word from result + mov ebx,0 + mov eax,edx + adc ebx,0 + stosd ; [es:di] = eax; di += 4; + dec ecx ; --a_len + jnz L_30 ; jmp if a_len != 0 +L_31: + cmp ebx,0 ; is carry zero? + jz L_34 + mov eax,[edi] ; add in current word from *c + add eax,ebx + stosd ; [es:edi] = ax; edi += 4; + jnc L_34 +L_32: + mov eax,[edi] ; add in current word from *c + adc eax,0 + stosd ; [es:edi] = ax; edi += 4; + jc L_32 +L_34: + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_sqr_add_prop_sse2: + push ebp + mov ebp, esp + push edi + push esi + push ebx + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + mov edi, [ebp+16] + cmp ecx, 0 + je L_36 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_35: + movd mm0, [esi] ; mm0 = *a + movd mm3, [edi] ; fetch the sum + add esi, 4 + pmuludq mm0, mm0 ; mm0 = sqr(a) + paddq mm2, mm0 ; add the carry + paddq mm2, mm3 ; add the low word + movd mm3, [edi+4] + movd [edi], mm2 ; store the 32bit result + psrlq mm2, 32 + paddq mm2, mm3 ; add the high word + movd [edi+4], mm2 ; store the 32bit result + psrlq mm2, 32 ; save the carry. + add edi, 8 + dec ecx ; --a_len + jnz L_35 ; jmp if a_len != 0 +L_36: + movd ebx, mm2 + cmp ebx, 0 ; is carry zero? + jz L_38 + mov eax, [edi] + add eax, ebx + stosd + jnc L_38 +L_37: + mov eax, [edi] ; add in current word from *c + adc eax, 0 + stosd ; [es:edi] = ax; edi += 4; + jc L_37 +L_38: + emms + pop ebx + pop esi + pop edi + leave + ret + nop + } +} + +/* + * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + * so its high bit is 1. This code is from NSPR. 
+ * + * Dump of assembler code for function s_mpv_div_2dx1d: + * + * esp + 0: Caller's ebx + * esp + 4: return address + * esp + 8: Nhi argument + * esp + 12: Nlo argument + * esp + 16: divisor argument + * esp + 20: qp argument + * esp + 24: rp argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) mp_err + s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + mp_digit *qp, mp_digit *rp) +{ + __asm { + push ebx + mov edx,[esp+8] + mov eax,[esp+12] + mov ebx,[esp+16] + div ebx + mov ebx,[esp+20] + mov [ebx],eax + mov ebx,[esp+24] + mov [ebx],edx + xor eax,eax ; return zero + pop ebx + ret + nop + } +} diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s new file mode 100644 index 0000000000..b903e2564a --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s @@ -0,0 +1,538 @@ +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +.data +.align 4 + # + # -1 means to call _s_mpi_is_sse to determine if we support sse + # instructions. 
+ # 0 means to use x86 instructions + # 1 means to use sse2 instructions +.type is_sse,@object +.size is_sse,4 +is_sse: .long -1 + +# +# sigh, handle the difference between -fPIC and not PIC +# default to pic, since this file seems to be exclusively +# linux right now (solaris uses mpi_i86pc.s and windows uses +# mpi_x86_asm.c) +# +#.ifndef NO_PIC +#.macro GET var,reg +# movl \var@GOTOFF(%ebx),\reg +#.endm +#.macro PUT reg,var +# movl \reg,\var@GOTOFF(%ebx) +#.endm +#.else +.macro GET var,reg + movl \var,\reg +.endm +.macro PUT reg,var + movl \reg,\var +.endm +#.endif + +.text + + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d +.type _s_mpv_mul_d,@function +_s_mpv_mul_d: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_x86 + jg _s_mpv_mul_d_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_sse2 +_s_mpv_mul_d_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +1: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx 
= a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 6f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +5: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 5b # jmp if a_len != 0 +6: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d_add +.type _s_mpv_mul_d_add,@function +_s_mpv_mul_d_add: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_add_x86 + jg _s_mpv_mul_d_add_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_add_sse2 +_s_mpv_mul_d_add_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 11f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +10: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 10b # jmp if a_len != 0 +11: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_add_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 
12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 16f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +15: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd 0(%edi),%mm0 + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 15b # jmp if a_len != 0 +16: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d_add_prop +.type _s_mpv_mul_d_add_prop,@function +_s_mpv_mul_d_add_prop: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_add_prop_x86 + jg _s_mpv_mul_d_add_prop_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_add_prop_sse2 +_s_mpv_mul_d_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 21f # jmp if a_len == 0 + cld + mov 8(%ebp),%esi # esi = a +20: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 20b # jmp if a_len != 0 +21: + cmp $0,%ebx # is carry zero? 
+ jz 23f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 23f +22: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 22b +23: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 26f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +25: + movd 0(%esi),%mm0 # mm0 = *a++ + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add *c++ + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 25b # jmp if a_len != 0 +26: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 28f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 28f +27: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 27b +28: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + + # ebp - 20: caller's esi + # ebp - 16: caller's edi + # ebp - 12: + # ebp - 8: carry + # ebp - 4: a_len local + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # ebp + 20: + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + +.globl _s_mpv_sqr_add_prop +.type _s_mpv_sqr_add_prop,@function +_s_mpv_sqr_add_prop: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_sqr_add_prop_x86 + jg _s_mpv_sqr_add_prop_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_sqr_add_prop_sse2 +_s_mpv_sqr_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # a_len + mov 
16(%ebp),%edi # edi = ps + cmp $0,%ecx + je 31f # jump if a_len == 0 + cld + mov 8(%ebp),%esi # esi = pa +30: + lodsl # %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax # add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax # add low word from result + mov 4(%edi),%ebx + stosl # [es:di] = %eax; di += 4; + adc %ebx,%edx # add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl # [es:di] = %eax; di += 4; + dec %ecx # --a_len + jnz 30b # jmp if a_len != 0 +31: + cmp $0,%ebx # is carry zero? + jz 34f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 34f +32: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 32b +34: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_sqr_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 16(%ebp),%edi + cmp $0,%ecx + je 36f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +35: + movd 0(%esi),%mm0 # mm0 = *a + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm0,%mm0 # mm0 = sqr(a) + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add the low word + movd 4(%edi),%mm3 + movd %mm2,0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3,%mm2 # add the high word + movd %mm2,4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8,%edi + dec %ecx # --a_len + jnz 35b # jmp if a_len != 0 +36: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 38f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 38f +37: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 37b +38: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. 
+ # + # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # + +.globl _s_mpv_div_2dx1d +.type _s_mpv_div_2dx1d,@function +_s_mpv_div_2dx1d: + push %ebx + mov 8(%esp),%edx + mov 12(%esp),%eax + mov 16(%esp),%ebx + div %ebx + mov 20(%esp),%ebx + mov %eax,0(%ebx) + mov 24(%esp),%ebx + mov %edx,0(%ebx) + xor %eax,%eax # return zero + pop %ebx + ret + nop + diff --git a/security/nss/lib/freebl/mpi/mplogic.c b/security/nss/lib/freebl/mpi/mplogic.c new file mode 100644 index 0000000000..db19cff138 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mplogic.c @@ -0,0 +1,460 @@ +/* + * mplogic.c + * + * Bitwise logical operations on MPI values + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi-priv.h" +#include "mplogic.h" + +/* {{{ Lookup table for population count */ + +static unsigned char bitc[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_not(a, b) - compute b = ~a + mpl_and(a, b, c) - compute c = a & b + mpl_or(a, b, c) - compute c = a | b + mpl_xor(a, b, c) - compute c = a ^ b + */ + +/* {{{ mpl_not(a, b) */ + +mp_err +mpl_not(mp_int *a, mp_int *b) +{ + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + /* This relies on the fact that the digit type is unsigned */ + for (ix = 0; ix < USED(b); ix++) + DIGIT(b, ix) = ~DIGIT(b, ix); + + s_mp_clamp(b); + + return MP_OKAY; + +} /* end mpl_not() */ + +/* }}} */ + +/* {{{ mpl_and(a, b, c) */ + +mp_err +mpl_and(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int *which, *other; + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (USED(a) <= USED(b)) { + which = a; + other = b; + } else { + which = b; + other = a; + } + + if ((res = mp_copy(which, c)) != MP_OKAY) + return res; + + for (ix = 0; ix < USED(which); 
ix++) + DIGIT(c, ix) &= DIGIT(other, ix); + + s_mp_clamp(c); + + return MP_OKAY; + +} /* end mpl_and() */ + +/* }}} */ + +/* {{{ mpl_or(a, b, c) */ + +mp_err +mpl_or(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int *which, *other; + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (USED(a) >= USED(b)) { + which = a; + other = b; + } else { + which = b; + other = a; + } + + if ((res = mp_copy(which, c)) != MP_OKAY) + return res; + + for (ix = 0; ix < USED(which); ix++) + DIGIT(c, ix) |= DIGIT(other, ix); + + return MP_OKAY; + +} /* end mpl_or() */ + +/* }}} */ + +/* {{{ mpl_xor(a, b, c) */ + +mp_err +mpl_xor(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int *which, *other; + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (USED(a) >= USED(b)) { + which = a; + other = b; + } else { + which = b; + other = a; + } + + if ((res = mp_copy(which, c)) != MP_OKAY) + return res; + + for (ix = 0; ix < USED(which); ix++) + DIGIT(c, ix) ^= DIGIT(other, ix); + + s_mp_clamp(c); + + return MP_OKAY; + +} /* end mpl_xor() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_rsh(a, b, d) - b = a >> d + mpl_lsh(a, b, d) - b = a << d + */ + +/* {{{ mpl_rsh(a, b, d) */ + +mp_err +mpl_rsh(const mp_int *a, mp_int *b, mp_digit d) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + s_mp_div_2d(b, d); + + return MP_OKAY; + +} /* end mpl_rsh() */ + +/* }}} */ + +/* {{{ mpl_lsh(a, b, d) */ + +mp_err +mpl_lsh(const mp_int *a, mp_int *b, mp_digit d) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + return s_mp_mul_2d(b, d); + +} /* end mpl_lsh() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_num_set(a, num) + + Count the number of set bits in the binary 
representation of a. + Returns MP_OKAY and sets 'num' to be the number of such bits, if + possible. If num is NULL, the result is thrown away, but it is + not considered an error. + + mpl_num_clear() does basically the same thing for clear bits. + */ + +/* {{{ mpl_num_set(a, num) */ + +mp_err +mpl_num_set(mp_int *a, unsigned int *num) +{ + unsigned int ix, db, nset = 0; + mp_digit cur; + unsigned char reg; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = 0; ix < USED(a); ix++) { + cur = DIGIT(a, ix); + + for (db = 0; db < sizeof(mp_digit); db++) { + reg = (unsigned char)(cur >> (CHAR_BIT * db)); + + nset += bitc[reg]; + } + } + + if (num) + *num = nset; + + return MP_OKAY; + +} /* end mpl_num_set() */ + +/* }}} */ + +/* {{{ mpl_num_clear(a, num) */ + +mp_err +mpl_num_clear(mp_int *a, unsigned int *num) +{ + unsigned int ix, db, nset = 0; + mp_digit cur; + unsigned char reg; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = 0; ix < USED(a); ix++) { + cur = DIGIT(a, ix); + + for (db = 0; db < sizeof(mp_digit); db++) { + reg = (unsigned char)(cur >> (CHAR_BIT * db)); + + nset += bitc[UCHAR_MAX - reg]; + } + } + + if (num) + *num = nset; + + return MP_OKAY; + +} /* end mpl_num_clear() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_parity(a) + + Determines the bitwise parity of the value given. Returns MP_EVEN + if an even number of digits are set, MP_ODD if an odd number are + set. 
+ */ + +/* {{{ mpl_parity(a) */ + +mp_err +mpl_parity(mp_int *a) +{ + unsigned int ix; + int par = 0; + mp_digit cur; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = 0; ix < USED(a); ix++) { + int shft = (sizeof(mp_digit) * CHAR_BIT) / 2; + + cur = DIGIT(a, ix); + + /* Compute parity for current digit */ + while (shft != 0) { + cur ^= (cur >> shft); + shft >>= 1; + } + cur &= 1; + + /* XOR with running parity so far */ + par ^= cur; + } + + if (par) + return MP_ODD; + else + return MP_EVEN; + +} /* end mpl_parity() */ + +/* }}} */ + +/* + mpl_set_bit + + Returns MP_OKAY or some error code. + Grows a if needed to set a bit to 1. + */ +mp_err +mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value) +{ + mp_size ix; + mp_err rv; + mp_digit mask; + + ARGCHK(a != NULL, MP_BADARG); + + ix = bitNum / MP_DIGIT_BIT; + if (ix + 1 > MP_USED(a)) { + rv = s_mp_pad(a, ix + 1); + if (rv != MP_OKAY) + return rv; + } + + bitNum = bitNum % MP_DIGIT_BIT; + mask = (mp_digit)1 << bitNum; + if (value) + MP_DIGIT(a, ix) |= mask; + else + MP_DIGIT(a, ix) &= ~mask; + s_mp_clamp(a); + return MP_OKAY; +} + +/* + mpl_get_bit + + returns 0 or 1 or some (negative) error code. + */ +mp_err +mpl_get_bit(const mp_int *a, mp_size bitNum) +{ + mp_size bit, ix; + mp_err rv; + + ARGCHK(a != NULL, MP_BADARG); + + ix = bitNum / MP_DIGIT_BIT; + ARGCHK(ix <= MP_USED(a) - 1, MP_RANGE); + + bit = bitNum % MP_DIGIT_BIT; + rv = (mp_err)(MP_DIGIT(a, ix) >> bit) & 1; + return rv; +} + +/* + mpl_get_bits + - Extracts numBits bits from a, where the least significant extracted bit + is bit lsbNum. Returns a negative value if error occurs. + - Because sign bit is used to indicate error, maximum number of bits to + be returned is the lesser of (a) the number of bits in an mp_digit, or + (b) one less than the number of bits in an mp_err. + - lsbNum + numbits can be greater than the number of significant bits in + integer a, as long as bit lsbNum is in the high order digit of a. 
+ */ +mp_err +mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits) +{ + mp_size rshift = (lsbNum % MP_DIGIT_BIT); + mp_size lsWndx = (lsbNum / MP_DIGIT_BIT); + mp_digit *digit = MP_DIGITS(a) + lsWndx; + mp_digit mask = ((1 << numBits) - 1); + + ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG); + ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE); + + if ((numBits + lsbNum % MP_DIGIT_BIT <= MP_DIGIT_BIT) || + (lsWndx + 1 >= MP_USED(a))) { + mask &= (digit[0] >> rshift); + } else { + mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift))); + } + return (mp_err)mask; +} + +#define LZCNTLOOP(i) \ + do { \ + x = d >> (i); \ + mask = (0 - x); \ + mask = (0 - (mask >> (MP_DIGIT_BIT - 1))); \ + bits += (i)&mask; \ + d ^= (x ^ d) & mask; \ + } while (0) + +/* + mpl_significant_bits + returns number of significant bits in abs(a). + In other words: floor(lg(abs(a))) + 1. + returns 1 if value is zero. + */ +mp_size +mpl_significant_bits(const mp_int *a) +{ + /* + start bits at 1. + lg(0) = 0 => bits = 1 by function semantics. + below does a binary search for the _position_ of the top bit set, + which is floor(lg(abs(a))) for a != 0. 
+ */ + mp_size bits = 1; + int ix; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = MP_USED(a); ix > 0;) { + mp_digit d, x, mask; + if ((d = MP_DIGIT(a, --ix)) == 0) + continue; +#if !defined(MP_USE_UINT_DIGIT) + LZCNTLOOP(32); +#endif + LZCNTLOOP(16); + LZCNTLOOP(8); + LZCNTLOOP(4); + LZCNTLOOP(2); + LZCNTLOOP(1); + break; + } + bits += ix * MP_DIGIT_BIT; + return bits; +} + +#undef LZCNTLOOP + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/mplogic.h b/security/nss/lib/freebl/mpi/mplogic.h new file mode 100644 index 0000000000..71b7551392 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mplogic.h @@ -0,0 +1,55 @@ +/* + * mplogic.h + * + * Bitwise logical operations on MPI values + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _H_MPLOGIC_ +#define _H_MPLOGIC_ + +#include "mpi.h" +SEC_BEGIN_PROTOS + +/* + The logical operations treat an mp_int as if it were a bit vector, + without regard to its sign (an mp_int is represented in a signed + magnitude format). Values are treated as if they had an infinite + string of zeros left of the most-significant bit. 
+ */ + +/* Parity results */ + +#define MP_EVEN MP_YES +#define MP_ODD MP_NO + +/* Bitwise functions */ + +mp_err mpl_not(mp_int *a, mp_int *b); /* one's complement */ +mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND */ +mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* bitwise OR */ +mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR */ + +/* Shift functions */ + +mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift */ +mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift */ + +/* Bit count and parity */ + +mp_err mpl_num_set(mp_int *a, unsigned int *num); /* count set bits */ +mp_err mpl_num_clear(mp_int *a, unsigned int *num); /* count clear bits */ +mp_err mpl_parity(mp_int *a); /* determine parity */ + +/* Get & Set the value of a bit */ + +mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value); +mp_err mpl_get_bit(const mp_int *a, mp_size bitNum); +mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits); +mp_size mpl_significant_bits(const mp_int *a); + +SEC_END_PROTOS + +#endif /* end _H_MPLOGIC_ */ diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c new file mode 100644 index 0000000000..36ae51b352 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpmontg.c @@ -0,0 +1,1151 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file implements moduluar exponentiation using Montgomery's + * method for modular reduction. This file implements the method + * described as "Improvement 2" in the paper "A Cryptogrpahic Library for + * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr. + * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90" + * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244, + * published by Springer Verlag. 
+ */ + +#define MP_USING_CACHE_SAFE_MOD_EXP 1 +#include +#include "mpi-priv.h" +#include "mplogic.h" +#include "mpprime.h" +#ifdef MP_USING_MONT_MULF +#include "montmulf.h" +#endif +#include /* ptrdiff_t */ +#include + +#define STATIC + +#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */ + +/*! computes T = REDC(T), 2^b == R + \param T < RN +*/ +mp_err +s_mp_redc(mp_int *T, mp_mont_modulus *mmm) +{ + mp_err res; + mp_size i; + + i = (MP_USED(&mmm->N) << 1) + 1; + MP_CHECKOK(s_mp_pad(T, i)); + for (i = 0; i < MP_USED(&mmm->N); ++i) { + mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime; + /* T += N * m_i * (MP_RADIX ** i); */ + s_mp_mul_d_add_offset(&mmm->N, m_i, T, i); + } + s_mp_clamp(T); + + /* T /= R */ + s_mp_rshd(T, MP_USED(&mmm->N)); + + if ((res = s_mp_cmp(T, &mmm->N)) >= 0) { + /* T = T - N */ + MP_CHECKOK(s_mp_sub(T, &mmm->N)); +#ifdef DEBUG + if ((res = mp_cmp(T, &mmm->N)) >= 0) { + res = MP_UNDEF; + goto CLEANUP; + } +#endif + } + res = MP_OKAY; +CLEANUP: + return res; +} + +#if !defined(MP_MONT_USE_MP_MUL) + +/*! c <- REDC( a * b ) mod N + \param a < N i.e. "reduced" + \param b < N i.e. 
"reduced" + \param mmm modulus N and n0' of N +*/ +mp_err +s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, + mp_mont_modulus *mmm) +{ + mp_digit *pb; + mp_digit m_i; + mp_err res; + mp_size ib; /* "index b": index of current digit of B */ + mp_size useda, usedb; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b, to do fewer outer loops */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + ib = (MP_USED(&mmm->N) << 1) + 1; + if ((res = s_mp_pad(c, ib)) != MP_OKAY) + goto CLEANUP; + + useda = MP_USED(a); + pb = MP_DIGITS(b); + s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c)); + s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1)); + m_i = MP_DIGIT(c, 0) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0); + + /* Outer loop: Digits of b */ + usedb = MP_USED(b); + for (ib = 1; ib < usedb; ib++) { + mp_digit b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); + m_i = MP_DIGIT(c, ib) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); + } + if (usedb < MP_USED(&mmm->N)) { + for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) { + m_i = MP_DIGIT(c, ib) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); + } + } + s_mp_clamp(c); + s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */ + if (s_mp_cmp(c, &mmm->N) >= 0) { + MP_CHECKOK(s_mp_sub(c, &mmm->N)); + } + res = MP_OKAY; + +CLEANUP: + return res; +} +#endif + +STATIC +mp_err +s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont) +{ + mp_err res; + + /* xMont = x * R mod N where N is modulus */ + MP_CHECKOK(mp_copy(x, xMont)); + MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */ + MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont)); /* mod N */ +CLEANUP: + return res; +} + +#ifdef MP_USING_MONT_MULF + +/* the floating point multiply is already cache safe, + * don't turn on cache safe 
unless we specifically + * force it */ +#ifndef MP_FORCE_CACHE_SAFE +#undef MP_USING_CACHE_SAFE_MOD_EXP +#endif + +unsigned int mp_using_mont_mulf = 1; + +/* computes montgomery square of the integer in mResult */ +#define SQR \ + conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \ + mont_mulf_noconv(mResult, dm1, d16Tmp, \ + dTmp, dn, MP_DIGITS(modulus), nLen, dn0) + +/* computes montgomery product of x and the integer in mResult */ +#define MUL(x) \ + conv_i32_to_d32(dm1, mResult, nLen); \ + mont_mulf_noconv(mResult, dm1, oddPowers[x], \ + dTmp, dn, MP_DIGITS(modulus), nLen, dn0) + +/* Do modular exponentiation using floating point multiply code. */ +mp_err +mp_exptmod_f(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size odd_ints) +{ + mp_digit *mResult; + double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp; + double dn0; + mp_size i; + mp_err res; + int expOff; + int dSize = 0, oddPowSize, dTmpSize; + mp_int accum1; + double *oddPowers[MAX_ODD_INTS]; + + /* function for computing n0prime only works if n0 is odd */ + + MP_DIGITS(&accum1) = 0; + + for (i = 0; i < MAX_ODD_INTS; ++i) + oddPowers[i] = 0; + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + + mp_set(&accum1, 1); + MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + MP_CHECKOK(s_mp_pad(&accum1, nLen)); + + oddPowSize = 2 * nLen + 1; + dTmpSize = 2 * oddPowSize; + dSize = sizeof(double) * (nLen * 4 + 1 + + ((odd_ints + 1) * oddPowSize) + dTmpSize); + dBuf = malloc(dSize); + if (!dBuf) { + res = MP_MEM; + goto CLEANUP; + } + dm1 = dBuf; /* array of d32 */ + dn = dBuf + nLen; /* array of d32 */ + dSqr = dn + nLen; /* array of d32 */ + d16Tmp = dSqr + nLen; /* array of d16 */ + dTmp = d16Tmp + oddPowSize; + + for (i = 0; i < odd_ints; ++i) { + oddPowers[i] = dTmp; + dTmp += oddPowSize; + } + mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */ + + /* Make 
dn and dn0 */ + conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen); + dn0 = (double)(mmm->n0prime & 0xffff); + + /* Make dSqr */ + conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen); + mont_mulf_noconv(mResult, dm1, oddPowers[0], + dTmp, dn, MP_DIGITS(modulus), nLen, dn0); + conv_i32_to_d32(dSqr, mResult, nLen); + + for (i = 1; i < odd_ints; ++i) { + mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1], + dTmp, dn, MP_DIGITS(modulus), nLen, dn0); + conv_i32_to_d16(oddPowers[i], mResult, nLen); + } + + s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */ + + for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + if (window_bits == 1) { + if (!smallExp) { + SQR; + } else if (smallExp & 1) { + SQR; + MUL(0); + } else { + abort(); + } + } else if (window_bits == 4) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else { + abort(); + } + } else if (window_bits == 5) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else if (smallExp & 0x10) { + SQR; + MUL(smallExp / 32); + SQR; + SQR; + SQR; + SQR; + } else { + abort(); + } + } else if (window_bits == 6) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + 
SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + SQR; + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else if (smallExp & 0x10) { + SQR; + SQR; + MUL(smallExp / 32); + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 0x20) { + SQR; + MUL(smallExp / 64); + SQR; + SQR; + SQR; + SQR; + SQR; + } else { + abort(); + } + } else { + abort(); + } + } + + s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */ + + res = s_mp_redc(&accum1, mmm); + mp_exch(&accum1, result); + +CLEANUP: + mp_clear(&accum1); + if (dBuf) { + if (dSize) + memset(dBuf, 0, dSize); + free(dBuf); + } + + return res; +} +#undef SQR +#undef MUL +#endif + +#define SQR(a, b) \ + MP_CHECKOK(mp_sqr(a, b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) + +#if defined(MP_MONT_USE_MP_MUL) +#define MUL(x, a, b) \ + MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) +#else +#define MUL(x, a, b) \ + MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm)) +#endif + +#define SWAPPA \ + ptmp = pa1; \ + pa1 = pa2; \ + pa2 = ptmp + +/* Do modular exponentiation using integer multiply code. 
*/ +mp_err +mp_exptmod_i(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size odd_ints) +{ + mp_int *pa1, *pa2, *ptmp; + mp_size i; + mp_err res; + int expOff; + mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS]; + + /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */ + /* oddPowers[i] = base ** (2*i + 1); */ + + MP_DIGITS(&accum1) = 0; + MP_DIGITS(&accum2) = 0; + MP_DIGITS(&power2) = 0; + for (i = 0; i < MAX_ODD_INTS; ++i) { + MP_DIGITS(oddPowers + i) = 0; + } + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2)); + + MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase)); + + MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2)); + MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */ + MP_CHECKOK(s_mp_redc(&power2, mmm)); + + for (i = 1; i < odd_ints; ++i) { + MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2)); + MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i)); + MP_CHECKOK(s_mp_redc(oddPowers + i, mmm)); + } + + /* set accumulator to montgomery residue of 1 */ + mp_set(&accum1, 1); + MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + pa1 = &accum1; + pa2 = &accum2; + + for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + if (window_bits == 1) { + if (!smallExp) { + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + MUL(0, pa2, pa1); + } else { + abort(); + } + } else if (window_bits == 4) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 2, pa1, pa2); + SWAPPA; + } else if (smallExp & 2) { + 
SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 4, pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 8, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 8) { + SQR(pa1, pa2); + MUL(smallExp / 16, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else { + abort(); + } + } else if (window_bits == 5) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 2, pa2, pa1); + } else if (smallExp & 2) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 4, pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 8, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 8) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 16, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 0x10) { + SQR(pa1, pa2); + MUL(smallExp / 32, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else { + abort(); + } + } else if (window_bits == 6) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 2, pa1, pa2); + SWAPPA; + } else if (smallExp & 2) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 4, pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 8, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if 
(smallExp & 8) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 16, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 0x10) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 32, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 0x20) { + SQR(pa1, pa2); + MUL(smallExp / 64, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else { + abort(); + } + } else { + abort(); + } + } + + res = s_mp_redc(pa1, mmm); + mp_exch(pa1, result); + +CLEANUP: + mp_clear(&accum1); + mp_clear(&accum2); + mp_clear(&power2); + for (i = 0; i < odd_ints; ++i) { + mp_clear(oddPowers + i); + } + return res; +} +#undef SQR +#undef MUL + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP +unsigned int mp_using_cache_safe_exp = 1; +#endif + +mp_err +mp_set_safe_modexp(int value) +{ +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + mp_using_cache_safe_exp = value; + return MP_OKAY; +#else + if (value == 0) { + return MP_OKAY; + } + return MP_BADARG; +#endif +} + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP +#define WEAVE_WORD_SIZE 4 + +/* + * mpi_to_weave takes an array of bignums, a matrix in which each bignum + * occupies all the columns of a row, and transposes it into a matrix in + * which each bignum occupies a column of every row. The first row of the + * input matrix becomes the first column of the output matrix. The n'th + * row of input becomes the n'th column of output. The input data is said + * to be "interleaved" or "woven" into the output matrix. + * + * The array of bignums is left in this woven form. Each time a single + * bignum value is needed, it is recreated by fetching the n'th column, + * forming a single row which is the new bignum. + * + * The purpose of this interleaving is make it impossible to determine which + * of the bignums is being used in any one operation by examining the pattern + * of cache misses. 
+ * + * The weaving function does not transpose the entire input matrix in one call. + * It transposes 4 rows of mp_ints into their respective columns of output. + * + * This implementation treats each mp_int bignum as an array of mp_digits, + * It stores those bytes as a column of mp_digits in the output matrix. It + * doesn't care if the machine uses big-endian or little-endian byte ordering + * within mp_digits. + * + * "bignums" is an array of mp_ints. + * It points to four rows, four mp_ints, a subset of a larger array of mp_ints. + * + * "weaved" is the weaved output matrix. + * The first byte of bignums[0] is stored in weaved[0]. + * + * "nBignums" is the total number of bignums in the array of which "bignums" + * is a part. + * + * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array. + * mp_ints that use less than nDigits digits are logically padded with zeros + * while being stored in the weaved array. + */ +mp_err +mpi_to_weave(const mp_int *bignums, + mp_digit *weaved, + mp_size nDigits, /* in each mp_int of input */ + mp_size nBignums) /* in the entire source array */ +{ + mp_size i; + mp_digit *endDest = weaved + (nDigits * nBignums); + + for (i = 0; i < WEAVE_WORD_SIZE; i++) { + mp_size used = MP_USED(&bignums[i]); + mp_digit *pSrc = MP_DIGITS(&bignums[i]); + mp_digit *endSrc = pSrc + used; + mp_digit *pDest = weaved + i; + + ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG); + ARGCHK(used <= nDigits, MP_BADARG); + + for (; pSrc < endSrc; pSrc++) { + *pDest = *pSrc; + pDest += nBignums; + } + while (pDest < endDest) { + *pDest = 0; + pDest += nBignums; + } + } + + return MP_OKAY; +} + +/* + * These functions return 0xffffffff if the output is true, and 0 otherwise. + */ +#define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1))) +#define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1)) +#define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b)) + +/* Reverse the operation above for one mp_int. 
+ * Reconstruct one mp_int from its column in the weaved array. + * Every read accesses every element of the weaved array, in order to + * avoid timing attacks based on patterns of memory accesses. + */ +mp_err +weave_to_mpi(mp_int *a, /* out, result */ + const mp_digit *weaved, /* in, byte matrix */ + mp_size index, /* which column to read */ + mp_size nDigits, /* number of mp_digits in each bignum */ + mp_size nBignums) /* width of the matrix */ +{ + /* these are indices, but need to be the same size as mp_digit + * because of the CONST_TIME operations */ + mp_digit i, j; + mp_digit d; + mp_digit *pDest = MP_DIGITS(a); + + MP_SIGN(a) = MP_ZPOS; + MP_USED(a) = nDigits; + + assert(weaved != NULL); + + /* Fetch the proper column in constant time, indexing over the whole array */ + for (i = 0; i < nDigits; ++i) { + d = 0; + for (j = 0; j < nBignums; ++j) { + d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index); + } + pDest[i] = d; + } + + s_mp_clamp(a); + return MP_OKAY; +} + +#define SQR(a, b) \ + MP_CHECKOK(mp_sqr(a, b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) + +#if defined(MP_MONT_USE_MP_MUL) +#define MUL_NOWEAVE(x, a, b) \ + MP_CHECKOK(mp_mul(a, x, b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) +#else +#define MUL_NOWEAVE(x, a, b) \ + MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm)) +#endif + +#define MUL(x, a, b) \ + MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \ + MUL_NOWEAVE(&tmp, a, b) + +#define SWAPPA \ + ptmp = pa1; \ + pa1 = pa2; \ + pa2 = ptmp +#define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y))) + +/* Do modular exponentiation using integer multiply code. 
*/ +mp_err +mp_exptmod_safe_i(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size num_powers) +{ + mp_int *pa1, *pa2, *ptmp; + mp_size i; + mp_size first_window; + mp_err res; + int expOff; + mp_int accum1, accum2, accum[WEAVE_WORD_SIZE]; + mp_int tmp; + mp_digit *powersArray = NULL; + mp_digit *powers = NULL; + + MP_DIGITS(&accum1) = 0; + MP_DIGITS(&accum2) = 0; + MP_DIGITS(&accum[0]) = 0; + MP_DIGITS(&accum[1]) = 0; + MP_DIGITS(&accum[2]) = 0; + MP_DIGITS(&accum[3]) = 0; + MP_DIGITS(&tmp) = 0; + + /* grab the first window value. This allows us to preload accumulator1 + * and save a conversion, some squares and a multiple*/ + MP_CHECKOK(mpl_get_bits(exponent, + bits_in_exponent - window_bits, window_bits)); + first_window = (mp_size)res; + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2)); + + /* build the first WEAVE_WORD powers inline */ + /* if WEAVE_WORD_SIZE is not 4, this code will have to change */ + if (num_powers > 2) { + MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2)); + mp_set(&accum[0], 1); + MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0])); + MP_CHECKOK(mp_copy(montBase, &accum[1])); + SQR(montBase, &accum[2]); + MUL_NOWEAVE(montBase, &accum[2], &accum[3]); + powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1)); + if (!powersArray) { + res = MP_MEM; + goto CLEANUP; + } + /* powers[i] = base ** (i); */ + powers = (mp_digit *)MP_ALIGN(powersArray, num_powers); + MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers)); + if (first_window < 4) { + MP_CHECKOK(mp_copy(&accum[first_window], &accum1)); + first_window = num_powers; + } + } else { + if (first_window == 0) { + 
mp_set(&accum1, 1); + MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + } else { + /* assert first_window == 1? */ + MP_CHECKOK(mp_copy(montBase, &accum1)); + } + } + + /* + * calculate all the powers in the powers array. + * this adds 2**(k-1)-2 square operations over just calculating the + * odd powers where k is the window size in the two other mp_modexpt + * implementations in this file. We will get some of that + * back by not needing the first 'k' squares and one multiply for the + * first window. + * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if + * num_powers > 2, in which case powers will have been allocated. + */ + for (i = WEAVE_WORD_SIZE; i < num_powers; i++) { + int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */ + if (i & 1) { + MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]); + /* we've filled the array do our 'per array' processing */ + if (acc_index == (WEAVE_WORD_SIZE - 1)) { + MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1), + nLen, num_powers)); + + if (first_window <= i) { + MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)], + &accum1)); + first_window = num_powers; + } + } + } else { + /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source + * and target are the same so we need to copy.. 
After that, the + * value is overwritten, so we need to fetch it from the stored + * weave array */ + if (i > 2 * WEAVE_WORD_SIZE) { + MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers)); + SQR(&accum2, &accum[acc_index]); + } else { + int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1); + if (half_power_index == acc_index) { + /* copy is cheaper than weave_to_mpi */ + MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2)); + SQR(&accum2, &accum[acc_index]); + } else { + SQR(&accum[half_power_index], &accum[acc_index]); + } + } + } + } +/* if the accum1 isn't set, Then there is something wrong with our logic + * above and is an internal programming error. + */ +#if MP_ARGCHK == 2 + assert(MP_USED(&accum1) != 0); +#endif + + /* set accumulator to montgomery residue of 1 */ + pa1 = &accum1; + pa2 = &accum2; + + /* tmp is not used if window_bits == 1. */ + if (window_bits != 1) { + MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2)); + } + + for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + /* handle unroll the loops */ + switch (window_bits) { + case 1: + if (!smallExp) { + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + MUL_NOWEAVE(montBase, pa2, pa1); + } else { + abort(); + } + break; + case 6: + SQR(pa1, pa2); + SQR(pa2, pa1); + /* fall through */ + case 4: + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp, pa1, pa2); + SWAPPA; + break; + case 5: + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp, pa2, pa1); + break; + default: + abort(); /* could do a loop? 
*/ + } + } + + res = s_mp_redc(pa1, mmm); + mp_exch(pa1, result); + +CLEANUP: + mp_clear(&accum1); + mp_clear(&accum2); + mp_clear(&accum[0]); + mp_clear(&accum[1]); + mp_clear(&accum[2]); + mp_clear(&accum[3]); + mp_clear(&tmp); + /* zero required by FIPS here, can't use PORT_ZFree + * because mpi doesn't link with util */ + if (powers) { + PORT_Memset(powers, 0, num_powers * sizeof(mp_digit)); + } + free(powersArray); + return res; +} +#undef SQR +#undef MUL +#endif + +mp_err +mp_exptmod(const mp_int *inBase, const mp_int *exponent, + const mp_int *modulus, mp_int *result) +{ + const mp_int *base; + mp_size bits_in_exponent, i, window_bits, odd_ints; + mp_err res; + int nLen; + mp_int montBase, goodBase; + mp_mont_modulus mmm; +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + static unsigned int max_window_bits; +#endif + + /* function for computing n0prime only works if n0 is odd */ + if (!mp_isodd(modulus)) + return s_mp_exptmod(inBase, exponent, modulus, result); + + MP_DIGITS(&montBase) = 0; + MP_DIGITS(&goodBase) = 0; + + if (mp_cmp(inBase, modulus) < 0) { + base = inBase; + } else { + MP_CHECKOK(mp_init(&goodBase)); + base = &goodBase; + MP_CHECKOK(mp_mod(inBase, modulus, &goodBase)); + } + + nLen = MP_USED(modulus); + MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2)); + + mmm.N = *modulus; /* a copy of the mp_int struct */ + + /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX + ** where n0 = least significant mp_digit of N, the modulus. 
+ */ + mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0)); + + MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase)); + + bits_in_exponent = mpl_significant_bits(exponent); +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + if (mp_using_cache_safe_exp) { + if (bits_in_exponent > 780) + window_bits = 6; + else if (bits_in_exponent > 256) + window_bits = 5; + else if (bits_in_exponent > 20) + window_bits = 4; + /* RSA public key exponents are typically under 20 bits (common values + * are: 3, 17, 65537) and a 4-bit window is inefficient + */ + else + window_bits = 1; + } else +#endif + if (bits_in_exponent > 480) + window_bits = 6; + else if (bits_in_exponent > 160) + window_bits = 5; + else if (bits_in_exponent > 20) + window_bits = 4; + /* RSA public key exponents are typically under 20 bits (common values + * are: 3, 17, 65537) and a 4-bit window is inefficient + */ + else + window_bits = 1; + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + /* + * clamp the window size based on + * the cache line size. + */ + if (!max_window_bits) { + unsigned long cache_size = s_mpi_getProcessorLineSize(); + /* processor has no cache, use 'fast' code always */ + if (cache_size == 0) { + mp_using_cache_safe_exp = 0; + } + if ((cache_size == 0) || (cache_size >= 64)) { + max_window_bits = 6; + } else if (cache_size >= 32) { + max_window_bits = 5; + } else if (cache_size >= 16) { + max_window_bits = 4; + } else + max_window_bits = 1; /* should this be an assert? 
*/ + } + + /* clamp the window size down before we caclulate bits_in_exponent */ + if (mp_using_cache_safe_exp) { + if (window_bits > max_window_bits) { + window_bits = max_window_bits; + } + } +#endif + + odd_ints = 1 << (window_bits - 1); + i = bits_in_exponent % window_bits; + if (i != 0) { + bits_in_exponent += window_bits - i; + } + +#ifdef MP_USING_MONT_MULF + if (mp_using_mont_mulf) { + MP_CHECKOK(s_mp_pad(&montBase, nLen)); + res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen, + bits_in_exponent, window_bits, odd_ints); + } else +#endif +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + if (mp_using_cache_safe_exp) { + res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen, + bits_in_exponent, window_bits, 1 << window_bits); + } else +#endif + res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen, + bits_in_exponent, window_bits, odd_ints); + +CLEANUP: + mp_clear(&montBase); + mp_clear(&goodBase); + /* Don't mp_clear mmm.N because it is merely a copy of modulus. + ** Just zap it. + */ + memset(&mmm, 0, sizeof mmm); + return res; +} diff --git a/security/nss/lib/freebl/mpi/mpprime.c b/security/nss/lib/freebl/mpi/mpprime.c new file mode 100644 index 0000000000..b757150e79 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpprime.c @@ -0,0 +1,610 @@ +/* + * mpprime.c + * + * Utilities for finding and working with prime and pseudo-prime + * integers + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi-priv.h" +#include "mpprime.h" +#include "mplogic.h" +#include +#include + +#define SMALL_TABLE 0 /* determines size of hard-wired prime table */ + +#define RANDOM() rand() + +#include "primes.c" /* pull in the prime digit table */ + +/* + Test if any of a given vector of digits divides a. 
If not, MP_NO + is returned; otherwise, MP_YES is returned and 'which' is set to + the index of the integer in the vector which divided a. + */ +mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which); + +/* {{{ mpp_divis(a, b) */ + +/* + mpp_divis(a, b) + + Returns MP_YES if a is divisible by b, or MP_NO if it is not. + */ + +mp_err +mpp_divis(mp_int *a, mp_int *b) +{ + mp_err res; + mp_int rem; + + if ((res = mp_init(&rem)) != MP_OKAY) + return res; + + if ((res = mp_mod(a, b, &rem)) != MP_OKAY) + goto CLEANUP; + + if (mp_cmp_z(&rem) == 0) + res = MP_YES; + else + res = MP_NO; + +CLEANUP: + mp_clear(&rem); + return res; + +} /* end mpp_divis() */ + +/* }}} */ + +/* {{{ mpp_divis_d(a, d) */ + +/* + mpp_divis_d(a, d) + + Return MP_YES if a is divisible by d, or MP_NO if it is not. + */ + +mp_err +mpp_divis_d(mp_int *a, mp_digit d) +{ + mp_err res; + mp_digit rem; + + ARGCHK(a != NULL, MP_BADARG); + + if (d == 0) + return MP_NO; + + if ((res = mp_mod_d(a, d, &rem)) != MP_OKAY) + return res; + + if (rem == 0) + return MP_YES; + else + return MP_NO; + +} /* end mpp_divis_d() */ + +/* }}} */ + +/* {{{ mpp_random(a) */ + +/* + mpp_random(a) + + Assigns a random value to a. This value is generated using the + standard C library's rand() function, so it should not be used for + cryptographic purposes, but it should be fine for primality testing, + since all we really care about there is good statistical properties. + + As many digits as a currently has are filled with random digits. 
+ */ + +mp_err +mpp_random(mp_int *a) + +{ + mp_digit next = 0; + unsigned int ix, jx; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = 0; ix < USED(a); ix++) { + for (jx = 0; jx < sizeof(mp_digit); jx++) { + next = (next << CHAR_BIT) | (RANDOM() & UCHAR_MAX); + } + DIGIT(a, ix) = next; + } + + return MP_OKAY; + +} /* end mpp_random() */ + +/* }}} */ + +static mpp_random_fn mpp_random_insecure = &mpp_random; + +/* {{{ mpp_random_size(a, prec) */ + +mp_err +mpp_random_size(mp_int *a, mp_size prec) +{ + mp_err res; + + ARGCHK(a != NULL && prec > 0, MP_BADARG); + + if ((res = s_mp_pad(a, prec)) != MP_OKAY) + return res; + + return (*mpp_random_insecure)(a); + +} /* end mpp_random_size() */ + +/* }}} */ + +/* {{{ mpp_divis_vector(a, vec, size, which) */ + +/* + mpp_divis_vector(a, vec, size, which) + + Determines if a is divisible by any of the 'size' digits in vec. + Returns MP_YES and sets 'which' to the index of the offending digit, + if it is; returns MP_NO if it is not. + */ + +mp_err +mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which) +{ + ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG); + + return s_mpp_divp(a, vec, size, which); + +} /* end mpp_divis_vector() */ + +/* }}} */ + +/* {{{ mpp_divis_primes(a, np) */ + +/* + mpp_divis_primes(a, np) + + Test whether a is divisible by any of the first 'np' primes. If it + is, returns MP_YES and sets *np to the value of the digit that did + it. If not, returns MP_NO. + */ +mp_err +mpp_divis_primes(mp_int *a, mp_digit *np) +{ + int size, which; + mp_err res; + + ARGCHK(a != NULL && np != NULL, MP_BADARG); + + size = (int)*np; + if (size > prime_tab_size) + size = prime_tab_size; + + res = mpp_divis_vector(a, prime_tab, size, &which); + if (res == MP_YES) + *np = prime_tab[which]; + + return res; + +} /* end mpp_divis_primes() */ + +/* }}} */ + +/* {{{ mpp_fermat(a, w) */ + +/* + Using w as a witness, try pseudo-primality testing based on Fermat's + little theorem. 
If a is prime, and (w, a) = 1, then w^a == w (mod + a). So, we compute z = w^a (mod a) and compare z to w; if they are + equal, the test passes and we return MP_YES. Otherwise, we return + MP_NO. + */ +mp_err +mpp_fermat(mp_int *a, mp_digit w) +{ + mp_int base, test; + mp_err res; + + if ((res = mp_init(&base)) != MP_OKAY) + return res; + + mp_set(&base, w); + + if ((res = mp_init(&test)) != MP_OKAY) + goto TEST; + + /* Compute test = base^a (mod a) */ + if ((res = mp_exptmod(&base, a, a, &test)) != MP_OKAY) + goto CLEANUP; + + if (mp_cmp(&base, &test) == 0) + res = MP_YES; + else + res = MP_NO; + +CLEANUP: + mp_clear(&test); +TEST: + mp_clear(&base); + + return res; + +} /* end mpp_fermat() */ + +/* }}} */ + +/* + Perform the fermat test on each of the primes in a list until + a) one of them shows a is not prime, or + b) the list is exhausted. + Returns: MP_YES if it passes tests. + MP_NO if fermat test reveals it is composite + Some MP error code if some other error occurs. + */ +mp_err +mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes) +{ + mp_err rv = MP_YES; + + while (nPrimes-- > 0 && rv == MP_YES) { + rv = mpp_fermat(a, *primes++); + } + return rv; +} + +/* {{{ mpp_pprime(a, nt) */ + +/* + mpp_pprime(a, nt) + + Performs nt iteration of the Miller-Rabin probabilistic primality + test on a. Returns MP_YES if the tests pass, MP_NO if one fails. + If MP_NO is returned, the number is definitely composite. If MP_YES + is returned, it is probably prime (but that is not guaranteed). + */ + +mp_err +mpp_pprime(mp_int *a, int nt) +{ + return mpp_pprime_ext_random(a, nt, mpp_random_insecure); +} + +mp_err +mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random) +{ + mp_err res; + mp_int x, amo, m, z; /* "amo" = "a minus one" */ + int iter; + unsigned int jx; + mp_size b; + + ARGCHK(a != NULL, MP_BADARG); + + MP_DIGITS(&x) = 0; + MP_DIGITS(&amo) = 0; + MP_DIGITS(&m) = 0; + MP_DIGITS(&z) = 0; + + /* Initialize temporaries... 
*/ + MP_CHECKOK(mp_init(&amo)); + /* Compute amo = a - 1 for what follows... */ + MP_CHECKOK(mp_sub_d(a, 1, &amo)); + + b = mp_trailing_zeros(&amo); + if (!b) { /* a was even ? */ + res = MP_NO; + goto CLEANUP; + } + + MP_CHECKOK(mp_init_size(&x, MP_USED(a))); + MP_CHECKOK(mp_init(&z)); + MP_CHECKOK(mp_init(&m)); + MP_CHECKOK(mp_div_2d(&amo, b, &m, 0)); + + /* Do the test nt times... */ + for (iter = 0; iter < nt; iter++) { + + /* Choose a random value for 1 < x < a */ + MP_CHECKOK(s_mp_pad(&x, USED(a))); + MP_CHECKOK((*random)(&x)); + MP_CHECKOK(mp_mod(&x, a, &x)); + if (mp_cmp_d(&x, 1) <= 0) { + iter--; /* don't count this iteration */ + continue; /* choose a new x */ + } + + /* Compute z = (x ** m) mod a */ + MP_CHECKOK(mp_exptmod(&x, &m, a, &z)); + + if (mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) { + res = MP_YES; + continue; + } + + res = MP_NO; /* just in case the following for loop never executes. */ + for (jx = 1; jx < b; jx++) { + /* z = z^2 (mod a) */ + MP_CHECKOK(mp_sqrmod(&z, a, &z)); + res = MP_NO; /* previous line set res to MP_YES */ + + if (mp_cmp_d(&z, 1) == 0) { + break; + } + if (mp_cmp(&z, &amo) == 0) { + res = MP_YES; + break; + } + } /* end testing loop */ + + /* If the test passes, we will continue iterating, but a failed + test means the candidate is definitely NOT prime, so we will + immediately break out of this loop + */ + if (res == MP_NO) + break; + + } /* end iterations loop */ + +CLEANUP: + mp_clear(&m); + mp_clear(&z); + mp_clear(&x); + mp_clear(&amo); + return res; + +} /* end mpp_pprime() */ + +/* }}} */ + +/* Produce table of composites from list of primes and trial value. +** trial must be odd. List of primes must not include 2. +** sieve should have dimension >= MAXPRIME/2, where MAXPRIME is largest +** prime in list of primes. After this function is finished, +** if sieve[i] is non-zero, then (trial + 2*i) is composite. 
+** Each prime used in the sieve costs one division of trial, and eliminates +** one or more values from the search space. (3 eliminates 1/3 of the values +** alone!) Each value left in the search space costs 1 or more modular +** exponentations. So, these divisions are a bargain! +*/ +mp_err +mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes, + unsigned char *sieve, mp_size nSieve) +{ + mp_err res; + mp_digit rem; + mp_size ix; + unsigned long offset; + + memset(sieve, 0, nSieve); + + for (ix = 0; ix < nPrimes; ix++) { + mp_digit prime = primes[ix]; + mp_size i; + if ((res = mp_mod_d(trial, prime, &rem)) != MP_OKAY) + return res; + + if (rem == 0) { + offset = 0; + } else { + offset = prime - rem; + } + + for (i = offset; i < nSieve * 2; i += prime) { + if (i % 2 == 0) { + sieve[i / 2] = 1; + } + } + } + + return MP_OKAY; +} + +#define SIEVE_SIZE 32 * 1024 + +mp_err +mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong) +{ + return mpp_make_prime_ext_random(start, nBits, strong, mpp_random_insecure); +} + +mp_err +mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random) +{ + mp_digit np; + mp_err res; + unsigned int i = 0; + mp_int trial; + mp_int q; + mp_size num_tests; + unsigned char *sieve; + + ARGCHK(start != 0, MP_BADARG); + ARGCHK(nBits > 16, MP_RANGE); + + sieve = malloc(SIEVE_SIZE); + ARGCHK(sieve != NULL, MP_MEM); + + MP_DIGITS(&trial) = 0; + MP_DIGITS(&q) = 0; + MP_CHECKOK(mp_init(&trial)); + MP_CHECKOK(mp_init(&q)); + /* values originally taken from table 4.4, + * HandBook of Applied Cryptography, augmented by FIPS-186 + * requirements, Table C.2 and C.3 */ + if (nBits >= 2000) { + num_tests = 3; + } else if (nBits >= 1536) { + num_tests = 4; + } else if (nBits >= 1024) { + num_tests = 5; + } else if (nBits >= 550) { + num_tests = 6; + } else if (nBits >= 450) { + num_tests = 7; + } else if (nBits >= 400) { + num_tests = 8; + } else if (nBits >= 350) { + num_tests = 9; + } else if (nBits >= 
300) { + num_tests = 10; + } else if (nBits >= 250) { + num_tests = 20; + } else if (nBits >= 200) { + num_tests = 41; + } else if (nBits >= 100) { + num_tests = 38; /* funny anomaly in the FIPS tables, for aux primes, the + * required more iterations for larger aux primes */ + } else + num_tests = 50; + + if (strong) + --nBits; + MP_CHECKOK(mpl_set_bit(start, nBits - 1, 1)); + MP_CHECKOK(mpl_set_bit(start, 0, 1)); + for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) { + MP_CHECKOK(mpl_set_bit(start, i, 0)); + } + /* start sieveing with prime value of 3. */ + MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1, + sieve, SIEVE_SIZE)); + +#ifdef DEBUG_SIEVE + res = 0; + for (i = 0; i < SIEVE_SIZE; ++i) { + if (!sieve[i]) + ++res; + } + fprintf(stderr, "sieve found %d potential primes.\n", res); +#define FPUTC(x, y) fputc(x, y) +#else +#define FPUTC(x, y) +#endif + + res = MP_NO; + for (i = 0; i < SIEVE_SIZE; ++i) { + if (sieve[i]) /* this number is composite */ + continue; + MP_CHECKOK(mp_add_d(start, 2 * i, &trial)); + FPUTC('.', stderr); + /* run a Fermat test */ + res = mpp_fermat(&trial, 2); + if (res != MP_OKAY) { + if (res == MP_NO) + continue; /* was composite */ + goto CLEANUP; + } + + FPUTC('+', stderr); + /* If that passed, run some Miller-Rabin tests */ + res = mpp_pprime_ext_random(&trial, num_tests, random); + if (res != MP_OKAY) { + if (res == MP_NO) + continue; /* was composite */ + goto CLEANUP; + } + FPUTC('!', stderr); + + if (!strong) + break; /* success !! */ + + /* At this point, we have strong evidence that our candidate + is itself prime. If we want a strong prime, we need now + to test q = 2p + 1 for primality... + */ + MP_CHECKOK(mp_mul_2(&trial, &q)); + MP_CHECKOK(mp_add_d(&q, 1, &q)); + + /* Test q for small prime divisors ... 
*/ + np = prime_tab_size; + res = mpp_divis_primes(&q, &np); + if (res == MP_YES) { /* is composite */ + mp_clear(&q); + continue; + } + if (res != MP_NO) + goto CLEANUP; + + /* And test with Fermat, as with its parent ... */ + res = mpp_fermat(&q, 2); + if (res != MP_YES) { + mp_clear(&q); + if (res == MP_NO) + continue; /* was composite */ + goto CLEANUP; + } + + /* And test with Miller-Rabin, as with its parent ... */ + res = mpp_pprime_ext_random(&q, num_tests, random); + if (res != MP_YES) { + mp_clear(&q); + if (res == MP_NO) + continue; /* was composite */ + goto CLEANUP; + } + + /* If it passed, we've got a winner */ + mp_exch(&q, &trial); + mp_clear(&q); + break; + + } /* end of loop through sieved values */ + if (res == MP_YES) + mp_exch(&trial, start); +CLEANUP: + mp_clear(&trial); + mp_clear(&q); + if (sieve != NULL) { + memset(sieve, 0, SIEVE_SIZE); + free(sieve); + } + return res; +} + +/*========================================================================*/ +/*------------------------------------------------------------------------*/ +/* Static functions visible only to the library internally */ + +/* {{{ s_mpp_divp(a, vec, size, which) */ + +/* + Test for divisibility by members of a vector of digits. Returns + MP_NO if a is not divisible by any of them; returns MP_YES and sets + 'which' to the index of the offender, if it is. Will stop on the + first digit against which a is divisible. 
+ */ + +mp_err +s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which) +{ + mp_err res; + mp_digit rem; + + int ix; + + for (ix = 0; ix < size; ix++) { + if ((res = mp_mod_d(a, vec[ix], &rem)) != MP_OKAY) + return res; + + if (rem == 0) { + if (which) + *which = ix; + return MP_YES; + } + } + + return MP_NO; + +} /* end s_mpp_divp() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/mpprime.h b/security/nss/lib/freebl/mpi/mpprime.h new file mode 100644 index 0000000000..0bdc6598ce --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpprime.h @@ -0,0 +1,48 @@ +/* + * mpprime.h + * + * Utilities for finding and working with prime and pseudo-prime + * integers + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _H_MP_PRIME_ +#define _H_MP_PRIME_ + +#include "mpi.h" + +SEC_BEGIN_PROTOS + +extern const int prime_tab_size; /* number of primes available */ +extern const mp_digit prime_tab[]; + +/* Tests for divisibility */ +mp_err mpp_divis(mp_int *a, mp_int *b); +mp_err mpp_divis_d(mp_int *a, mp_digit d); + +/* Random selection */ +mp_err mpp_random(mp_int *a); +mp_err mpp_random_size(mp_int *a, mp_size prec); + +/* Type for a pointer to a user-provided mpp_random implementation */ +typedef mp_err (*mpp_random_fn)(mp_int *); + +/* Pseudo-primality testing */ +mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which); +mp_err mpp_divis_primes(mp_int *a, mp_digit *np); +mp_err mpp_fermat(mp_int *a, mp_digit w); +mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes); +mp_err mpp_pprime(mp_int *a, int nt); +mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes, + unsigned char *sieve, mp_size nSieve); +mp_err mpp_make_prime(mp_int 
*start, mp_size nBits, mp_size strong); + +/* Pseudo-primality tests using a user-provided mpp_random implementation */ +mp_err mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random); +mp_err mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random); + +SEC_END_PROTOS + +#endif /* end _H_MP_PRIME_ */ diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c new file mode 100644 index 0000000000..423311b65b --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparc.c @@ -0,0 +1,221 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "vis_proto.h" + +/***************************************************************/ + +typedef int t_s32; +typedef unsigned int t_u32; +#if defined(__sparcv9) +typedef long t_s64; +typedef unsigned long t_u64; +#else +typedef long long t_s64; +typedef unsigned long long t_u64; +#endif +typedef double t_d64; + +/***************************************************************/ + +typedef union { + t_d64 d64; + struct { + t_s32 i0; + t_s32 i1; + } i32s; +} d64_2_i32; + +/***************************************************************/ + +#define BUFF_SIZE 256 + +#define A_BITS 19 +#define A_MASK ((1 << A_BITS) - 1) + +/***************************************************************/ + +static t_u64 mask_cnst[] = { + 0x8000000080000000ull +}; + +/***************************************************************/ + +#define DEF_VARS(N) \ + t_d64 *py = (t_d64 *)y; \ + t_d64 mask = *((t_d64 *)mask_cnst); \ + t_d64 ca = (1u << 31) - 1; \ + t_d64 da = (t_d64)a; \ + t_s64 buff[N], s; \ + d64_2_i32 dy + +/***************************************************************/ + +#define MUL_U32_S64_2(i) \ + dy.d64 = vis_fxnor(mask, py[i]); \ + buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \ + buff[2 * (i) + 1] 
= (ca - (t_d64)dy.i32s.i1) * da + +#define MUL_U32_S64_2_D(i) \ + dy.d64 = vis_fxnor(mask, py[i]); \ + d0 = ca - (t_d64)dy.i32s.i0; \ + d1 = ca - (t_d64)dy.i32s.i1; \ + buff[4 * (i)] = (t_s64)(d0 * da); \ + buff[4 * (i) + 1] = (t_s64)(d0 * db); \ + buff[4 * (i) + 2] = (t_s64)(d1 * da); \ + buff[4 * (i) + 3] = (t_s64)(d1 * db) + +/***************************************************************/ + +#define ADD_S64_U32(i) \ + s = buff[i] + x[i] + c; \ + z[i] = s; \ + c = (s >> 32) + +#define ADD_S64_U32_D(i) \ + s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \ + z[i] = s; \ + uc = ((t_u64)s >> 32) + +/***************************************************************/ + +#define MUL_U32_S64_8(i) \ + MUL_U32_S64_2(i); \ + MUL_U32_S64_2(i + 1); \ + MUL_U32_S64_2(i + 2); \ + MUL_U32_S64_2(i + 3) + +#define MUL_U32_S64_D_8(i) \ + MUL_U32_S64_2_D(i); \ + MUL_U32_S64_2_D(i + 1); \ + MUL_U32_S64_2_D(i + 2); \ + MUL_U32_S64_2_D(i + 3) + +/***************************************************************/ + +#define ADD_S64_U32_8(i) \ + ADD_S64_U32(i); \ + ADD_S64_U32(i + 1); \ + ADD_S64_U32(i + 2); \ + ADD_S64_U32(i + 3); \ + ADD_S64_U32(i + 4); \ + ADD_S64_U32(i + 5); \ + ADD_S64_U32(i + 6); \ + ADD_S64_U32(i + 7) + +#define ADD_S64_U32_D_8(i) \ + ADD_S64_U32_D(i); \ + ADD_S64_U32_D(i + 1); \ + ADD_S64_U32_D(i + 2); \ + ADD_S64_U32_D(i + 3); \ + ADD_S64_U32_D(i + 4); \ + ADD_S64_U32_D(i + 5); \ + ADD_S64_U32_D(i + 6); \ + ADD_S64_U32_D(i + 7) + +/***************************************************************/ + +t_u32 +mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a) +{ + if (a < (1 << A_BITS)) { + + if (n == 8) { + DEF_VARS(8); + t_s32 c = 0; + + MUL_U32_S64_8(0); + ADD_S64_U32_8(0); + + return c; + + } else if (n == 16) { + DEF_VARS(16); + t_s32 c = 0; + + MUL_U32_S64_8(0); + MUL_U32_S64_8(4); + ADD_S64_U32_8(0); + ADD_S64_U32_8(8); + + return c; + + } else { + DEF_VARS(BUFF_SIZE); + t_s32 i, c = 0; + +#pragma pipeloop(0) + for (i = 0; i < (n + 
1) / 2; i++) { + MUL_U32_S64_2(i); + } + +#pragma pipeloop(0) + for (i = 0; i < n; i++) { + ADD_S64_U32(i); + } + + return c; + } + } else { + + if (n == 8) { + DEF_VARS(2 * 8); + t_d64 d0, d1, db; + t_u32 uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + + MUL_U32_S64_D_8(0); + ADD_S64_U32_D_8(0); + + return uc; + + } else if (n == 16) { + DEF_VARS(2 * 16); + t_d64 d0, d1, db; + t_u32 uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + + MUL_U32_S64_D_8(0); + MUL_U32_S64_D_8(4); + ADD_S64_U32_D_8(0); + ADD_S64_U32_D_8(8); + + return uc; + + } else { + DEF_VARS(2 * BUFF_SIZE); + t_d64 d0, d1, db; + t_u32 i, uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + +#pragma pipeloop(0) + for (i = 0; i < (n + 1) / 2; i++) { + MUL_U32_S64_2_D(i); + } + +#pragma pipeloop(0) + for (i = 0; i < n; i++) { + ADD_S64_U32_D(i); + } + + return uc; + } + } +} + +/***************************************************************/ + +t_u32 +mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a) +{ + return mul_add(x, x, y, n, a); +} + +/***************************************************************/ diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv8.s b/security/nss/lib/freebl/mpi/mpv_sparcv8.s new file mode 100644 index 0000000000..66122a1d9d --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparcv8.s @@ -0,0 +1,1607 @@ +! Inner multiply loop functions for hybrid 32/64-bit Sparc v8plus CPUs. +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .section ".text",#alloc,#execinstr +/* 000000 3 ( 0 0) */ .file "mpv_sparc.c" +/* 000000 14 ( 0 0) */ .align 8 +! +! SUBROUTINE .L_const_seg_900000106 +! +! 
OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .L_const_seg_900000106: /* frequency 1.0 confidence 0.0 */ +/* 000000 19 ( 0 0) */ .word 1127219200,0 +/* 0x0008 20 ( 0 0) */ .word 1105199103,-4194304 +/* 0x0010 21 ( 0 0) */ .align 16 +/* 0x0010 27 ( 0 0) */ .global mul_add + +! +! ENTRY mul_add +! + + .global mul_add + mul_add: /* frequency 1.0 confidence 0.0 */ +/* 0x0010 29 ( 0 1) */ sethi %hi(0x1800),%g1 +/* 0x0014 30 ( 0 1) */ sethi %hi(mask_cnst),%g2 +/* 0x0018 31 ( 1 2) */ xor %g1,-984,%g1 +/* 0x001c 32 ( 1 2) */ add %g2,%lo(mask_cnst),%g2 +/* 0x0020 33 ( 2 4) */ save %sp,%g1,%sp + +! +! ENTRY .L900000154 +! + + .L900000154: /* frequency 1.0 confidence 0.0 */ +/* 0x0024 35 ( 0 2) */ call (.+0x8) ! params = ! Result = +/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5 +/* 0x002c 177 ( 2 3) */ sethi %hi(.L_const_seg_900000106),%g3 +/* 0x0030 178 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5 +/* 0x0034 179 ( 3 4) */ or %g0,%i4,%o1 +/* 0x0038 180 ( 3 4) */ st %o1,[%fp+84] +/* 0x003c 181 ( 3 4) */ add %g5,%o7,%o3 +/* 0x0040 182 ( 4 5) */ add %g3,%lo(.L_const_seg_900000106),%g3 +/* 0x0044 183 ( 4 6) */ ld [%o3+%g2],%g2 +/* 0x0048 184 ( 4 5) */ or %g0,%i3,%o2 +/* 0x004c 185 ( 5 6) */ sethi %hi(0x80000),%g4 +/* 0x0050 186 ( 5 7) */ ld [%o3+%g3],%o0 +/* 0x0054 187 ( 5 6) */ or %g0,%i2,%g5 +/* 0x0058 188 ( 6 7) */ or %g0,%o2,%o3 +/* 0x005c 189 ( 6 10) */ ldd [%g2],%f0 +/* 0x0060 190 ( 6 7) */ subcc %o1,%g4,%g0 +/* 0x0064 191 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50 +/* 0x0068 ( 7 8) */ subcc %o2,8,%g0 +/* 0x006c 193 ( 7 8) */ bne,pn %icc,.L77000037 ! 
tprob=0.50 +/* 0x0070 ( 8 12) */ ldd [%o0],%f8 +/* 0x0074 195 ( 9 13) */ ldd [%g5],%f4 +/* 0x0078 196 (10 14) */ ldd [%g5+8],%f6 +/* 0x007c 197 (11 15) */ ldd [%g5+16],%f10 +/* 0x0080 198 (11 14) */ fmovs %f8,%f12 +/* 0x0084 199 (12 16) */ fxnor %f0,%f4,%f4 +/* 0x0088 200 (12 14) */ ld [%fp+84],%f13 +/* 0x008c 201 (13 17) */ ldd [%o0+8],%f14 +/* 0x0090 202 (13 17) */ fxnor %f0,%f6,%f6 +/* 0x0094 203 (14 18) */ ldd [%g5+24],%f16 +/* 0x0098 204 (14 18) */ fxnor %f0,%f10,%f10 +/* 0x009c 208 (15 17) */ ld [%i1],%g2 +/* 0x00a0 209 (15 20) */ fsubd %f12,%f8,%f8 +/* 0x00a4 210 (16 21) */ fitod %f4,%f18 +/* 0x00a8 211 (16 18) */ ld [%i1+4],%g3 +/* 0x00ac 212 (17 22) */ fitod %f5,%f4 +/* 0x00b0 213 (17 19) */ ld [%i1+8],%g4 +/* 0x00b4 214 (18 23) */ fitod %f6,%f20 +/* 0x00b8 215 (18 20) */ ld [%i1+12],%g5 +/* 0x00bc 216 (19 21) */ ld [%i1+16],%o0 +/* 0x00c0 217 (19 24) */ fitod %f7,%f6 +/* 0x00c4 218 (20 22) */ ld [%i1+20],%o1 +/* 0x00c8 219 (20 24) */ fxnor %f0,%f16,%f16 +/* 0x00cc 220 (21 26) */ fsubd %f14,%f18,%f12 +/* 0x00d0 221 (21 23) */ ld [%i1+24],%o2 +/* 0x00d4 222 (22 27) */ fsubd %f14,%f4,%f4 +/* 0x00d8 223 (22 24) */ ld [%i1+28],%o3 +/* 0x00dc 224 (23 28) */ fitod %f10,%f18 +/* 0x00e0 225 (24 29) */ fsubd %f14,%f20,%f20 +/* 0x00e4 226 (25 30) */ fitod %f11,%f10 +/* 0x00e8 227 (26 31) */ fsubd %f14,%f6,%f6 +/* 0x00ec 228 (26 31) */ fmuld %f12,%f8,%f12 +/* 0x00f0 229 (27 32) */ fitod %f16,%f22 +/* 0x00f4 230 (27 32) */ fmuld %f4,%f8,%f4 +/* 0x00f8 231 (28 33) */ fsubd %f14,%f18,%f18 +/* 0x00fc 232 (29 34) */ fitod %f17,%f16 +/* 0x0100 233 (29 34) */ fmuld %f20,%f8,%f20 +/* 0x0104 234 (30 35) */ fsubd %f14,%f10,%f10 +/* 0x0108 235 (31 36) */ fdtox %f12,%f12 +/* 0x010c 236 (31 32) */ std %f12,[%sp+152] +/* 0x0110 237 (31 36) */ fmuld %f6,%f8,%f6 +/* 0x0114 238 (32 37) */ fdtox %f4,%f4 +/* 0x0118 239 (32 33) */ std %f4,[%sp+144] +/* 0x011c 240 (33 38) */ fsubd %f14,%f22,%f4 +/* 0x0120 241 (33 38) */ fmuld %f18,%f8,%f12 +/* 0x0124 242 (34 39) */ fdtox %f20,%f18 +/* 
0x0128 243 (34 35) */ std %f18,[%sp+136] +/* 0x012c 244 (35 37) */ ldx [%sp+152],%o4 +/* 0x0130 245 (35 40) */ fsubd %f14,%f16,%f14 +/* 0x0134 246 (35 40) */ fmuld %f10,%f8,%f10 +/* 0x0138 247 (36 41) */ fdtox %f6,%f6 +/* 0x013c 248 (36 37) */ std %f6,[%sp+128] +/* 0x0140 249 (37 39) */ ldx [%sp+144],%o5 +/* 0x0144 250 (37 38) */ add %o4,%g2,%o4 +/* 0x0148 251 (38 39) */ st %o4,[%i0] +/* 0x014c 252 (38 39) */ srax %o4,32,%g2 +/* 0x0150 253 (38 43) */ fdtox %f12,%f6 +/* 0x0154 254 (38 43) */ fmuld %f4,%f8,%f4 +/* 0x0158 255 (39 40) */ std %f6,[%sp+120] +/* 0x015c 256 (39 40) */ add %o5,%g3,%g3 +/* 0x0160 257 (40 42) */ ldx [%sp+136],%o7 +/* 0x0164 258 (40 41) */ add %g3,%g2,%g2 +/* 0x0168 259 (40 45) */ fmuld %f14,%f8,%f6 +/* 0x016c 260 (40 45) */ fdtox %f10,%f8 +/* 0x0170 261 (41 42) */ std %f8,[%sp+112] +/* 0x0174 262 (41 42) */ srax %g2,32,%o5 +/* 0x0178 263 (42 44) */ ldx [%sp+128],%g3 +/* 0x017c 264 (42 43) */ add %o7,%g4,%g4 +/* 0x0180 265 (43 44) */ st %g2,[%i0+4] +/* 0x0184 266 (43 44) */ add %g4,%o5,%g4 +/* 0x0188 267 (43 48) */ fdtox %f4,%f4 +/* 0x018c 268 (44 46) */ ldx [%sp+120],%o5 +/* 0x0190 269 (44 45) */ add %g3,%g5,%g3 +/* 0x0194 270 (44 45) */ srax %g4,32,%g5 +/* 0x0198 271 (45 46) */ std %f4,[%sp+104] +/* 0x019c 272 (45 46) */ add %g3,%g5,%g3 +/* 0x01a0 273 (45 50) */ fdtox %f6,%f4 +/* 0x01a4 274 (46 47) */ std %f4,[%sp+96] +/* 0x01a8 275 (46 47) */ add %o5,%o0,%o0 +/* 0x01ac 276 (46 47) */ srax %g3,32,%o5 +/* 0x01b0 277 (47 49) */ ldx [%sp+112],%g5 +/* 0x01b4 278 (47 48) */ add %o0,%o5,%o0 +/* 0x01b8 279 (48 49) */ st %g4,[%i0+8] +/* 0x01bc 280 (49 51) */ ldx [%sp+104],%o5 +/* 0x01c0 281 (49 50) */ add %g5,%o1,%o1 +/* 0x01c4 282 (49 50) */ srax %o0,32,%g5 +/* 0x01c8 283 (50 51) */ st %o0,[%i0+16] +/* 0x01cc 284 (50 51) */ add %o1,%g5,%o1 +/* 0x01d0 285 (51 53) */ ldx [%sp+96],%g5 +/* 0x01d4 286 (51 52) */ add %o5,%o2,%o2 +/* 0x01d8 287 (51 52) */ srax %o1,32,%o5 +/* 0x01dc 288 (52 53) */ st %o1,[%i0+20] +/* 0x01e0 289 (52 53) */ add %o2,%o5,%o2 
+/* 0x01e4 290 (53 54) */ st %o2,[%i0+24] +/* 0x01e8 291 (53 54) */ srax %o2,32,%g4 +/* 0x01ec 292 (53 54) */ add %g5,%o3,%g2 +/* 0x01f0 293 (54 55) */ st %g3,[%i0+12] +/* 0x01f4 294 (54 55) */ add %g2,%g4,%g2 +/* 0x01f8 295 (55 56) */ st %g2,[%i0+28] +/* 0x01fc 299 (55 56) */ srax %g2,32,%o7 +/* 0x0200 300 (56 57) */ or %g0,%o7,%i0 +/* 0x0204 (57 64) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0208 (59 61) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000037 +! + + .L77000037: /* frequency 1.0 confidence 0.0 */ +/* 0x020c 307 ( 0 1) */ subcc %o2,16,%g0 +/* 0x0210 308 ( 0 1) */ bne,pn %icc,.L77000076 ! tprob=0.50 +/* 0x0214 ( 1 5) */ ldd [%o0],%f8 +/* 0x0218 310 ( 2 6) */ ldd [%g5],%f4 +/* 0x021c 311 ( 3 7) */ ldd [%g5+8],%f6 +/* 0x0220 317 ( 4 8) */ ldd [%o0+8],%f14 +/* 0x0224 318 ( 4 7) */ fmovs %f8,%f12 +/* 0x0228 319 ( 5 7) */ ld [%fp+84],%f13 +/* 0x022c 320 ( 5 9) */ fxnor %f0,%f4,%f4 +/* 0x0230 321 ( 6 10) */ ldd [%g5+16],%f10 +/* 0x0234 322 ( 6 10) */ fxnor %f0,%f6,%f6 +/* 0x0238 323 ( 7 11) */ ldd [%g5+24],%f16 +/* 0x023c 324 ( 8 12) */ ldd [%g5+32],%f20 +/* 0x0240 325 ( 8 13) */ fsubd %f12,%f8,%f8 +/* 0x0244 331 ( 9 11) */ ld [%i1+40],%o7 +/* 0x0248 332 ( 9 14) */ fitod %f4,%f18 +/* 0x024c 333 (10 14) */ ldd [%g5+40],%f22 +/* 0x0250 334 (10 15) */ fitod %f5,%f4 +/* 0x0254 335 (11 12) */ stx %o7,[%sp+96] +/* 0x0258 336 (11 16) */ fitod %f6,%f24 +/* 0x025c 337 (12 14) */ ld [%i1+44],%o7 +/* 0x0260 338 (12 16) */ fxnor %f0,%f10,%f10 +/* 0x0264 339 (13 17) */ ldd [%g5+48],%f26 +/* 0x0268 340 (13 18) */ fitod %f7,%f6 +/* 0x026c 341 (14 15) */ stx %o7,[%sp+104] +/* 0x0270 342 (14 19) */ fsubd %f14,%f18,%f18 +/* 0x0274 343 (15 17) */ ld [%i1+48],%o7 +/* 0x0278 344 (15 20) */ fsubd %f14,%f4,%f4 +/* 0x027c 345 (16 18) */ ld [%i1+36],%o5 +/* 0x0280 346 (16 21) */ fitod %f10,%f28 +/* 0x0284 347 (17 18) */ stx %o7,[%sp+112] +/* 0x0288 348 (17 21) */ fxnor %f0,%f16,%f16 +/* 0x028c 349 (18 20) */ ld [%i1],%g2 +/* 0x0290 350 (18 23) */ fsubd %f14,%f24,%f24 +/* 0x0294 351 (19 20) 
*/ stx %o5,[%sp+120] +/* 0x0298 352 (19 24) */ fitod %f11,%f10 +/* 0x029c 353 (19 24) */ fmuld %f18,%f8,%f18 +/* 0x02a0 354 (20 22) */ ld [%i1+52],%o5 +/* 0x02a4 355 (20 25) */ fsubd %f14,%f6,%f6 +/* 0x02a8 356 (20 25) */ fmuld %f4,%f8,%f4 +/* 0x02ac 357 (21 26) */ fitod %f16,%f30 +/* 0x02b0 358 (22 26) */ fxnor %f0,%f20,%f20 +/* 0x02b4 359 (22 24) */ ld [%i1+4],%g3 +/* 0x02b8 360 (23 27) */ ldd [%g5+56],%f2 +/* 0x02bc 361 (23 28) */ fsubd %f14,%f28,%f28 +/* 0x02c0 362 (23 28) */ fmuld %f24,%f8,%f24 +/* 0x02c4 363 (24 25) */ stx %o5,[%sp+128] +/* 0x02c8 364 (24 29) */ fdtox %f18,%f18 +/* 0x02cc 365 (25 26) */ std %f18,[%sp+272] +/* 0x02d0 366 (25 30) */ fitod %f17,%f16 +/* 0x02d4 367 (25 30) */ fmuld %f6,%f8,%f6 +/* 0x02d8 368 (26 31) */ fsubd %f14,%f10,%f10 +/* 0x02dc 369 (27 32) */ fitod %f20,%f18 +/* 0x02e0 370 (28 33) */ fdtox %f4,%f4 +/* 0x02e4 371 (28 29) */ std %f4,[%sp+264] +/* 0x02e8 372 (28 33) */ fmuld %f28,%f8,%f28 +/* 0x02ec 373 (29 31) */ ld [%i1+8],%g4 +/* 0x02f0 374 (29 34) */ fsubd %f14,%f30,%f4 +/* 0x02f4 375 (30 34) */ fxnor %f0,%f22,%f22 +/* 0x02f8 376 (30 32) */ ld [%i1+12],%g5 +/* 0x02fc 377 (31 33) */ ld [%i1+16],%o0 +/* 0x0300 378 (31 36) */ fitod %f21,%f20 +/* 0x0304 379 (31 36) */ fmuld %f10,%f8,%f10 +/* 0x0308 380 (32 34) */ ld [%i1+20],%o1 +/* 0x030c 381 (32 37) */ fdtox %f24,%f24 +/* 0x0310 382 (33 34) */ std %f24,[%sp+256] +/* 0x0314 383 (33 38) */ fsubd %f14,%f16,%f16 +/* 0x0318 384 (34 36) */ ldx [%sp+272],%o7 +/* 0x031c 385 (34 39) */ fdtox %f6,%f6 +/* 0x0320 386 (34 39) */ fmuld %f4,%f8,%f4 +/* 0x0324 387 (35 36) */ std %f6,[%sp+248] +/* 0x0328 388 (35 40) */ fitod %f22,%f24 +/* 0x032c 389 (36 38) */ ld [%i1+32],%o4 +/* 0x0330 390 (36 41) */ fsubd %f14,%f18,%f6 +/* 0x0334 391 (36 37) */ add %o7,%g2,%g2 +/* 0x0338 392 (37 39) */ ldx [%sp+264],%o7 +/* 0x033c 393 (37 41) */ fxnor %f0,%f26,%f26 +/* 0x0340 394 (37 38) */ srax %g2,32,%o5 +/* 0x0344 395 (38 39) */ st %g2,[%i0] +/* 0x0348 396 (38 43) */ fitod %f23,%f18 +/* 0x034c 397 (38 
43) */ fmuld %f16,%f8,%f16 +/* 0x0350 398 (39 41) */ ldx [%sp+248],%g2 +/* 0x0354 399 (39 44) */ fdtox %f28,%f22 +/* 0x0358 400 (39 40) */ add %o7,%g3,%g3 +/* 0x035c 401 (40 42) */ ldx [%sp+256],%o7 +/* 0x0360 402 (40 45) */ fsubd %f14,%f20,%f20 +/* 0x0364 403 (40 41) */ add %g3,%o5,%g3 +/* 0x0368 404 (41 42) */ std %f22,[%sp+240] +/* 0x036c 405 (41 46) */ fitod %f26,%f22 +/* 0x0370 406 (41 42) */ srax %g3,32,%o5 +/* 0x0374 407 (41 42) */ add %g2,%g5,%g2 +/* 0x0378 408 (42 43) */ st %g3,[%i0+4] +/* 0x037c 409 (42 47) */ fdtox %f10,%f10 +/* 0x0380 410 (42 43) */ add %o7,%g4,%g4 +/* 0x0384 411 (42 47) */ fmuld %f6,%f8,%f6 +/* 0x0388 412 (43 44) */ std %f10,[%sp+232] +/* 0x038c 413 (43 47) */ fxnor %f0,%f2,%f12 +/* 0x0390 414 (43 44) */ add %g4,%o5,%g4 +/* 0x0394 415 (44 45) */ st %g4,[%i0+8] +/* 0x0398 416 (44 45) */ srax %g4,32,%o5 +/* 0x039c 417 (44 49) */ fsubd %f14,%f24,%f10 +/* 0x03a0 418 (45 47) */ ldx [%sp+240],%o7 +/* 0x03a4 419 (45 50) */ fdtox %f4,%f4 +/* 0x03a8 420 (45 46) */ add %g2,%o5,%g2 +/* 0x03ac 421 (45 50) */ fmuld %f20,%f8,%f20 +/* 0x03b0 422 (46 47) */ std %f4,[%sp+224] +/* 0x03b4 423 (46 47) */ srax %g2,32,%g5 +/* 0x03b8 424 (46 51) */ fsubd %f14,%f18,%f4 +/* 0x03bc 425 (47 48) */ st %g2,[%i0+12] +/* 0x03c0 426 (47 52) */ fitod %f27,%f24 +/* 0x03c4 427 (47 48) */ add %o7,%o0,%g3 +/* 0x03c8 428 (48 50) */ ldx [%sp+232],%o5 +/* 0x03cc 429 (48 53) */ fdtox %f16,%f16 +/* 0x03d0 430 (48 49) */ add %g3,%g5,%g2 +/* 0x03d4 431 (49 50) */ std %f16,[%sp+216] +/* 0x03d8 432 (49 50) */ srax %g2,32,%g4 +/* 0x03dc 433 (49 54) */ fitod %f12,%f18 +/* 0x03e0 434 (49 54) */ fmuld %f10,%f8,%f10 +/* 0x03e4 435 (50 51) */ st %g2,[%i0+16] +/* 0x03e8 436 (50 55) */ fsubd %f14,%f22,%f16 +/* 0x03ec 437 (50 51) */ add %o5,%o1,%g2 +/* 0x03f0 438 (51 53) */ ld [%i1+24],%o2 +/* 0x03f4 439 (51 56) */ fitod %f13,%f12 +/* 0x03f8 440 (51 52) */ add %g2,%g4,%g2 +/* 0x03fc 441 (51 56) */ fmuld %f4,%f8,%f22 +/* 0x0400 442 (52 54) */ ldx [%sp+224],%g3 +/* 0x0404 443 (52 53) */ srax 
%g2,32,%g4 +/* 0x0408 444 (52 57) */ fdtox %f6,%f6 +/* 0x040c 445 (53 54) */ std %f6,[%sp+208] +/* 0x0410 446 (53 58) */ fdtox %f20,%f6 +/* 0x0414 447 (54 55) */ stx %o4,[%sp+136] +/* 0x0418 448 (54 59) */ fsubd %f14,%f24,%f4 +/* 0x041c 449 (55 56) */ std %f6,[%sp+200] +/* 0x0420 450 (55 60) */ fsubd %f14,%f18,%f6 +/* 0x0424 451 (55 60) */ fmuld %f16,%f8,%f16 +/* 0x0428 452 (56 57) */ st %g2,[%i0+20] +/* 0x042c 453 (56 57) */ add %g3,%o2,%g2 +/* 0x0430 454 (56 61) */ fdtox %f10,%f10 +/* 0x0434 455 (57 59) */ ld [%i1+28],%o3 +/* 0x0438 456 (57 58) */ add %g2,%g4,%g2 +/* 0x043c 457 (58 60) */ ldx [%sp+216],%g5 +/* 0x0440 458 (58 59) */ srax %g2,32,%g4 +/* 0x0444 459 (59 60) */ std %f10,[%sp+192] +/* 0x0448 460 (59 64) */ fsubd %f14,%f12,%f10 +/* 0x044c 461 (59 64) */ fmuld %f4,%f8,%f4 +/* 0x0450 462 (60 61) */ st %g2,[%i0+24] +/* 0x0454 463 (60 61) */ add %g5,%o3,%g2 +/* 0x0458 464 (60 65) */ fdtox %f22,%f12 +/* 0x045c 465 (60 65) */ fmuld %f6,%f8,%f6 +/* 0x0460 466 (61 63) */ ldx [%sp+136],%o0 +/* 0x0464 467 (61 62) */ add %g2,%g4,%g2 +/* 0x0468 468 (62 64) */ ldx [%sp+208],%g3 +/* 0x046c 469 (62 63) */ srax %g2,32,%g4 +/* 0x0470 470 (63 65) */ ldx [%sp+120],%o1 +/* 0x0474 471 (64 66) */ ldx [%sp+200],%g5 +/* 0x0478 472 (64 65) */ add %g3,%o0,%g3 +/* 0x047c 473 (64 69) */ fdtox %f4,%f4 +/* 0x0480 474 (64 69) */ fmuld %f10,%f8,%f8 +/* 0x0484 475 (65 66) */ std %f12,[%sp+184] +/* 0x0488 476 (65 66) */ add %g3,%g4,%g3 +/* 0x048c 477 (65 70) */ fdtox %f16,%f12 +/* 0x0490 478 (66 67) */ std %f12,[%sp+176] +/* 0x0494 479 (66 67) */ srax %g3,32,%o0 +/* 0x0498 480 (66 67) */ add %g5,%o1,%g5 +/* 0x049c 481 (67 69) */ ldx [%sp+192],%o2 +/* 0x04a0 482 (67 68) */ add %g5,%o0,%g5 +/* 0x04a4 483 (68 70) */ ldx [%sp+96],%g4 +/* 0x04a8 484 (68 69) */ srax %g5,32,%o1 +/* 0x04ac 485 (69 71) */ ld [%i1+56],%o4 +/* 0x04b0 486 (70 72) */ ldx [%sp+104],%o0 +/* 0x04b4 487 (70 71) */ add %o2,%g4,%g4 +/* 0x04b8 488 (71 72) */ std %f4,[%sp+168] +/* 0x04bc 489 (71 72) */ add %g4,%o1,%g4 +/* 
0x04c0 490 (71 76) */ fdtox %f6,%f4 +/* 0x04c4 491 (72 74) */ ldx [%sp+184],%o3 +/* 0x04c8 492 (72 73) */ srax %g4,32,%o2 +/* 0x04cc 493 (73 75) */ ldx [%sp+112],%o1 +/* 0x04d0 494 (74 75) */ std %f4,[%sp+160] +/* 0x04d4 495 (74 75) */ add %o3,%o0,%o0 +/* 0x04d8 496 (74 79) */ fdtox %f8,%f4 +/* 0x04dc 497 (75 77) */ ldx [%sp+176],%o5 +/* 0x04e0 498 (75 76) */ add %o0,%o2,%o0 +/* 0x04e4 499 (76 77) */ stx %o4,[%sp+144] +/* 0x04e8 500 (77 78) */ st %g2,[%i0+28] +/* 0x04ec 501 (77 78) */ add %o5,%o1,%g2 +/* 0x04f0 502 (77 78) */ srax %o0,32,%o1 +/* 0x04f4 503 (78 79) */ std %f4,[%sp+152] +/* 0x04f8 504 (78 79) */ add %g2,%o1,%o1 +/* 0x04fc 505 (79 81) */ ldx [%sp+168],%o7 +/* 0x0500 506 (79 80) */ srax %o1,32,%o3 +/* 0x0504 507 (80 82) */ ldx [%sp+128],%o2 +/* 0x0508 508 (81 83) */ ld [%i1+60],%o4 +/* 0x050c 509 (82 83) */ add %o7,%o2,%o2 +/* 0x0510 510 (83 84) */ add %o2,%o3,%o2 +/* 0x0514 511 (83 85) */ ldx [%sp+144],%o5 +/* 0x0518 512 (84 86) */ ldx [%sp+160],%g2 +/* 0x051c 513 (85 87) */ ldx [%sp+152],%o3 +/* 0x0520 514 (86 87) */ st %g3,[%i0+32] +/* 0x0524 515 (86 87) */ add %g2,%o5,%g2 +/* 0x0528 516 (86 87) */ srax %o2,32,%o5 +/* 0x052c 517 (87 88) */ st %g5,[%i0+36] +/* 0x0530 518 (87 88) */ add %g2,%o5,%g2 +/* 0x0534 519 (87 88) */ add %o3,%o4,%g3 +/* 0x0538 520 (88 89) */ st %o0,[%i0+44] +/* 0x053c 521 (88 89) */ srax %g2,32,%g5 +/* 0x0540 522 (89 90) */ st %o1,[%i0+48] +/* 0x0544 523 (89 90) */ add %g3,%g5,%g3 +/* 0x0548 524 (90 91) */ st %o2,[%i0+52] +/* 0x054c 528 (90 91) */ srax %g3,32,%o7 +/* 0x0550 529 (91 92) */ st %g4,[%i0+40] +/* 0x0554 530 (92 93) */ st %g2,[%i0+56] +/* 0x0558 531 (93 94) */ st %g3,[%i0+60] +/* 0x055c 532 (93 94) */ or %g0,%o7,%i0 +/* 0x0560 (94 101) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0564 (96 98) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000076 +! 
+ + .L77000076: /* frequency 1.0 confidence 0.0 */ +/* 0x0568 540 ( 0 4) */ ldd [%o0],%f6 +/* 0x056c 546 ( 0 1) */ add %o2,1,%g2 +/* 0x0570 547 ( 0 3) */ fmovd %f0,%f14 +/* 0x0574 548 ( 0 1) */ or %g0,0,%o7 +/* 0x0578 549 ( 1 3) */ ld [%fp+84],%f9 +/* 0x057c 550 ( 1 2) */ srl %g2,31,%g3 +/* 0x0580 551 ( 1 2) */ add %fp,-2264,%o5 +/* 0x0584 552 ( 2 3) */ add %g2,%g3,%g2 +/* 0x0588 553 ( 2 6) */ ldd [%o0+8],%f18 +/* 0x058c 554 ( 2 3) */ add %fp,-2256,%o4 +/* 0x0590 555 ( 3 6) */ fmovs %f6,%f8 +/* 0x0594 556 ( 3 4) */ sra %g2,1,%o1 +/* 0x0598 557 ( 3 4) */ or %g0,0,%g2 +/* 0x059c 558 ( 4 5) */ subcc %o1,0,%g0 +/* 0x05a0 559 ( 4 5) */ sub %o1,1,%o2 +/* 0x05a4 563 ( 5 6) */ add %g5,32,%o0 +/* 0x05a8 564 ( 6 11) */ fsubd %f8,%f6,%f16 +/* 0x05ac 565 ( 6 7) */ ble,pt %icc,.L900000161 ! tprob=0.50 +/* 0x05b0 ( 6 7) */ subcc %o3,0,%g0 +/* 0x05b4 567 ( 7 8) */ subcc %o1,7,%g0 +/* 0x05b8 568 ( 7 8) */ bl,pn %icc,.L77000077 ! tprob=0.50 +/* 0x05bc ( 7 8) */ sub %o1,2,%o1 +/* 0x05c0 570 ( 8 12) */ ldd [%g5],%f2 +/* 0x05c4 571 ( 9 13) */ ldd [%g5+8],%f4 +/* 0x05c8 572 ( 9 10) */ or %g0,5,%g2 +/* 0x05cc 573 (10 14) */ ldd [%g5+16],%f0 +/* 0x05d0 574 (11 15) */ fxnor %f14,%f2,%f2 +/* 0x05d4 575 (11 15) */ ldd [%g5+24],%f12 +/* 0x05d8 576 (12 16) */ fxnor %f14,%f4,%f6 +/* 0x05dc 577 (12 16) */ ldd [%g5+32],%f10 +/* 0x05e0 578 (13 17) */ fxnor %f14,%f0,%f8 +/* 0x05e4 579 (15 20) */ fitod %f3,%f0 +/* 0x05e8 580 (16 21) */ fitod %f2,%f4 +/* 0x05ec 581 (17 22) */ fitod %f7,%f2 +/* 0x05f0 582 (18 23) */ fitod %f6,%f6 +/* 0x05f4 583 (20 25) */ fsubd %f18,%f0,%f0 +/* 0x05f8 584 (21 26) */ fsubd %f18,%f4,%f4 + +! +! ENTRY .L900000149 +! 
+ + .L900000149: /* frequency 1.0 confidence 0.0 */ +/* 0x05fc 586 ( 0 4) */ fxnor %f14,%f12,%f22 +/* 0x0600 587 ( 0 5) */ fmuld %f4,%f16,%f4 +/* 0x0604 588 ( 0 1) */ add %g2,2,%g2 +/* 0x0608 589 ( 0 1) */ add %o4,32,%o4 +/* 0x060c 590 ( 1 6) */ fitod %f9,%f24 +/* 0x0610 591 ( 1 6) */ fmuld %f0,%f16,%f20 +/* 0x0614 592 ( 1 2) */ add %o0,8,%o0 +/* 0x0618 593 ( 1 2) */ subcc %g2,%o1,%g0 +/* 0x061c 594 ( 2 6) */ ldd [%o0],%f12 +/* 0x0620 595 ( 2 7) */ fsubd %f18,%f2,%f0 +/* 0x0624 596 ( 2 3) */ add %o5,32,%o5 +/* 0x0628 597 ( 3 8) */ fsubd %f18,%f6,%f2 +/* 0x062c 598 ( 5 10) */ fdtox %f4,%f4 +/* 0x0630 599 ( 6 11) */ fdtox %f20,%f6 +/* 0x0634 600 ( 6 7) */ std %f4,[%o5-32] +/* 0x0638 601 ( 7 12) */ fitod %f8,%f4 +/* 0x063c 602 ( 7 8) */ std %f6,[%o4-32] +/* 0x0640 603 ( 8 12) */ fxnor %f14,%f10,%f8 +/* 0x0644 604 ( 8 13) */ fmuld %f2,%f16,%f6 +/* 0x0648 605 ( 9 14) */ fitod %f23,%f2 +/* 0x064c 606 ( 9 14) */ fmuld %f0,%f16,%f20 +/* 0x0650 607 ( 9 10) */ add %o0,8,%o0 +/* 0x0654 608 (10 14) */ ldd [%o0],%f10 +/* 0x0658 609 (10 15) */ fsubd %f18,%f24,%f0 +/* 0x065c 610 (12 17) */ fsubd %f18,%f4,%f4 +/* 0x0660 611 (13 18) */ fdtox %f6,%f6 +/* 0x0664 612 (14 19) */ fdtox %f20,%f20 +/* 0x0668 613 (14 15) */ std %f6,[%o5-16] +/* 0x066c 614 (15 20) */ fitod %f22,%f6 +/* 0x0670 615 (15 16) */ ble,pt %icc,.L900000149 ! tprob=0.50 +/* 0x0674 (15 16) */ std %f20,[%o4-16] + +! +! ENTRY .L900000152 +! 
+ + .L900000152: /* frequency 1.0 confidence 0.0 */ +/* 0x0678 618 ( 0 4) */ fxnor %f14,%f12,%f12 +/* 0x067c 619 ( 0 5) */ fmuld %f0,%f16,%f22 +/* 0x0680 620 ( 0 1) */ add %o5,80,%o5 +/* 0x0684 621 ( 0 1) */ add %o4,80,%o4 +/* 0x0688 622 ( 1 5) */ fxnor %f14,%f10,%f0 +/* 0x068c 623 ( 1 6) */ fmuld %f4,%f16,%f24 +/* 0x0690 624 ( 1 2) */ subcc %g2,%o2,%g0 +/* 0x0694 625 ( 1 2) */ add %o0,8,%g5 +/* 0x0698 626 ( 2 7) */ fitod %f8,%f20 +/* 0x069c 627 ( 3 8) */ fitod %f9,%f8 +/* 0x06a0 628 ( 4 9) */ fsubd %f18,%f6,%f6 +/* 0x06a4 629 ( 5 10) */ fitod %f12,%f26 +/* 0x06a8 630 ( 6 11) */ fitod %f13,%f4 +/* 0x06ac 631 ( 7 12) */ fsubd %f18,%f2,%f12 +/* 0x06b0 632 ( 8 13) */ fitod %f0,%f2 +/* 0x06b4 633 ( 9 14) */ fitod %f1,%f0 +/* 0x06b8 634 (10 15) */ fsubd %f18,%f20,%f10 +/* 0x06bc 635 (10 15) */ fmuld %f6,%f16,%f20 +/* 0x06c0 636 (11 16) */ fsubd %f18,%f8,%f8 +/* 0x06c4 637 (12 17) */ fsubd %f18,%f26,%f6 +/* 0x06c8 638 (12 17) */ fmuld %f12,%f16,%f12 +/* 0x06cc 639 (13 18) */ fsubd %f18,%f4,%f4 +/* 0x06d0 640 (14 19) */ fsubd %f18,%f2,%f2 +/* 0x06d4 641 (15 20) */ fsubd %f18,%f0,%f0 +/* 0x06d8 642 (15 20) */ fmuld %f10,%f16,%f10 +/* 0x06dc 643 (16 21) */ fdtox %f24,%f24 +/* 0x06e0 644 (16 17) */ std %f24,[%o5-80] +/* 0x06e4 645 (16 21) */ fmuld %f8,%f16,%f8 +/* 0x06e8 646 (17 22) */ fdtox %f22,%f22 +/* 0x06ec 647 (17 18) */ std %f22,[%o4-80] +/* 0x06f0 648 (17 22) */ fmuld %f6,%f16,%f6 +/* 0x06f4 649 (18 23) */ fdtox %f20,%f20 +/* 0x06f8 650 (18 19) */ std %f20,[%o5-64] +/* 0x06fc 651 (18 23) */ fmuld %f4,%f16,%f4 +/* 0x0700 652 (19 24) */ fdtox %f12,%f12 +/* 0x0704 653 (19 20) */ std %f12,[%o4-64] +/* 0x0708 654 (19 24) */ fmuld %f2,%f16,%f2 +/* 0x070c 655 (20 25) */ fdtox %f10,%f10 +/* 0x0710 656 (20 21) */ std %f10,[%o5-48] +/* 0x0714 657 (20 25) */ fmuld %f0,%f16,%f0 +/* 0x0718 658 (21 26) */ fdtox %f8,%f8 +/* 0x071c 659 (21 22) */ std %f8,[%o4-48] +/* 0x0720 660 (22 27) */ fdtox %f6,%f6 +/* 0x0724 661 (22 23) */ std %f6,[%o5-32] +/* 0x0728 662 (23 28) */ fdtox 
%f4,%f4 +/* 0x072c 663 (23 24) */ std %f4,[%o4-32] +/* 0x0730 664 (24 29) */ fdtox %f2,%f2 +/* 0x0734 665 (24 25) */ std %f2,[%o5-16] +/* 0x0738 666 (25 30) */ fdtox %f0,%f0 +/* 0x073c 667 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50 +/* 0x0740 (25 26) */ std %f0,[%o4-16] + +! +! ENTRY .L77000077 +! + + .L77000077: /* frequency 1.0 confidence 0.0 */ +/* 0x0744 670 ( 0 4) */ ldd [%g5],%f0 + +! +! ENTRY .L900000160 +! + + .L900000160: /* frequency 1.0 confidence 0.0 */ +/* 0x0748 672 ( 0 4) */ fxnor %f14,%f0,%f0 +/* 0x074c 673 ( 0 1) */ add %g2,1,%g2 +/* 0x0750 674 ( 0 1) */ add %g5,8,%g5 +/* 0x0754 675 ( 1 2) */ subcc %g2,%o2,%g0 +/* 0x0758 676 ( 4 9) */ fitod %f0,%f2 +/* 0x075c 677 ( 5 10) */ fitod %f1,%f0 +/* 0x0760 678 ( 9 14) */ fsubd %f18,%f2,%f2 +/* 0x0764 679 (10 15) */ fsubd %f18,%f0,%f0 +/* 0x0768 680 (14 19) */ fmuld %f2,%f16,%f2 +/* 0x076c 681 (15 20) */ fmuld %f0,%f16,%f0 +/* 0x0770 682 (19 24) */ fdtox %f2,%f2 +/* 0x0774 683 (19 20) */ std %f2,[%o5] +/* 0x0778 684 (19 20) */ add %o5,16,%o5 +/* 0x077c 685 (20 25) */ fdtox %f0,%f0 +/* 0x0780 686 (20 21) */ std %f0,[%o4] +/* 0x0784 687 (20 21) */ add %o4,16,%o4 +/* 0x0788 688 (20 21) */ ble,a,pt %icc,.L900000160 ! tprob=0.50 +/* 0x078c (23 27) */ ldd [%g5],%f0 + +! +! ENTRY .L77000043 +! + + .L77000043: /* frequency 1.0 confidence 0.0 */ +/* 0x0790 696 ( 0 1) */ subcc %o3,0,%g0 + +! +! ENTRY .L900000161 +! + + .L900000161: /* frequency 1.0 confidence 0.0 */ +/* 0x0794 698 ( 0 1) */ ble,a,pt %icc,.L900000159 ! tprob=0.50 +/* 0x0798 ( 0 1) */ or %g0,%o7,%i0 +/* 0x079c 703 ( 0 2) */ ldx [%fp-2256],%o2 +/* 0x07a0 704 ( 0 1) */ or %g0,%i1,%g3 +/* 0x07a4 705 ( 1 2) */ sub %o3,1,%o5 +/* 0x07a8 706 ( 1 2) */ or %g0,0,%g4 +/* 0x07ac 707 ( 2 3) */ add %fp,-2264,%g5 +/* 0x07b0 708 ( 2 3) */ or %g0,%i0,%g2 +/* 0x07b4 709 ( 3 4) */ subcc %o3,6,%g0 +/* 0x07b8 710 ( 3 4) */ sub %o5,2,%o4 +/* 0x07bc 711 ( 3 4) */ bl,pn %icc,.L77000078 ! 
tprob=0.50 +/* 0x07c0 ( 3 5) */ ldx [%fp-2264],%o0 +/* 0x07c4 713 ( 4 6) */ ld [%g3],%o1 +/* 0x07c8 714 ( 4 5) */ add %g2,4,%g2 +/* 0x07cc 715 ( 4 5) */ or %g0,3,%g4 +/* 0x07d0 716 ( 5 7) */ ld [%g3+4],%o3 +/* 0x07d4 717 ( 5 6) */ add %g3,8,%g3 +/* 0x07d8 718 ( 5 6) */ add %fp,-2240,%g5 +/* 0x07dc 719 ( 6 7) */ add %o0,%o1,%o0 +/* 0x07e0 720 ( 6 8) */ ldx [%fp-2248],%o1 +/* 0x07e4 721 ( 7 8) */ st %o0,[%g2-4] +/* 0x07e8 722 ( 7 8) */ srax %o0,32,%o0 + +! +! ENTRY .L900000145 +! + + .L900000145: /* frequency 1.0 confidence 0.0 */ +/* 0x07ec 724 ( 0 2) */ ld [%g3],%o7 +/* 0x07f0 725 ( 0 1) */ add %o2,%o3,%o2 +/* 0x07f4 726 ( 0 1) */ sra %o0,0,%o3 +/* 0x07f8 727 ( 1 3) */ ldx [%g5],%o0 +/* 0x07fc 728 ( 1 2) */ add %o2,%o3,%o2 +/* 0x0800 729 ( 1 2) */ add %g4,3,%g4 +/* 0x0804 730 ( 2 3) */ st %o2,[%g2] +/* 0x0808 731 ( 2 3) */ srax %o2,32,%o3 +/* 0x080c 732 ( 2 3) */ subcc %g4,%o4,%g0 +/* 0x0810 733 ( 3 5) */ ld [%g3+4],%o2 +/* 0x0814 734 ( 4 5) */ stx %o2,[%sp+96] +/* 0x0818 735 ( 4 5) */ add %o1,%o7,%o1 +/* 0x081c 736 ( 5 7) */ ldx [%g5+8],%o2 +/* 0x0820 737 ( 5 6) */ add %o1,%o3,%o1 +/* 0x0824 738 ( 5 6) */ add %g2,12,%g2 +/* 0x0828 739 ( 6 7) */ st %o1,[%g2-8] +/* 0x082c 740 ( 6 7) */ srax %o1,32,%o7 +/* 0x0830 741 ( 6 7) */ add %g3,12,%g3 +/* 0x0834 742 ( 7 9) */ ld [%g3-4],%o3 +/* 0x0838 743 ( 8 10) */ ldx [%sp+96],%o1 +/* 0x083c 744 (10 11) */ add %o0,%o1,%o0 +/* 0x0840 745 (10 12) */ ldx [%g5+16],%o1 +/* 0x0844 746 (11 12) */ add %o0,%o7,%o0 +/* 0x0848 747 (11 12) */ add %g5,24,%g5 +/* 0x084c 748 (11 12) */ st %o0,[%g2-4] +/* 0x0850 749 (11 12) */ ble,pt %icc,.L900000145 ! tprob=0.50 +/* 0x0854 (12 13) */ srax %o0,32,%o0 + +! +! ENTRY .L900000148 +! 
+ + .L900000148: /* frequency 1.0 confidence 0.0 */ +/* 0x0858 752 ( 0 1) */ add %o2,%o3,%o2 +/* 0x085c 753 ( 0 1) */ sra %o0,0,%o3 +/* 0x0860 754 ( 0 2) */ ld [%g3],%o0 +/* 0x0864 755 ( 1 2) */ add %o2,%o3,%o3 +/* 0x0868 756 ( 1 2) */ add %g2,8,%g2 +/* 0x086c 757 ( 2 3) */ srax %o3,32,%o2 +/* 0x0870 758 ( 2 3) */ st %o3,[%g2-8] +/* 0x0874 759 ( 2 3) */ add %o1,%o0,%o0 +/* 0x0878 760 ( 3 4) */ add %o0,%o2,%o0 +/* 0x087c 761 ( 3 4) */ st %o0,[%g2-4] +/* 0x0880 762 ( 3 4) */ subcc %g4,%o5,%g0 +/* 0x0884 763 ( 3 4) */ bg,pn %icc,.L77000061 ! tprob=0.50 +/* 0x0888 ( 4 5) */ srax %o0,32,%o7 +/* 0x088c 765 ( 4 5) */ add %g3,4,%g3 + +! +! ENTRY .L77000078 +! + + .L77000078: /* frequency 1.0 confidence 0.0 */ +/* 0x0890 767 ( 0 2) */ ld [%g3],%o2 + +! +! ENTRY .L900000158 +! + + .L900000158: /* frequency 1.0 confidence 0.0 */ +/* 0x0894 769 ( 0 2) */ ldx [%g5],%o0 +/* 0x0898 770 ( 0 1) */ sra %o7,0,%o1 +/* 0x089c 771 ( 0 1) */ add %g4,1,%g4 +/* 0x08a0 772 ( 1 2) */ add %g3,4,%g3 +/* 0x08a4 773 ( 1 2) */ add %g5,8,%g5 +/* 0x08a8 774 ( 2 3) */ add %o0,%o2,%o0 +/* 0x08ac 775 ( 2 3) */ subcc %g4,%o5,%g0 +/* 0x08b0 776 ( 3 4) */ add %o0,%o1,%o0 +/* 0x08b4 777 ( 3 4) */ st %o0,[%g2] +/* 0x08b8 778 ( 3 4) */ add %g2,4,%g2 +/* 0x08bc 779 ( 4 5) */ srax %o0,32,%o7 +/* 0x08c0 780 ( 4 5) */ ble,a,pt %icc,.L900000158 ! tprob=0.50 +/* 0x08c4 ( 4 6) */ ld [%g3],%o2 + +! +! ENTRY .L77000047 +! + + .L77000047: /* frequency 1.0 confidence 0.0 */ +/* 0x08c8 783 ( 0 1) */ or %g0,%o7,%i0 +/* 0x08cc ( 1 8) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x08d0 ( 3 5) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000048 +! + + .L77000048: /* frequency 1.0 confidence 0.0 */ +/* 0x08d4 794 ( 0 1) */ bne,pn %icc,.L77000050 ! 
tprob=0.50 +/* 0x08d8 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x08dc 796 ( 0 4) */ ldd [%g5],%f4 +/* 0x08e0 804 ( 0 1) */ srl %o1,19,%g3 +/* 0x08e4 805 ( 1 2) */ st %g3,[%sp+240] +/* 0x08e8 806 ( 1 2) */ andn %o1,%g2,%g2 +/* 0x08ec 807 ( 2 6) */ ldd [%o0],%f8 +/* 0x08f0 808 ( 3 4) */ st %g2,[%sp+244] +/* 0x08f4 809 ( 3 7) */ fxnor %f0,%f4,%f4 +/* 0x08f8 810 ( 4 8) */ ldd [%g5+8],%f6 +/* 0x08fc 814 ( 5 9) */ ldd [%o0+8],%f18 +/* 0x0900 815 ( 5 8) */ fmovs %f8,%f12 +/* 0x0904 816 ( 6 10) */ ldd [%g5+16],%f10 +/* 0x0908 817 ( 6 9) */ fmovs %f8,%f16 +/* 0x090c 818 ( 7 11) */ ldd [%g5+24],%f20 +/* 0x0910 819 ( 7 12) */ fitod %f4,%f14 +/* 0x0914 823 ( 8 10) */ ld [%i1],%g2 +/* 0x0918 824 ( 8 13) */ fitod %f5,%f4 +/* 0x091c 825 ( 9 11) */ ld [%sp+240],%f13 +/* 0x0920 826 ( 9 13) */ fxnor %f0,%f6,%f6 +/* 0x0924 827 (10 12) */ ld [%sp+244],%f17 +/* 0x0928 828 (10 14) */ fxnor %f0,%f10,%f10 +/* 0x092c 829 (11 13) */ ld [%i1+28],%o3 +/* 0x0930 830 (11 15) */ fxnor %f0,%f20,%f20 +/* 0x0934 831 (12 14) */ ld [%i1+4],%g3 +/* 0x0938 832 (12 17) */ fsubd %f12,%f8,%f12 +/* 0x093c 833 (13 14) */ stx %o3,[%sp+96] +/* 0x0940 834 (13 18) */ fsubd %f18,%f14,%f14 +/* 0x0944 835 (14 16) */ ld [%i1+8],%g4 +/* 0x0948 836 (14 19) */ fsubd %f16,%f8,%f8 +/* 0x094c 837 (15 17) */ ld [%i1+12],%g5 +/* 0x0950 838 (15 20) */ fsubd %f18,%f4,%f4 +/* 0x0954 839 (16 18) */ ld [%i1+16],%o0 +/* 0x0958 840 (16 21) */ fitod %f6,%f22 +/* 0x095c 841 (17 19) */ ld [%i1+20],%o1 +/* 0x0960 842 (17 22) */ fitod %f7,%f6 +/* 0x0964 843 (18 20) */ ld [%i1+24],%o2 +/* 0x0968 844 (18 23) */ fitod %f10,%f16 +/* 0x096c 845 (18 23) */ fmuld %f14,%f12,%f24 +/* 0x0970 846 (19 24) */ fitod %f20,%f28 +/* 0x0974 847 (19 24) */ fmuld %f14,%f8,%f14 +/* 0x0978 848 (20 25) */ fitod %f11,%f10 +/* 0x097c 849 (20 25) */ fmuld %f4,%f12,%f26 +/* 0x0980 850 (21 26) */ fsubd %f18,%f22,%f22 +/* 0x0984 851 (21 26) */ fmuld %f4,%f8,%f4 +/* 0x0988 852 (22 27) */ fsubd %f18,%f6,%f6 +/* 0x098c 853 (23 28) */ fdtox %f24,%f24 +/* 0x0990 854 
(23 24) */ std %f24,[%sp+224] +/* 0x0994 855 (24 29) */ fdtox %f14,%f14 +/* 0x0998 856 (24 25) */ std %f14,[%sp+232] +/* 0x099c 857 (25 30) */ fdtox %f26,%f14 +/* 0x09a0 858 (25 26) */ std %f14,[%sp+208] +/* 0x09a4 859 (26 28) */ ldx [%sp+224],%o4 +/* 0x09a8 860 (26 31) */ fitod %f21,%f20 +/* 0x09ac 861 (26 31) */ fmuld %f22,%f12,%f30 +/* 0x09b0 862 (27 29) */ ldx [%sp+232],%o5 +/* 0x09b4 863 (27 32) */ fsubd %f18,%f16,%f16 +/* 0x09b8 864 (27 32) */ fmuld %f22,%f8,%f22 +/* 0x09bc 865 (28 29) */ sllx %o4,19,%o4 +/* 0x09c0 866 (28 33) */ fdtox %f4,%f4 +/* 0x09c4 867 (28 29) */ std %f4,[%sp+216] +/* 0x09c8 868 (28 33) */ fmuld %f6,%f12,%f24 +/* 0x09cc 869 (29 34) */ fsubd %f18,%f28,%f26 +/* 0x09d0 870 (29 30) */ add %o5,%o4,%o4 +/* 0x09d4 871 (29 34) */ fmuld %f6,%f8,%f6 +/* 0x09d8 872 (30 35) */ fsubd %f18,%f10,%f10 +/* 0x09dc 873 (30 31) */ add %o4,%g2,%g2 +/* 0x09e0 874 (30 31) */ st %g2,[%i0] +/* 0x09e4 875 (31 33) */ ldx [%sp+208],%o7 +/* 0x09e8 876 (31 32) */ srlx %g2,32,%o5 +/* 0x09ec 877 (31 36) */ fsubd %f18,%f20,%f18 +/* 0x09f0 878 (32 37) */ fdtox %f30,%f28 +/* 0x09f4 879 (32 33) */ std %f28,[%sp+192] +/* 0x09f8 880 (32 37) */ fmuld %f16,%f12,%f14 +/* 0x09fc 881 (33 34) */ sllx %o7,19,%o4 +/* 0x0a00 882 (33 35) */ ldx [%sp+216],%o7 +/* 0x0a04 883 (33 38) */ fdtox %f22,%f20 +/* 0x0a08 884 (33 38) */ fmuld %f16,%f8,%f16 +/* 0x0a0c 885 (34 35) */ std %f20,[%sp+200] +/* 0x0a10 886 (34 39) */ fdtox %f24,%f20 +/* 0x0a14 887 (34 39) */ fmuld %f26,%f12,%f22 +/* 0x0a18 888 (35 36) */ std %f20,[%sp+176] +/* 0x0a1c 889 (35 36) */ add %o7,%o4,%o4 +/* 0x0a20 890 (35 40) */ fdtox %f6,%f6 +/* 0x0a24 891 (35 40) */ fmuld %f10,%f12,%f4 +/* 0x0a28 892 (36 38) */ ldx [%sp+192],%o3 +/* 0x0a2c 893 (36 37) */ add %o4,%g3,%g3 +/* 0x0a30 894 (36 41) */ fmuld %f10,%f8,%f10 +/* 0x0a34 895 (37 38) */ std %f6,[%sp+184] +/* 0x0a38 896 (37 38) */ add %g3,%o5,%g3 +/* 0x0a3c 897 (37 42) */ fdtox %f14,%f6 +/* 0x0a40 898 (37 42) */ fmuld %f26,%f8,%f20 +/* 0x0a44 899 (38 40) */ ldx 
[%sp+200],%o4 +/* 0x0a48 900 (38 39) */ sllx %o3,19,%o3 +/* 0x0a4c 901 (38 39) */ srlx %g3,32,%o5 +/* 0x0a50 902 (38 43) */ fdtox %f16,%f14 +/* 0x0a54 903 (39 40) */ std %f6,[%sp+160] +/* 0x0a58 904 (39 44) */ fmuld %f18,%f12,%f12 +/* 0x0a5c 905 (40 42) */ ldx [%sp+176],%o7 +/* 0x0a60 906 (40 41) */ add %o4,%o3,%o3 +/* 0x0a64 907 (40 45) */ fdtox %f4,%f16 +/* 0x0a68 908 (40 45) */ fmuld %f18,%f8,%f18 +/* 0x0a6c 909 (41 42) */ std %f14,[%sp+168] +/* 0x0a70 910 (41 42) */ add %o3,%g4,%g4 +/* 0x0a74 911 (41 46) */ fdtox %f10,%f4 +/* 0x0a78 912 (42 44) */ ldx [%sp+184],%o3 +/* 0x0a7c 913 (42 43) */ sllx %o7,19,%o4 +/* 0x0a80 914 (42 43) */ add %g4,%o5,%g4 +/* 0x0a84 915 (42 47) */ fdtox %f22,%f14 +/* 0x0a88 916 (43 44) */ std %f16,[%sp+144] +/* 0x0a8c 917 (43 44) */ srlx %g4,32,%o5 +/* 0x0a90 918 (43 48) */ fdtox %f20,%f6 +/* 0x0a94 919 (44 46) */ ldx [%sp+160],%o7 +/* 0x0a98 920 (44 45) */ add %o3,%o4,%o3 +/* 0x0a9c 921 (44 49) */ fdtox %f12,%f16 +/* 0x0aa0 922 (45 46) */ std %f4,[%sp+152] +/* 0x0aa4 923 (45 46) */ add %o3,%g5,%g5 +/* 0x0aa8 924 (45 50) */ fdtox %f18,%f8 +/* 0x0aac 925 (46 48) */ ldx [%sp+168],%o3 +/* 0x0ab0 926 (46 47) */ sllx %o7,19,%o4 +/* 0x0ab4 927 (46 47) */ add %g5,%o5,%g5 +/* 0x0ab8 928 (47 48) */ std %f14,[%sp+128] +/* 0x0abc 929 (47 48) */ srlx %g5,32,%o5 +/* 0x0ac0 930 (48 49) */ std %f6,[%sp+136] +/* 0x0ac4 931 (48 49) */ add %o3,%o4,%o3 +/* 0x0ac8 932 (49 50) */ std %f16,[%sp+112] +/* 0x0acc 933 (49 50) */ add %o3,%o0,%o0 +/* 0x0ad0 934 (50 52) */ ldx [%sp+144],%o7 +/* 0x0ad4 935 (50 51) */ add %o0,%o5,%o0 +/* 0x0ad8 936 (51 53) */ ldx [%sp+152],%o3 +/* 0x0adc 937 (52 53) */ std %f8,[%sp+120] +/* 0x0ae0 938 (52 53) */ sllx %o7,19,%o4 +/* 0x0ae4 939 (52 53) */ srlx %o0,32,%o7 +/* 0x0ae8 940 (53 54) */ stx %o0,[%sp+104] +/* 0x0aec 941 (53 54) */ add %o3,%o4,%o3 +/* 0x0af0 942 (54 56) */ ldx [%sp+128],%o5 +/* 0x0af4 943 (54 55) */ add %o3,%o1,%o1 +/* 0x0af8 944 (55 57) */ ldx [%sp+136],%o0 +/* 0x0afc 945 (55 56) */ add %o1,%o7,%o1 +/* 0x0b00 
946 (56 57) */ st %g3,[%i0+4] +/* 0x0b04 947 (56 57) */ sllx %o5,19,%o3 +/* 0x0b08 948 (57 59) */ ldx [%sp+112],%o4 +/* 0x0b0c 949 (57 58) */ add %o0,%o3,%o3 +/* 0x0b10 950 (58 60) */ ldx [%sp+120],%o0 +/* 0x0b14 951 (58 59) */ add %o3,%o2,%o2 +/* 0x0b18 952 (58 59) */ srlx %o1,32,%o3 +/* 0x0b1c 953 (59 60) */ st %o1,[%i0+20] +/* 0x0b20 954 (59 60) */ sllx %o4,19,%g2 +/* 0x0b24 955 (59 60) */ add %o2,%o3,%o2 +/* 0x0b28 956 (60 62) */ ldx [%sp+96],%o4 +/* 0x0b2c 957 (60 61) */ srlx %o2,32,%g3 +/* 0x0b30 958 (60 61) */ add %o0,%g2,%g2 +/* 0x0b34 959 (61 63) */ ldx [%sp+104],%o0 +/* 0x0b38 960 (62 63) */ st %o2,[%i0+24] +/* 0x0b3c 961 (62 63) */ add %g2,%o4,%g2 +/* 0x0b40 962 (63 64) */ st %o0,[%i0+16] +/* 0x0b44 963 (63 64) */ add %g2,%g3,%g2 +/* 0x0b48 964 (64 65) */ st %g4,[%i0+8] +/* 0x0b4c 968 (64 65) */ srlx %g2,32,%o7 +/* 0x0b50 969 (65 66) */ st %g5,[%i0+12] +/* 0x0b54 970 (66 67) */ st %g2,[%i0+28] +/* 0x0b58 971 (66 67) */ or %g0,%o7,%i0 +/* 0x0b5c (67 74) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0b60 (69 71) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000050 +! + + .L77000050: /* frequency 1.0 confidence 0.0 */ +/* 0x0b64 978 ( 0 1) */ subcc %o2,16,%g0 +/* 0x0b68 979 ( 0 1) */ bne,pn %icc,.L77000073 ! 
tprob=0.50 +/* 0x0b6c ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0b70 981 ( 1 5) */ ldd [%g5],%f4 +/* 0x0b74 982 ( 2 6) */ ldd [%g5+8],%f6 +/* 0x0b78 989 ( 2 3) */ andn %o1,%g2,%g2 +/* 0x0b7c 993 ( 2 3) */ srl %o1,19,%g3 +/* 0x0b80 994 ( 3 7) */ ldd [%g5+16],%f8 +/* 0x0b84 995 ( 4 8) */ fxnor %f0,%f4,%f4 +/* 0x0b88 996 ( 4 5) */ st %g2,[%sp+356] +/* 0x0b8c 997 ( 5 9) */ ldd [%o0],%f20 +/* 0x0b90 998 ( 5 9) */ fxnor %f0,%f6,%f6 +/* 0x0b94 999 ( 6 7) */ st %g3,[%sp+352] +/* 0x0b98 1000 ( 6 10) */ fxnor %f0,%f8,%f8 +/* 0x0b9c 1005 ( 7 11) */ ldd [%o0+8],%f30 +/* 0x0ba0 1006 ( 8 13) */ fitod %f4,%f22 +/* 0x0ba4 1007 ( 8 12) */ ldd [%g5+24],%f10 +/* 0x0ba8 1008 ( 9 12) */ fmovs %f20,%f24 +/* 0x0bac 1009 ( 9 13) */ ldd [%g5+32],%f12 +/* 0x0bb0 1010 (10 15) */ fitod %f5,%f4 +/* 0x0bb4 1011 (10 14) */ ldd [%g5+40],%f14 +/* 0x0bb8 1012 (11 14) */ fmovs %f20,%f26 +/* 0x0bbc 1013 (11 15) */ ldd [%g5+48],%f16 +/* 0x0bc0 1014 (12 14) */ ld [%sp+356],%f25 +/* 0x0bc4 1015 (12 17) */ fitod %f6,%f28 +/* 0x0bc8 1016 (13 15) */ ld [%sp+352],%f27 +/* 0x0bcc 1017 (13 18) */ fitod %f8,%f32 +/* 0x0bd0 1018 (14 19) */ fsubd %f30,%f22,%f22 +/* 0x0bd4 1019 (14 18) */ ldd [%g5+56],%f18 +/* 0x0bd8 1020 (15 20) */ fsubd %f24,%f20,%f24 +/* 0x0bdc 1021 (16 21) */ fsubd %f26,%f20,%f20 +/* 0x0be0 1022 (17 22) */ fsubd %f30,%f4,%f4 +/* 0x0be4 1023 (18 23) */ fsubd %f30,%f28,%f26 +/* 0x0be8 1024 (19 24) */ fitod %f7,%f6 +/* 0x0bec 1025 (20 25) */ fsubd %f30,%f32,%f28 +/* 0x0bf0 1026 (20 25) */ fmuld %f22,%f24,%f32 +/* 0x0bf4 1027 (21 26) */ fmuld %f22,%f20,%f22 +/* 0x0bf8 1028 (21 25) */ fxnor %f0,%f10,%f10 +/* 0x0bfc 1029 (22 27) */ fmuld %f4,%f24,%f44 +/* 0x0c00 1030 (22 27) */ fitod %f9,%f8 +/* 0x0c04 1031 (23 28) */ fmuld %f4,%f20,%f4 +/* 0x0c08 1032 (23 27) */ fxnor %f0,%f12,%f12 +/* 0x0c0c 1033 (24 29) */ fsubd %f30,%f6,%f6 +/* 0x0c10 1034 (24 29) */ fmuld %f26,%f24,%f46 +/* 0x0c14 1035 (25 30) */ fitod %f10,%f34 +/* 0x0c18 1036 (26 31) */ fdtox %f22,%f22 +/* 0x0c1c 1037 (26 27) */ std 
%f22,[%sp+336] +/* 0x0c20 1038 (27 32) */ fmuld %f26,%f20,%f22 +/* 0x0c24 1039 (27 32) */ fdtox %f44,%f26 +/* 0x0c28 1040 (27 28) */ std %f26,[%sp+328] +/* 0x0c2c 1041 (28 33) */ fdtox %f4,%f4 +/* 0x0c30 1042 (28 29) */ std %f4,[%sp+320] +/* 0x0c34 1043 (29 34) */ fmuld %f6,%f24,%f26 +/* 0x0c38 1044 (29 34) */ fsubd %f30,%f8,%f8 +/* 0x0c3c 1045 (30 35) */ fdtox %f46,%f4 +/* 0x0c40 1046 (30 31) */ std %f4,[%sp+312] +/* 0x0c44 1047 (31 36) */ fmuld %f28,%f24,%f4 +/* 0x0c48 1048 (31 36) */ fdtox %f32,%f32 +/* 0x0c4c 1049 (31 32) */ std %f32,[%sp+344] +/* 0x0c50 1050 (32 37) */ fitod %f11,%f10 +/* 0x0c54 1051 (32 37) */ fmuld %f6,%f20,%f32 +/* 0x0c58 1052 (33 38) */ fsubd %f30,%f34,%f34 +/* 0x0c5c 1053 (34 39) */ fdtox %f22,%f6 +/* 0x0c60 1054 (34 35) */ std %f6,[%sp+304] +/* 0x0c64 1058 (35 40) */ fitod %f12,%f36 +/* 0x0c68 1059 (35 40) */ fmuld %f28,%f20,%f6 +/* 0x0c6c 1060 (36 41) */ fdtox %f26,%f22 +/* 0x0c70 1061 (36 37) */ std %f22,[%sp+296] +/* 0x0c74 1062 (37 42) */ fmuld %f8,%f24,%f22 +/* 0x0c78 1063 (37 42) */ fdtox %f4,%f4 +/* 0x0c7c 1064 (37 38) */ std %f4,[%sp+280] +/* 0x0c80 1065 (38 43) */ fmuld %f8,%f20,%f8 +/* 0x0c84 1066 (38 43) */ fsubd %f30,%f10,%f10 +/* 0x0c88 1067 (39 44) */ fmuld %f34,%f24,%f4 +/* 0x0c8c 1068 (39 44) */ fitod %f13,%f12 +/* 0x0c90 1069 (40 45) */ fsubd %f30,%f36,%f36 +/* 0x0c94 1070 (41 46) */ fdtox %f6,%f6 +/* 0x0c98 1071 (41 42) */ std %f6,[%sp+272] +/* 0x0c9c 1072 (42 46) */ fxnor %f0,%f14,%f14 +/* 0x0ca0 1073 (42 47) */ fmuld %f34,%f20,%f6 +/* 0x0ca4 1074 (43 48) */ fdtox %f22,%f22 +/* 0x0ca8 1075 (43 44) */ std %f22,[%sp+264] +/* 0x0cac 1076 (44 49) */ fdtox %f8,%f8 +/* 0x0cb0 1077 (44 45) */ std %f8,[%sp+256] +/* 0x0cb4 1078 (44 49) */ fmuld %f10,%f24,%f22 +/* 0x0cb8 1079 (45 50) */ fdtox %f4,%f4 +/* 0x0cbc 1080 (45 46) */ std %f4,[%sp+248] +/* 0x0cc0 1081 (45 50) */ fmuld %f10,%f20,%f8 +/* 0x0cc4 1082 (46 51) */ fsubd %f30,%f12,%f4 +/* 0x0cc8 1083 (46 51) */ fmuld %f36,%f24,%f10 +/* 0x0ccc 1084 (47 52) */ fitod %f14,%f38 
+/* 0x0cd0 1085 (48 53) */ fdtox %f6,%f6 +/* 0x0cd4 1086 (48 49) */ std %f6,[%sp+240] +/* 0x0cd8 1087 (49 54) */ fdtox %f22,%f12 +/* 0x0cdc 1088 (49 50) */ std %f12,[%sp+232] +/* 0x0ce0 1089 (49 54) */ fmuld %f36,%f20,%f6 +/* 0x0ce4 1090 (50 55) */ fdtox %f8,%f8 +/* 0x0ce8 1091 (50 51) */ std %f8,[%sp+224] +/* 0x0cec 1092 (51 56) */ fdtox %f10,%f22 +/* 0x0cf0 1093 (51 52) */ std %f22,[%sp+216] +/* 0x0cf4 1094 (51 56) */ fmuld %f4,%f24,%f8 +/* 0x0cf8 1095 (52 57) */ fitod %f15,%f14 +/* 0x0cfc 1096 (52 57) */ fmuld %f4,%f20,%f4 +/* 0x0d00 1097 (53 58) */ fsubd %f30,%f38,%f22 +/* 0x0d04 1098 (54 58) */ fxnor %f0,%f16,%f16 +/* 0x0d08 1099 (55 60) */ fdtox %f6,%f6 +/* 0x0d0c 1100 (55 56) */ std %f6,[%sp+208] +/* 0x0d10 1101 (56 61) */ fdtox %f8,%f6 +/* 0x0d14 1102 (56 57) */ std %f6,[%sp+200] +/* 0x0d18 1103 (57 62) */ fsubd %f30,%f14,%f10 +/* 0x0d1c 1104 (58 63) */ fitod %f16,%f40 +/* 0x0d20 1105 (58 63) */ fmuld %f22,%f24,%f6 +/* 0x0d24 1106 (59 64) */ fdtox %f4,%f4 +/* 0x0d28 1107 (59 60) */ std %f4,[%sp+192] +/* 0x0d2c 1108 (60 65) */ fitod %f17,%f16 +/* 0x0d30 1109 (60 65) */ fmuld %f22,%f20,%f4 +/* 0x0d34 1110 (61 65) */ fxnor %f0,%f18,%f18 +/* 0x0d38 1111 (62 67) */ fdtox %f32,%f32 +/* 0x0d3c 1112 (62 63) */ std %f32,[%sp+288] +/* 0x0d40 1113 (62 67) */ fmuld %f10,%f24,%f8 +/* 0x0d44 1114 (63 68) */ fdtox %f6,%f6 +/* 0x0d48 1115 (63 64) */ std %f6,[%sp+184] +/* 0x0d4c 1116 (63 68) */ fmuld %f10,%f20,%f22 +/* 0x0d50 1117 (64 69) */ fsubd %f30,%f40,%f6 +/* 0x0d54 1118 (65 70) */ fdtox %f4,%f4 +/* 0x0d58 1119 (65 66) */ std %f4,[%sp+176] +/* 0x0d5c 1120 (66 71) */ fsubd %f30,%f16,%f10 +/* 0x0d60 1121 (67 72) */ fdtox %f8,%f4 +/* 0x0d64 1122 (67 68) */ std %f4,[%sp+168] +/* 0x0d68 1123 (68 73) */ fdtox %f22,%f4 +/* 0x0d6c 1124 (68 69) */ std %f4,[%sp+160] +/* 0x0d70 1125 (69 74) */ fitod %f18,%f42 +/* 0x0d74 1126 (69 74) */ fmuld %f6,%f24,%f4 +/* 0x0d78 1127 (70 75) */ fmuld %f6,%f20,%f22 +/* 0x0d7c 1128 (71 76) */ fmuld %f10,%f24,%f6 +/* 0x0d80 1129 (72 77) */ fmuld 
%f10,%f20,%f8 +/* 0x0d84 1130 (74 79) */ fdtox %f4,%f4 +/* 0x0d88 1131 (74 75) */ std %f4,[%sp+152] +/* 0x0d8c 1132 (75 80) */ fsubd %f30,%f42,%f4 +/* 0x0d90 1133 (76 81) */ fdtox %f6,%f6 +/* 0x0d94 1134 (76 77) */ std %f6,[%sp+136] +/* 0x0d98 1135 (77 82) */ fdtox %f22,%f22 +/* 0x0d9c 1136 (77 78) */ std %f22,[%sp+144] +/* 0x0da0 1137 (78 83) */ fdtox %f8,%f22 +/* 0x0da4 1138 (78 79) */ std %f22,[%sp+128] +/* 0x0da8 1139 (79 84) */ fitod %f19,%f22 +/* 0x0dac 1140 (80 85) */ fmuld %f4,%f24,%f6 +/* 0x0db0 1141 (81 86) */ fmuld %f4,%f20,%f4 +/* 0x0db4 1142 (84 89) */ fsubd %f30,%f22,%f22 +/* 0x0db8 1143 (85 90) */ fdtox %f6,%f6 +/* 0x0dbc 1144 (85 86) */ std %f6,[%sp+120] +/* 0x0dc0 1145 (86 91) */ fdtox %f4,%f4 +/* 0x0dc4 1146 (86 87) */ std %f4,[%sp+112] +/* 0x0dc8 1150 (87 89) */ ldx [%sp+336],%g2 +/* 0x0dcc 1151 (88 90) */ ldx [%sp+344],%g3 +/* 0x0dd0 1152 (89 91) */ ld [%i1],%g4 +/* 0x0dd4 1153 (89 90) */ sllx %g2,19,%g2 +/* 0x0dd8 1154 (89 94) */ fmuld %f22,%f20,%f4 +/* 0x0ddc 1155 (90 92) */ ldx [%sp+328],%g5 +/* 0x0de0 1156 (90 91) */ add %g3,%g2,%g2 +/* 0x0de4 1157 (90 95) */ fmuld %f22,%f24,%f6 +/* 0x0de8 1158 (91 93) */ ldx [%sp+320],%g3 +/* 0x0dec 1159 (91 92) */ add %g2,%g4,%g4 +/* 0x0df0 1160 (92 94) */ ldx [%sp+304],%o0 +/* 0x0df4 1161 (93 94) */ st %g4,[%i0] +/* 0x0df8 1162 (93 94) */ sllx %g3,19,%g2 +/* 0x0dfc 1163 (93 94) */ srlx %g4,32,%g4 +/* 0x0e00 1164 (94 96) */ ld [%i1+4],%g3 +/* 0x0e04 1165 (94 95) */ add %g5,%g2,%g2 +/* 0x0e08 1166 (94 99) */ fdtox %f4,%f4 +/* 0x0e0c 1167 (95 97) */ ldx [%sp+312],%g5 +/* 0x0e10 1168 (95 100) */ fdtox %f6,%f6 +/* 0x0e14 1169 (96 98) */ ldx [%sp+288],%o1 +/* 0x0e18 1170 (96 97) */ add %g2,%g3,%g2 +/* 0x0e1c 1171 (96 97) */ sllx %o0,19,%g3 +/* 0x0e20 1172 (97 99) */ ldx [%sp+272],%o2 +/* 0x0e24 1173 (97 98) */ add %g2,%g4,%g2 +/* 0x0e28 1174 (97 98) */ add %g5,%g3,%g3 +/* 0x0e2c 1175 (98 100) */ ld [%i1+8],%g4 +/* 0x0e30 1176 (98 99) */ srlx %g2,32,%o0 +/* 0x0e34 1177 (99 101) */ ldx [%sp+296],%g5 +/* 0x0e38 
1178 (100 101) */ st %g2,[%i0+4] +/* 0x0e3c 1179 (100 101) */ sllx %o2,19,%g2 +/* 0x0e40 1180 (100 101) */ add %g3,%g4,%g3 +/* 0x0e44 1181 (101 103) */ ldx [%sp+256],%o2 +/* 0x0e48 1182 (101 102) */ sllx %o1,19,%g4 +/* 0x0e4c 1183 (101 102) */ add %g3,%o0,%g3 +/* 0x0e50 1184 (102 104) */ ld [%i1+12],%o0 +/* 0x0e54 1185 (102 103) */ srlx %g3,32,%o1 +/* 0x0e58 1186 (102 103) */ add %g5,%g4,%g4 +/* 0x0e5c 1187 (103 105) */ ldx [%sp+280],%g5 +/* 0x0e60 1188 (104 105) */ st %g3,[%i0+8] +/* 0x0e64 1189 (104 105) */ sllx %o2,19,%g3 +/* 0x0e68 1190 (104 105) */ add %g4,%o0,%g4 +/* 0x0e6c 1191 (105 107) */ ld [%i1+16],%o0 +/* 0x0e70 1192 (105 106) */ add %g5,%g2,%g2 +/* 0x0e74 1193 (105 106) */ add %g4,%o1,%g4 +/* 0x0e78 1194 (106 108) */ ldx [%sp+264],%g5 +/* 0x0e7c 1195 (106 107) */ srlx %g4,32,%o1 +/* 0x0e80 1196 (107 109) */ ldx [%sp+240],%o2 +/* 0x0e84 1197 (107 108) */ add %g2,%o0,%g2 +/* 0x0e88 1198 (108 110) */ ld [%i1+20],%o0 +/* 0x0e8c 1199 (108 109) */ add %g5,%g3,%g3 +/* 0x0e90 1200 (108 109) */ add %g2,%o1,%g2 +/* 0x0e94 1201 (109 111) */ ldx [%sp+248],%g5 +/* 0x0e98 1202 (109 110) */ srlx %g2,32,%o1 +/* 0x0e9c 1203 (110 111) */ st %g4,[%i0+12] +/* 0x0ea0 1204 (110 111) */ sllx %o2,19,%g4 +/* 0x0ea4 1205 (110 111) */ add %g3,%o0,%g3 +/* 0x0ea8 1206 (111 113) */ ld [%i1+24],%o0 +/* 0x0eac 1207 (111 112) */ add %g5,%g4,%g4 +/* 0x0eb0 1208 (111 112) */ add %g3,%o1,%g3 +/* 0x0eb4 1209 (112 114) */ ldx [%sp+224],%o2 +/* 0x0eb8 1210 (112 113) */ srlx %g3,32,%o1 +/* 0x0ebc 1211 (113 115) */ ldx [%sp+232],%g5 +/* 0x0ec0 1212 (113 114) */ add %g4,%o0,%g4 +/* 0x0ec4 1213 (114 115) */ st %g2,[%i0+16] +/* 0x0ec8 1214 (114 115) */ sllx %o2,19,%g2 +/* 0x0ecc 1215 (114 115) */ add %g4,%o1,%g4 +/* 0x0ed0 1216 (115 117) */ ld [%i1+28],%o0 +/* 0x0ed4 1217 (115 116) */ srlx %g4,32,%o1 +/* 0x0ed8 1218 (115 116) */ add %g5,%g2,%g2 +/* 0x0edc 1222 (116 118) */ ldx [%sp+208],%o2 +/* 0x0ee0 1223 (117 119) */ ldx [%sp+216],%g5 +/* 0x0ee4 1224 (117 118) */ add %g2,%o0,%g2 +/* 0x0ee8 
1225 (118 119) */ st %g3,[%i0+20] +/* 0x0eec 1226 (118 119) */ sllx %o2,19,%g3 +/* 0x0ef0 1227 (118 119) */ add %g2,%o1,%g2 +/* 0x0ef4 1228 (119 121) */ ld [%i1+32],%o0 +/* 0x0ef8 1229 (119 120) */ srlx %g2,32,%o1 +/* 0x0efc 1230 (119 120) */ add %g5,%g3,%g3 +/* 0x0f00 1231 (120 122) */ ldx [%sp+192],%o2 +/* 0x0f04 1232 (121 123) */ ldx [%sp+200],%g5 +/* 0x0f08 1233 (121 122) */ add %g3,%o0,%g3 +/* 0x0f0c 1234 (122 123) */ st %g4,[%i0+24] +/* 0x0f10 1235 (122 123) */ sllx %o2,19,%g4 +/* 0x0f14 1236 (122 123) */ add %g3,%o1,%g3 +/* 0x0f18 1237 (123 125) */ ld [%i1+36],%o0 +/* 0x0f1c 1238 (123 124) */ srlx %g3,32,%o1 +/* 0x0f20 1239 (123 124) */ add %g5,%g4,%g4 +/* 0x0f24 1240 (124 126) */ ldx [%sp+176],%o2 +/* 0x0f28 1241 (125 127) */ ldx [%sp+184],%g5 +/* 0x0f2c 1242 (125 126) */ add %g4,%o0,%g4 +/* 0x0f30 1243 (126 127) */ st %g2,[%i0+28] +/* 0x0f34 1244 (126 127) */ sllx %o2,19,%g2 +/* 0x0f38 1245 (126 127) */ add %g4,%o1,%g4 +/* 0x0f3c 1246 (127 129) */ ld [%i1+40],%o0 +/* 0x0f40 1247 (127 128) */ srlx %g4,32,%o1 +/* 0x0f44 1248 (127 128) */ add %g5,%g2,%g2 +/* 0x0f48 1249 (128 130) */ ldx [%sp+160],%o2 +/* 0x0f4c 1250 (129 131) */ ldx [%sp+168],%g5 +/* 0x0f50 1251 (129 130) */ add %g2,%o0,%g2 +/* 0x0f54 1252 (130 131) */ st %g3,[%i0+32] +/* 0x0f58 1253 (130 131) */ sllx %o2,19,%g3 +/* 0x0f5c 1254 (130 131) */ add %g2,%o1,%g2 +/* 0x0f60 1255 (131 133) */ ld [%i1+44],%o0 +/* 0x0f64 1256 (131 132) */ srlx %g2,32,%o1 +/* 0x0f68 1257 (131 132) */ add %g5,%g3,%g3 +/* 0x0f6c 1258 (132 134) */ ldx [%sp+144],%o2 +/* 0x0f70 1259 (133 135) */ ldx [%sp+152],%g5 +/* 0x0f74 1260 (133 134) */ add %g3,%o0,%g3 +/* 0x0f78 1261 (134 135) */ st %g4,[%i0+36] +/* 0x0f7c 1262 (134 135) */ sllx %o2,19,%g4 +/* 0x0f80 1263 (134 135) */ add %g3,%o1,%g3 +/* 0x0f84 1264 (135 137) */ ld [%i1+48],%o0 +/* 0x0f88 1265 (135 136) */ srlx %g3,32,%o1 +/* 0x0f8c 1266 (135 136) */ add %g5,%g4,%g4 +/* 0x0f90 1267 (136 138) */ ldx [%sp+128],%o2 +/* 0x0f94 1268 (137 139) */ ldx [%sp+136],%g5 +/* 0x0f98 
1269 (137 138) */ add %g4,%o0,%g4 +/* 0x0f9c 1270 (138 139) */ std %f4,[%sp+96] +/* 0x0fa0 1271 (138 139) */ add %g4,%o1,%g4 +/* 0x0fa4 1272 (139 140) */ st %g2,[%i0+40] +/* 0x0fa8 1273 (139 140) */ sllx %o2,19,%g2 +/* 0x0fac 1274 (139 140) */ srlx %g4,32,%o1 +/* 0x0fb0 1275 (140 142) */ ld [%i1+52],%o0 +/* 0x0fb4 1276 (140 141) */ add %g5,%g2,%g2 +/* 0x0fb8 1277 (141 142) */ std %f6,[%sp+104] +/* 0x0fbc 1278 (142 144) */ ldx [%sp+120],%g5 +/* 0x0fc0 1279 (142 143) */ add %g2,%o0,%g2 +/* 0x0fc4 1280 (143 144) */ st %g3,[%i0+44] +/* 0x0fc8 1281 (143 144) */ add %g2,%o1,%g2 +/* 0x0fcc 1282 (144 146) */ ldx [%sp+112],%o2 +/* 0x0fd0 1283 (144 145) */ srlx %g2,32,%o1 +/* 0x0fd4 1284 (145 147) */ ld [%i1+56],%o0 +/* 0x0fd8 1285 (146 147) */ st %g4,[%i0+48] +/* 0x0fdc 1286 (146 147) */ sllx %o2,19,%g3 +/* 0x0fe0 1287 (147 149) */ ldx [%sp+96],%o2 +/* 0x0fe4 1288 (147 148) */ add %g5,%g3,%g3 +/* 0x0fe8 1289 (148 150) */ ldx [%sp+104],%g5 +/* 0x0fec 1290 (148 149) */ add %g3,%o0,%g3 +/* 0x0ff0 1291 (149 151) */ ld [%i1+60],%o0 +/* 0x0ff4 1292 (149 150) */ sllx %o2,19,%g4 +/* 0x0ff8 1293 (149 150) */ add %g3,%o1,%g3 +/* 0x0ffc 1294 (150 151) */ st %g2,[%i0+52] +/* 0x1000 1295 (150 151) */ srlx %g3,32,%o1 +/* 0x1004 1296 (150 151) */ add %g5,%g4,%g4 +/* 0x1008 1297 (151 152) */ st %g3,[%i0+56] +/* 0x100c 1298 (151 152) */ add %g4,%o0,%g2 +/* 0x1010 1299 (152 153) */ add %g2,%o1,%g2 +/* 0x1014 1300 (152 153) */ st %g2,[%i0+60] +/* 0x1018 1304 (153 154) */ srlx %g2,32,%o7 + +! +! ENTRY .L77000061 +! + + .L77000061: /* frequency 1.0 confidence 0.0 */ +/* 0x119c 1437 ( 0 1) */ or %g0,%o7,%i0 + +! +! ENTRY .L900000159 +! + + .L900000159: /* frequency 1.0 confidence 0.0 */ +/* 0x11a0 ( 0 7) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x11a4 ( 2 4) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000073 +! + + .L77000073: /* frequency 1.0 confidence 0.0 */ + or %g0, %i4, %o2 + or %g0, %o0, %o1 + or %g0, %i3, %o0 + +! +! ENTRY .L77000052 +! 
+ + .L77000052: /* frequency 1.0 confidence 0.0 */ +/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2 +/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+96] +/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3 +/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14 +/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2 +/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+92] +/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5 +/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2 +/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6 +/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1800),%g1 +/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2 +/* 0x1054 1337 ( 3 4) */ xor %g1,-304,%g1 +/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20 +/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3 +/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8 +/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3 +/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10 +/* 0x106c 1343 ( 5 7) */ ld [%sp+96],%f9 +/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0 +/* 0x1074 1345 ( 6 8) */ ld [%sp+92],%f11 +/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1800),%g1 +/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1 +/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18 +/* 0x1084 1349 ( 7 8) */ xor %g1,-296,%g1 +/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4 +/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16 +/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50 +/* 0x1094 ( 8 9) */ subcc %o0,0,%g0 +/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2 +/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1800),%g1 +/* 0x10a0 1356 (10 11) */ xor %g1,-288,%g1 +/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0 +/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7 +/* 0x10ac 1359 (11 12) */ sethi %hi(0x1800),%g1 +/* 0x10b0 1360 (12 13) */ xor %g1,-280,%g1 +/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4 +/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! 
tprob=0.50 +/* 0x10bc (13 14) */ sub %o3,2,%o2 +/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2 +/* 0x10c4 1365 (14 15) */ add %o1,16,%g5 +/* 0x10c8 1366 (14 15) */ or %g0,4,%g4 +/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0 +/* 0x10d0 1368 (15 16) */ add %o1,8,%o1 +/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6 +/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4 +/* 0x10dc 1371 (16 17) */ add %o1,16,%o1 +/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12 +/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0 +/* 0x10e8 1374 (17 18) */ add %o1,8,%o1 +/* 0x10ec 1375 (18 21) */ fitod %f7,%f2 +/* 0x10f0 1376 (19 22) */ fitod %f6,%f6 +/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10 +/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8 +/* 0x1100 1380 (23 26) */ fitod %f13,%f4 +/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6 +/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000154 +! + + .L990000154: /* frequency 1.0 confidence 0.0 */ +/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24 +/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4 +/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4 +/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22 +/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26 +/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0 +/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7 +/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28 +/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6 +/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2 +/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3 +/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0 +/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4 +/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2 +/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12 +/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6 +/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96] +/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96] +/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2 +/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6 +/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96] +/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1 +/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12 +/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4 +/* 0x116c 1408 (10 
11) */ std %f0,[%o4-96] +/* 0x1170 1409 (11 14) */ ldd [%o1],%f0 +/* 0x1174 1410 (11 14) */ fitod %f9,%f2 +/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28 +/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24 +/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22 +/* 0x1184 1414 (13 16) */ fdtox %f4,%f4 +/* 0x1188 1415 (14 17) */ fitod %f10,%f6 +/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10 +/* 0x1190 1417 (15 18) */ fdtox %f24,%f24 +/* 0x1194 1418 (16 19) */ fdtox %f22,%f22 +/* 0x1198 1419 (16 17) */ std %f24,[%g3-64] +/* 0x119c 1420 (17 18) */ std %f22,[%g2-64] +/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10 +/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6 +/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64] +/* 0x11ac 1424 (18 19) */ add %o1,8,%o1 +/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10 +/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0 +/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64] +/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22 +/* 0x11c0 1429 (20 23) */ fitod %f13,%f4 +/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26 +/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24 +/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0 +/* 0x11d4 1434 (23 26) */ fitod %f8,%f6 +/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8 +/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26 +/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24 +/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32] +/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32] +/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8 +/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6 +/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32] +/* 0x11f8 1443 (27 28) */ add %o1,8,%o1 +/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8 +/* 0x1200 1445 (28 29) */ std %f0,[%o4-32] +/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50 +/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000157 +! 
+ + .L990000157: /* frequency 1.0 confidence 0.0 */ +/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28 +/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24 +/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3 +/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12 +/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26 +/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2 +/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4 +/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22 +/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7 +/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6 +/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128] +/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4 +/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2 +/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0 +/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6 +/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24 +/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10 +/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128] +/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10 +/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128] +/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26 +/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10 +/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22 +/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12 +/* 0x1270 1474 (10 13) */ fdtox %f0,%f0 +/* 0x1274 1475 (10 11) */ std %f0,[%o4-128] +/* 0x1278 1476 (11 14) */ fitod %f8,%f4 +/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6 +/* 0x1280 1478 (12 15) */ fdtox %f26,%f0 +/* 0x1284 1479 (12 13) */ std %f0,[%g3-96] +/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10 +/* 0x128c 1481 (13 16) */ fdtox %f2,%f2 +/* 0x1290 1482 (13 14) */ std %f2,[%g2-96] +/* 0x1294 1483 (14 17) */ fitod %f9,%f0 +/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2 +/* 0x129c 1485 (15 18) */ fdtox %f24,%f8 +/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96] +/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4 +/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8 +/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12 +/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96] +/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0 +/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6 +/* 
0x12bc 1493 (19 20) */ std %f6,[%g3-64] +/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10 +/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64] +/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6 +/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2 +/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64] +/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4 +/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2 +/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8 +/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64] +/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6 +/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32] +/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0 +/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4 +/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32] +/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2 +/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32] +/* 0x1300 1510 (26 29) */ fdtox %f0,%f0 +/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50 +/* 0x1308 (26 27) */ std %f0,[%o4-32] + +! +! ENTRY .L77000054 +! + + .L77000054: /* frequency 1.0 confidence 0.0 */ +/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0 + +! +! ENTRY .L990000161 +! 
+ + .L990000161: /* frequency 1.0 confidence 0.0 */ +/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4 +/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1 +/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0 +/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2 +/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0 +/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2 +/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0 +/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6 +/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4 +/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2 +/* 0x133c 1527 (11 14) */ fdtox %f6,%f6 +/* 0x1340 1528 (11 12) */ std %f6,[%g3] +/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0 +/* 0x1348 1530 (12 15) */ fdtox %f4,%f4 +/* 0x134c 1531 (12 13) */ std %f4,[%g2] +/* 0x1350 1532 (12 13) */ add %g2,32,%g2 +/* 0x1354 1533 (13 16) */ fdtox %f2,%f2 +/* 0x1358 1534 (13 14) */ std %f2,[%o7] +/* 0x135c 1535 (13 14) */ add %o7,32,%o7 +/* 0x1360 1536 (14 17) */ fdtox %f0,%f0 +/* 0x1364 1537 (14 15) */ std %f0,[%o4] +/* 0x1368 1538 (14 15) */ add %o4,32,%o4 +/* 0x136c 1539 (15 16) */ add %g3,32,%g3 +/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50 +/* 0x1374 (16 19) */ ldd [%o1],%f0 + +! +! ENTRY .L77000056 +! + + .L77000056: /* frequency 1.0 confidence 0.0 */ +/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0 + +! +! ENTRY .L990000162 +! + + .L990000162: /* frequency 1.0 confidence 0.0 */ +/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50 +/* 0x1380 ( 0 1) */ nop +/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1800),%g1 +/* 0x1388 1556 ( 1 2) */ xor %g1,-304,%g1 +/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4 +/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5 +/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1800),%g1 +/* 0x1398 1560 ( 3 4) */ xor %g1,-296,%g1 +/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7 +/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2 +/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2 +/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3 +/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0 +/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! 
tprob=0.50 +/* 0x13b4 ( 6 7) */ sethi %hi(0x1800),%g1 +/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2 +/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3 +/* 0x13c0 1570 ( 7 8) */ xor %g1,-264,%g1 +/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4 +/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2 +/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1800),%g1 +/* 0x13d0 1574 ( 9 10) */ xor %g1,-272,%g1 +/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2 +/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5 +/* 0x13dc 1577 (10 11) */ sethi %hi(0x1800),%g1 +/* 0x13e0 1578 (11 12) */ xor %g1,-296,%g1 +/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1 +/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1 +/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0 +/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1 +/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3 +/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0 +/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1 +/* 0x1400 1586 (16 17) */ add %g4,8,%g4 +/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3 +/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0 +/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2 +/* 0x1410 1590 (18 19) */ st %o0,[%g3-4] +/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000142 +! 
+ + .L990000142: /* frequency 1.0 confidence 0.0 */ +/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2 +/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2 +/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3 +/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5 +/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1 +/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0 +/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2 +/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0 +/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1 +/* 0x143c 1602 ( 4 5) */ st %o1,[%g3] +/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5 +/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0 +/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1 +/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0 +/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3 +/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2 +/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0 +/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3 +/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1 +/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0 +/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12] +/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5 +/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4 +/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0 +/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1 +/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2 +/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3 +/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2 +/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1 +/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0 +/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2 +/* 0x1494 1624 (12 13) */ st %o2,[%g3-8] +/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5 +/* 0x149c 1626 (12 13) */ add %g5,64,%g5 +/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2 +/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0 +/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1 +/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0 +/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3 +/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2 +/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0 +/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4] +/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50 +/* 0x14c4 (16 17) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000145 +! 
+ + .L990000145: /* frequency 1.0 confidence 0.0 */ +/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3 +/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3 +/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2 +/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0 +/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0 +/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4] +/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0 +/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50 +/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5 + +! +! ENTRY .L77000058 +! + + .L77000058: /* frequency 1.0 confidence 0.0 */ +/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2 + +! +! ENTRY .L990000160 +! + + .L990000160: /* frequency 1.0 confidence 0.0 */ +/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3 +/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0 +/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2 +/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1 +/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2 +/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2 +/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0 +/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5 +/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0 +/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4 +/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0 +/* 0x151c 1661 ( 4 5) */ st %o0,[%g3] +/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0 +/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5 +/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3 +/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50 +/* 0x1530 ( 6 8) */ ldx [%g2],%o2 + +! +! ENTRY .L77770061 +! + + .L77770061: /* frequency 1.0 confidence 0.0 */ +/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0 + +/* 0x11a8 1441 ( 0 0) */ .type mul_add,2 +/* 0x11a8 1442 ( 0 0) */ .size mul_add,(.-mul_add) +/* 0x11a8 1445 ( 0 0) */ .align 16 +/* 0x11b0 1451 ( 0 0) */ .global mul_add_inp + +! +! ENTRY mul_add_inp +! 
+ + .global mul_add_inp + mul_add_inp: /* frequency 1.0 confidence 0.0 */ /* Wrapper around mul_add: receives four arguments in %o0-%o3, leaves %o0 in place and copies it to %o1, moves the other three up one register (%o1->%o2, %o2->%o3, %o3->%o4), then tail-calls mul_add. NOTE(review): presumably this is the in-place form, i.e. mul_add with its first two arguments pointing at the same buffer - confirm against the C prototype. */ +/* 0x11b0 1453 ( 0 1) */ or %g0,%o2,%g1 /* park incoming %o2 (3rd argument) in %g1 */ +/* 0x11b4 1454 ( 0 1) */ or %g0,%o3,%o4 /* incoming %o3 (4th argument) becomes outgoing %o4 (5th argument) */ +/* 0x11b8 1455 ( 1 2) */ or %g0,%o0,%g3 /* copy incoming %o0 (1st argument) to %g3 */ +/* 0x11bc 1456 ( 1 2) */ or %g0,%o1,%g2 /* park incoming %o1 (2nd argument) in %g2 */ +/* 0x11c0 1466 ( 2 3) */ or %g0,%g1,%o3 /* outgoing %o3 = old %o2 */ +/* 0x11c4 1467 ( 2 3) */ or %g0,%g3,%o1 /* outgoing %o1 = old %o0; %o0 is never rewritten, so %o0 and %o1 now carry the same value */ +/* 0x11c8 1468 ( 3 4) */ or %g0,%g2,%o2 /* outgoing %o2 = old %o1 */ +/* 0x11cc 1469 ( 3 4) */ or %g0,%o7,%g1 /* %g1 is free again: save our caller's return address, since call writes its own address into %o7 */ +/* 0x11d0 1470 ( 4 6) */ call mul_add ! params = ! Result = +/* 0x11d4 ( 5 6) */ or %g0,%g1,%o7 /* delay slot: restore the saved %o7 so mul_add returns straight to our caller (tail call; nothing executes here afterwards) */ +/* 0x11d8 1472 ( 0 0) */ .type mul_add_inp,2 +/* 0x11d8 1473 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp) + + .section ".data",#alloc,#write +/* 0x11d8 6 ( 0 0) */ .align 8 + +! +! ENTRY mask_cnst +! + + mask_cnst: /* frequency 1.0 confidence 0.0 */ /* 8-byte constant made of two identical 32-bit words, each 0x80000000 (only the top bit set). NOTE(review): presumably the mask operand of the fxnor instructions in mul_add - confirm at the load site. */ +/* 0x11d8 8 ( 0 0) */ .word -2147483648 /* 0x80000000 */ +/* 0x11dc 9 ( 0 0) */ .word -2147483648 /* 0x80000000 */ +/* 0x11e0 10 ( 0 0) */ .type mask_cnst,#object +/* 0x11e0 11 ( 0 0) */ .size mask_cnst,8 + diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv9.s b/security/nss/lib/freebl/mpi/mpv_sparcv9.s new file mode 100644 index 0000000000..e2fbe0bd00 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparcv9.s @@ -0,0 +1,1645 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .register %g2,#scratch +/* 000000 ( 0 0) */ .register %g3,#scratch +/* 000000 3 ( 0 0) */ .file "mpv_sparc.c" +/* 000000 15 ( 0 0) */ .align 8 +! +! SUBROUTINE .L_const_seg_900000101 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .L_const_seg_900000101: /* frequency 1.0 confidence 0.0 */ /* Two 8-byte constants placed in .text; mul_add fetches them with ldd [%o0] and ldd [%o0+8], i.e. as double-precision values. */ +/* 000000 20 ( 0 0) */ .word 1127219200,0 /* 0x43300000,0x00000000 = 2^52 when read as a big-endian IEEE-754 double */ +/* 0x0008 21 ( 0 0) */ .word 1105199103,-4194304 /* 0x41DFFFFF,0xFFC00000 = 2147483647.0 (2^31 - 1) when read as a big-endian IEEE-754 double */ +/* 0x0010 22 ( 0 0) */ .align 8 +/* 0x0010 28 ( 0 0) */ .global mul_add + +! +! ENTRY mul_add +! 
+ + .global mul_add + mul_add: /* frequency 1.0 confidence 0.0 */ +/* 0x0010 30 ( 0 1) */ sethi %hi(0x1c00),%g1 +/* 0x0014 31 ( 0 1) */ sethi %hi(mask_cnst),%g2 +/* 0x0018 32 ( 1 2) */ xor %g1,-48,%g1 +/* 0x001c 33 ( 1 2) */ add %g2,%lo(mask_cnst),%g2 +/* 0x0020 34 ( 2 3) */ save %sp,%g1,%sp + +! +! ENTRY .L900000149 +! + + .L900000149: /* frequency 1.0 confidence 0.0 */ +/* 0x0024 36 ( 0 2) */ call (.+0x8) ! params = ! Result = +/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5 +/* 0x002c 178 ( 2 3) */ sethi %hi(.L_const_seg_900000101),%g3 +/* 0x0030 179 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5 +/* 0x0034 180 ( 3 4) */ add %g3,%lo(.L_const_seg_900000101),%g3 +/* 0x0038 181 ( 3 4) */ add %g5,%o7,%o1 +/* 0x003c 182 ( 4 5) */ sethi %hi(0x80000),%g4 +/* 0x0040 183 ( 4 6) */ ldx [%o1+%g2],%g2 +/* 0x0044 184 ( 4 5) */ or %g0,%i2,%o2 +/* 0x0048 185 ( 5 6) */ subcc %i4,%g4,%g0 +/* 0x004c 186 ( 5 7) */ ldx [%o1+%g3],%o0 +/* 0x0050 187 ( 6 7) */ or %g0,%i0,%o7 +/* 0x0054 188 ( 6 7) */ or %g0,%i1,%o5 +/* 0x0058 189 ( 6 9) */ ldd [%g2],%f0 +/* 0x005c 190 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50 +/* 0x0060 ( 7 8) */ subcc %i3,8,%g0 +/* 0x0064 192 ( 7 8) */ bne,pn %icc,.L900000158 ! 
tprob=0.50 +/* 0x0068 ( 8 9) */ subcc %i3,16,%g0 +/* 0x006c 194 ( 9 12) */ ldd [%o2],%f4 +/* 0x0070 195 (10 11) */ st %i4,[%sp+2287] +/* 0x0074 196 (11 14) */ ldd [%o0],%f8 +/* 0x0078 197 (11 13) */ fxnor %f0,%f4,%f4 +/* 0x007c 198 (12 15) */ ldd [%o2+8],%f10 +/* 0x0080 199 (13 16) */ fitod %f4,%f12 +/* 0x0084 200 (13 16) */ ldd [%o0+8],%f14 +/* 0x0088 201 (14 17) */ ld [%sp+2287],%f7 +/* 0x008c 202 (14 17) */ fitod %f5,%f4 +/* 0x0090 203 (15 17) */ fxnor %f0,%f10,%f10 +/* 0x0094 204 (15 18) */ ldd [%o2+16],%f16 +/* 0x0098 205 (16 19) */ ldd [%o2+24],%f18 +/* 0x009c 206 (17 20) */ fsubd %f14,%f4,%f4 +/* 0x00a0 210 (17 20) */ ld [%i1],%g2 +/* 0x00a4 211 (18 20) */ fxnor %f0,%f16,%f16 +/* 0x00a8 212 (18 21) */ ld [%i1+4],%g3 +/* 0x00ac 213 (19 22) */ ld [%i1+8],%g4 +/* 0x00b0 214 (20 23) */ fitod %f16,%f20 +/* 0x00b4 215 (20 23) */ ld [%i1+16],%o0 +/* 0x00b8 216 (21 24) */ ld [%i1+12],%g5 +/* 0x00bc 217 (22 25) */ ld [%i1+20],%o1 +/* 0x00c0 218 (23 26) */ ld [%i1+24],%o2 +/* 0x00c4 219 (24 25) */ fmovs %f8,%f6 +/* 0x00c8 220 (24 27) */ ld [%i1+28],%o3 +/* 0x00cc 221 (26 29) */ fsubd %f6,%f8,%f6 +/* 0x00d0 222 (27 30) */ fsubd %f14,%f12,%f8 +/* 0x00d4 223 (28 31) */ fitod %f10,%f12 +/* 0x00d8 224 (29 32) */ fmuld %f4,%f6,%f4 +/* 0x00dc 225 (29 32) */ fitod %f11,%f10 +/* 0x00e0 226 (30 33) */ fmuld %f8,%f6,%f8 +/* 0x00e4 227 (31 34) */ fsubd %f14,%f12,%f12 +/* 0x00e8 228 (32 35) */ fdtox %f4,%f4 +/* 0x00ec 229 (32 33) */ std %f4,[%sp+2271] +/* 0x00f0 230 (33 36) */ fdtox %f8,%f8 +/* 0x00f4 231 (33 34) */ std %f8,[%sp+2279] +/* 0x00f8 232 (34 37) */ fmuld %f12,%f6,%f12 +/* 0x00fc 233 (34 37) */ fsubd %f14,%f10,%f10 +/* 0x0100 234 (35 38) */ fsubd %f14,%f20,%f4 +/* 0x0104 235 (36 39) */ fitod %f17,%f8 +/* 0x0108 236 (37 39) */ fxnor %f0,%f18,%f16 +/* 0x010c 237 (37 39) */ ldx [%sp+2279],%o4 +/* 0x0110 238 (37 40) */ fmuld %f10,%f6,%f10 +/* 0x0114 239 (38 41) */ fdtox %f12,%f12 +/* 0x0118 240 (38 39) */ std %f12,[%sp+2263] +/* 0x011c 241 (38 41) */ fmuld %f4,%f6,%f4 +/* 
0x0120 242 (39 42) */ fitod %f16,%f18 +/* 0x0124 243 (39 40) */ add %o4,%g2,%g2 +/* 0x0128 244 (39 40) */ st %g2,[%i0] +/* 0x012c 245 (40 42) */ ldx [%sp+2271],%o4 +/* 0x0130 246 (40 43) */ fsubd %f14,%f8,%f8 +/* 0x0134 247 (40 41) */ srax %g2,32,%o5 +/* 0x0138 248 (41 44) */ fdtox %f10,%f10 +/* 0x013c 249 (41 42) */ std %f10,[%sp+2255] +/* 0x0140 250 (42 45) */ fdtox %f4,%f4 +/* 0x0144 251 (42 43) */ std %f4,[%sp+2247] +/* 0x0148 252 (42 43) */ add %o4,%g3,%o4 +/* 0x014c 253 (43 46) */ fitod %f17,%f12 +/* 0x0150 254 (43 45) */ ldx [%sp+2263],%g2 +/* 0x0154 255 (43 44) */ add %o4,%o5,%g3 +/* 0x0158 256 (43 46) */ fmuld %f8,%f6,%f8 +/* 0x015c 257 (44 47) */ fsubd %f14,%f18,%f10 +/* 0x0160 258 (44 45) */ st %g3,[%i0+4] +/* 0x0164 259 (44 45) */ srax %g3,32,%g3 +/* 0x0168 260 (45 46) */ add %g2,%g4,%g4 +/* 0x016c 261 (45 47) */ ldx [%sp+2255],%g2 +/* 0x0170 262 (46 49) */ fsubd %f14,%f12,%f4 +/* 0x0174 263 (46 47) */ add %g4,%g3,%g3 +/* 0x0178 264 (46 48) */ ldx [%sp+2247],%g4 +/* 0x017c 265 (47 50) */ fmuld %f10,%f6,%f10 +/* 0x0180 266 (47 50) */ fdtox %f8,%f8 +/* 0x0184 267 (47 48) */ std %f8,[%sp+2239] +/* 0x0188 268 (48 49) */ add %g4,%o0,%g4 +/* 0x018c 269 (48 49) */ add %g2,%g5,%g2 +/* 0x0190 270 (48 49) */ st %g3,[%i0+8] +/* 0x0194 271 (49 52) */ fmuld %f4,%f6,%f4 +/* 0x0198 272 (49 50) */ srax %g3,32,%o0 +/* 0x019c 273 (49 51) */ ldx [%sp+2239],%g5 +/* 0x01a0 274 (50 53) */ fdtox %f10,%f6 +/* 0x01a4 275 (50 51) */ std %f6,[%sp+2231] +/* 0x01a8 276 (50 51) */ add %g2,%o0,%g2 +/* 0x01ac 277 (51 52) */ srax %g2,32,%g3 +/* 0x01b0 278 (51 52) */ add %g5,%o1,%o1 +/* 0x01b4 279 (51 52) */ st %g2,[%i0+12] +/* 0x01b8 280 (52 55) */ fdtox %f4,%f4 +/* 0x01bc 281 (52 53) */ std %f4,[%sp+2223] +/* 0x01c0 282 (52 53) */ add %g4,%g3,%g3 +/* 0x01c4 283 (53 54) */ srax %g3,32,%g4 +/* 0x01c8 284 (53 54) */ st %g3,[%i0+16] +/* 0x01cc 285 (54 56) */ ldx [%sp+2231],%o0 +/* 0x01d0 286 (54 55) */ add %o1,%g4,%g4 +/* 0x01d4 287 (55 56) */ srax %g4,32,%g2 +/* 0x01d8 288 (55 57) */ ldx 
[%sp+2223],%g5 +/* 0x01dc 289 (56 57) */ add %o0,%o2,%o2 +/* 0x01e0 290 (56 57) */ st %g4,[%i0+20] +/* 0x01e4 291 (57 58) */ add %o2,%g2,%g2 +/* 0x01e8 292 (57 58) */ add %g5,%o3,%g5 +/* 0x01ec 293 (57 58) */ st %g2,[%i0+24] +/* 0x01f0 294 (58 59) */ srax %g2,32,%g3 +/* 0x01f4 295 (59 60) */ add %g5,%g3,%g2 +/* 0x01f8 296 (59 60) */ st %g2,[%i0+28] +/* 0x01fc 300 (60 61) */ srax %g2,32,%o3 +/* 0x0200 301 (61 62) */ srl %o3,0,%i0 +/* 0x0204 (62 64) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0208 (64 65) */ restore %g0,%g0,%g0 + +! +! ENTRY .L900000158 +! + + .L900000158: /* frequency 1.0 confidence 0.0 */ +/* 0x020c 308 ( 0 1) */ bne,a,pn %icc,.L900000157 ! tprob=0.50 +/* 0x0210 ( 0 1) */ st %i4,[%sp+2223] +/* 0x0214 315 ( 1 4) */ ldd [%o2],%f4 +/* 0x0218 316 ( 2 3) */ st %i4,[%sp+2351] +/* 0x021c 317 ( 3 6) */ ldd [%o0],%f8 +/* 0x0220 318 ( 3 5) */ fxnor %f0,%f4,%f4 +/* 0x0224 319 ( 4 7) */ ldd [%o2+8],%f10 +/* 0x0228 320 ( 5 8) */ ldd [%o0+8],%f14 +/* 0x022c 321 ( 5 8) */ fitod %f4,%f12 +/* 0x0230 322 ( 6 9) */ ld [%sp+2351],%f7 +/* 0x0234 323 ( 6 8) */ fxnor %f0,%f10,%f10 +/* 0x0238 324 ( 7 10) */ ldd [%o2+16],%f16 +/* 0x023c 325 ( 7 10) */ fitod %f5,%f4 +/* 0x0240 326 ( 8 11) */ ldd [%o2+24],%f18 +/* 0x0244 330 ( 9 12) */ ldd [%o2+32],%f20 +/* 0x0248 331 ( 9 11) */ fxnor %f0,%f16,%f16 +/* 0x024c 335 (10 13) */ ld [%i1],%g2 +/* 0x0250 336 (10 13) */ fsubd %f14,%f4,%f4 +/* 0x0254 337 (11 14) */ ldd [%o2+40],%f22 +/* 0x0258 338 (11 14) */ fitod %f16,%f28 +/* 0x025c 339 (12 15) */ ld [%i1+4],%g3 +/* 0x0260 340 (13 16) */ ld [%i1+8],%g4 +/* 0x0264 341 (13 15) */ fxnor %f0,%f22,%f22 +/* 0x0268 342 (14 17) */ ld [%i1+12],%g5 +/* 0x026c 343 (15 18) */ ld [%i1+16],%o0 +/* 0x0270 344 (16 19) */ ldd [%o2+48],%f24 +/* 0x0274 345 (17 20) */ ld [%i1+20],%o1 +/* 0x0278 346 (17 18) */ fmovs %f8,%f6 +/* 0x027c 347 (18 21) */ ldd [%o2+56],%f26 +/* 0x0280 348 (19 22) */ ld [%i1+24],%o2 +/* 0x0284 349 (19 22) */ fsubd %f6,%f8,%f6 +/* 0x0288 350 (20 23) */ ld [%i1+28],%o3 +/* 0x028c 
351 (20 23) */ fsubd %f14,%f12,%f8 +/* 0x0290 355 (21 24) */ ld [%i1+32],%o4 +/* 0x0294 356 (21 24) */ fitod %f10,%f12 +/* 0x0298 357 (22 25) */ ld [%i1+36],%o7 +/* 0x029c 358 (22 25) */ fitod %f11,%f10 +/* 0x02a0 359 (22 25) */ fmuld %f4,%f6,%f4 +/* 0x02a4 360 (23 26) */ ld [%i1+40],%l1 +/* 0x02a8 361 (23 26) */ fmuld %f8,%f6,%f8 +/* 0x02ac 362 (24 27) */ ld [%i1+56],%l5 +/* 0x02b0 363 (24 27) */ fsubd %f14,%f12,%f12 +/* 0x02b4 364 (25 28) */ fsubd %f14,%f10,%f10 +/* 0x02b8 365 (26 29) */ fdtox %f8,%f8 +/* 0x02bc 366 (26 27) */ std %f8,[%sp+2343] +/* 0x02c0 367 (27 30) */ fitod %f17,%f8 +/* 0x02c4 368 (27 30) */ fmuld %f12,%f6,%f12 +/* 0x02c8 369 (28 31) */ fdtox %f4,%f4 +/* 0x02cc 370 (28 29) */ std %f4,[%sp+2335] +/* 0x02d0 371 (28 31) */ fmuld %f10,%f6,%f10 +/* 0x02d4 372 (29 31) */ fxnor %f0,%f18,%f16 +/* 0x02d8 373 (30 33) */ fdtox %f12,%f12 +/* 0x02dc 374 (30 31) */ std %f12,[%sp+2327] +/* 0x02e0 375 (31 33) */ ldx [%sp+2343],%o5 +/* 0x02e4 376 (31 34) */ fsubd %f14,%f8,%f8 +/* 0x02e8 377 (32 35) */ fsubd %f14,%f28,%f4 +/* 0x02ec 378 (33 36) */ fitod %f17,%f12 +/* 0x02f0 379 (33 34) */ add %o5,%g2,%g2 +/* 0x02f4 380 (33 34) */ st %g2,[%i0] +/* 0x02f8 381 (34 36) */ ldx [%sp+2335],%o5 +/* 0x02fc 382 (34 37) */ fitod %f16,%f18 +/* 0x0300 383 (34 35) */ srax %g2,32,%l0 +/* 0x0304 384 (35 37) */ fxnor %f0,%f20,%f16 +/* 0x0308 385 (35 38) */ fmuld %f8,%f6,%f20 +/* 0x030c 386 (36 39) */ fdtox %f10,%f10 +/* 0x0310 387 (36 37) */ std %f10,[%sp+2319] +/* 0x0314 388 (36 37) */ add %o5,%g3,%g3 +/* 0x0318 389 (36 39) */ fmuld %f4,%f6,%f4 +/* 0x031c 390 (37 40) */ fitod %f16,%f8 +/* 0x0320 391 (37 38) */ add %g3,%l0,%g3 +/* 0x0324 392 (37 38) */ st %g3,[%i0+4] +/* 0x0328 393 (38 40) */ ldx [%sp+2327],%o5 +/* 0x032c 394 (38 41) */ fsubd %f14,%f18,%f18 +/* 0x0330 395 (38 39) */ srax %g3,32,%l3 +/* 0x0334 396 (39 41) */ ldx [%sp+2319],%l2 +/* 0x0338 397 (39 42) */ fdtox %f4,%f4 +/* 0x033c 398 (40 41) */ std %f4,[%sp+2311] +/* 0x0340 399 (40 43) */ fdtox %f20,%f20 +/* 0x0344 
400 (40 41) */ add %o5,%g4,%g4 +/* 0x0348 401 (41 42) */ std %f20,[%sp+2303] +/* 0x034c 402 (41 44) */ fsubd %f14,%f12,%f4 +/* 0x0350 403 (41 42) */ add %g4,%l3,%g4 +/* 0x0354 404 (41 44) */ fmuld %f18,%f6,%f18 +/* 0x0358 405 (42 43) */ st %g4,[%i0+8] +/* 0x035c 406 (42 45) */ fitod %f17,%f16 +/* 0x0360 407 (42 43) */ srax %g4,32,%l4 +/* 0x0364 408 (43 46) */ ld [%i1+44],%l0 +/* 0x0368 409 (43 46) */ fsubd %f14,%f8,%f20 +/* 0x036c 410 (43 44) */ add %l2,%g5,%l2 +/* 0x0370 411 (44 46) */ ldx [%sp+2311],%g5 +/* 0x0374 412 (44 47) */ fitod %f22,%f8 +/* 0x0378 413 (44 45) */ add %l2,%l4,%l2 +/* 0x037c 414 (44 47) */ fmuld %f4,%f6,%f4 +/* 0x0380 415 (45 46) */ st %l2,[%i0+12] +/* 0x0384 416 (45 48) */ fsubd %f14,%f16,%f10 +/* 0x0388 417 (46 49) */ ld [%i1+52],%l3 +/* 0x038c 418 (46 49) */ fdtox %f18,%f18 +/* 0x0390 419 (46 47) */ add %g5,%o0,%l4 +/* 0x0394 420 (46 49) */ fmuld %f20,%f6,%f12 +/* 0x0398 421 (47 48) */ std %f18,[%sp+2295] +/* 0x039c 422 (47 48) */ srax %l2,32,%o0 +/* 0x03a0 423 (47 50) */ fitod %f23,%f16 +/* 0x03a4 424 (48 51) */ ld [%i1+48],%o5 +/* 0x03a8 425 (48 51) */ fsubd %f14,%f8,%f8 +/* 0x03ac 426 (48 49) */ add %l4,%o0,%l4 +/* 0x03b0 427 (49 50) */ st %l4,[%i0+16] +/* 0x03b4 428 (49 50) */ srax %l4,32,%o0 +/* 0x03b8 429 (49 51) */ fxnor %f0,%f24,%f18 +/* 0x03bc 430 (50 52) */ ldx [%sp+2303],%g5 +/* 0x03c0 431 (50 53) */ fdtox %f4,%f4 +/* 0x03c4 432 (51 52) */ std %f4,[%sp+2287] +/* 0x03c8 433 (51 54) */ fdtox %f12,%f12 +/* 0x03cc 434 (51 54) */ fmuld %f10,%f6,%f4 +/* 0x03d0 435 (52 53) */ std %f12,[%sp+2279] +/* 0x03d4 436 (52 55) */ fsubd %f14,%f16,%f12 +/* 0x03d8 437 (52 53) */ add %g5,%o1,%g2 +/* 0x03dc 438 (52 55) */ fmuld %f8,%f6,%f8 +/* 0x03e0 439 (53 55) */ ldx [%sp+2295],%g5 +/* 0x03e4 440 (53 56) */ fitod %f18,%f10 +/* 0x03e8 441 (53 54) */ add %g2,%o0,%g2 +/* 0x03ec 442 (54 55) */ st %g2,[%i0+20] +/* 0x03f0 443 (54 57) */ fitod %f19,%f16 +/* 0x03f4 444 (54 55) */ srax %g2,32,%o0 +/* 0x03f8 445 (55 58) */ fdtox %f8,%f8 +/* 0x03fc 446 (55 
56) */ std %f8,[%sp+2263] +/* 0x0400 447 (55 56) */ add %g5,%o2,%g3 +/* 0x0404 448 (56 58) */ ldx [%sp+2287],%g5 +/* 0x0408 449 (56 59) */ fsubd %f14,%f10,%f10 +/* 0x040c 450 (56 57) */ add %g3,%o0,%g3 +/* 0x0410 451 (57 58) */ st %g3,[%i0+24] +/* 0x0414 452 (57 60) */ fsubd %f14,%f16,%f8 +/* 0x0418 453 (57 58) */ srax %g3,32,%o0 +/* 0x041c 454 (58 61) */ fdtox %f4,%f4 +/* 0x0420 455 (58 59) */ std %f4,[%sp+2271] +/* 0x0424 456 (58 59) */ add %g5,%o3,%g4 +/* 0x0428 457 (59 61) */ fxnor %f0,%f26,%f18 +/* 0x042c 458 (59 62) */ fmuld %f12,%f6,%f4 +/* 0x0430 459 (59 60) */ add %g4,%o0,%g4 +/* 0x0434 460 (60 61) */ st %g4,[%i0+28] +/* 0x0438 461 (60 63) */ fmuld %f10,%f6,%f10 +/* 0x043c 462 (60 61) */ srax %g4,32,%o0 +/* 0x0440 463 (61 63) */ ldx [%sp+2279],%g5 +/* 0x0444 464 (61 64) */ fitod %f18,%f12 +/* 0x0448 465 (61 64) */ fmuld %f8,%f6,%f8 +/* 0x044c 466 (62 65) */ fdtox %f4,%f4 +/* 0x0450 467 (62 63) */ std %f4,[%sp+2255] +/* 0x0454 468 (63 64) */ add %g5,%o4,%l2 +/* 0x0458 469 (63 65) */ ldx [%sp+2271],%g5 +/* 0x045c 470 (63 66) */ fdtox %f10,%f16 +/* 0x0460 471 (64 67) */ fsubd %f14,%f12,%f4 +/* 0x0464 472 (64 65) */ std %f16,[%sp+2247] +/* 0x0468 473 (64 65) */ add %l2,%o0,%l2 +/* 0x046c 474 (65 68) */ fdtox %f8,%f8 +/* 0x0470 475 (65 66) */ std %f8,[%sp+2239] +/* 0x0474 476 (65 66) */ add %g5,%o7,%l4 +/* 0x0478 477 (66 69) */ fitod %f19,%f10 +/* 0x047c 478 (66 68) */ ldx [%sp+2263],%g5 +/* 0x0480 479 (66 67) */ srax %l2,32,%o0 +/* 0x0484 480 (67 68) */ add %l4,%o0,%l4 +/* 0x0488 481 (67 70) */ fmuld %f4,%f6,%f4 +/* 0x048c 482 (67 69) */ ldx [%sp+2255],%o0 +/* 0x0490 483 (68 69) */ srax %l4,32,%o1 +/* 0x0494 484 (68 69) */ add %g5,%l1,%l1 +/* 0x0498 485 (68 69) */ st %l2,[%i0+32] +/* 0x049c 486 (69 72) */ fsubd %f14,%f10,%f8 +/* 0x04a0 487 (69 71) */ ldx [%sp+2239],%o3 +/* 0x04a4 488 (69 70) */ add %l1,%o1,%o1 +/* 0x04a8 489 (70 72) */ ldx [%sp+2247],%g5 +/* 0x04ac 490 (70 71) */ srax %o1,32,%o2 +/* 0x04b0 491 (70 71) */ add %o0,%l0,%o0 +/* 0x04b4 492 (71 74) 
*/ fdtox %f4,%f4 +/* 0x04b8 493 (71 72) */ std %f4,[%sp+2231] +/* 0x04bc 494 (71 72) */ add %o0,%o2,%o2 +/* 0x04c0 495 (72 73) */ add %o3,%l3,%l3 +/* 0x04c4 496 (72 75) */ fmuld %f8,%f6,%f4 +/* 0x04c8 497 (72 73) */ add %g5,%o5,%g5 +/* 0x04cc 498 (73 74) */ srax %o2,32,%o3 +/* 0x04d0 499 (73 74) */ st %l4,[%i0+36] +/* 0x04d4 500 (74 75) */ add %g5,%o3,%g2 +/* 0x04d8 501 (74 76) */ ldx [%sp+2231],%o0 +/* 0x04dc 502 (75 76) */ srax %g2,32,%g3 +/* 0x04e0 503 (75 78) */ fdtox %f4,%f4 +/* 0x04e4 504 (75 76) */ std %f4,[%sp+2223] +/* 0x04e8 505 (76 77) */ st %o1,[%i0+40] +/* 0x04ec 506 (76 77) */ add %l3,%g3,%g3 +/* 0x04f0 507 (76 77) */ add %o0,%l5,%g5 +/* 0x04f4 508 (77 78) */ st %o2,[%i0+44] +/* 0x04f8 509 (77 78) */ srax %g3,32,%g4 +/* 0x04fc 510 (78 79) */ st %g2,[%i0+48] +/* 0x0500 511 (78 79) */ add %g5,%g4,%g4 +/* 0x0504 512 (79 80) */ st %g3,[%i0+52] +/* 0x0508 513 (79 80) */ srax %g4,32,%g5 +/* 0x050c 514 (80 83) */ ld [%i1+60],%g3 +/* 0x0510 515 (81 83) */ ldx [%sp+2223],%g2 +/* 0x0514 516 (82 83) */ st %g4,[%i0+56] +/* 0x0518 517 (83 84) */ add %g2,%g3,%g2 +/* 0x051c 518 (84 85) */ add %g2,%g5,%g2 +/* 0x0520 519 (84 85) */ st %g2,[%i0+60] +/* 0x0524 523 (85 86) */ srax %g2,32,%o3 +/* 0x0528 524 (86 87) */ srl %o3,0,%i0 +/* 0x052c (87 89) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0530 (89 90) */ restore %g0,%g0,%g0 + +! +! ENTRY .L900000157 +! 
+ + .L900000157: /* frequency 1.0 confidence 0.0 */ +/* 0x0534 532 ( 0 1) */ fmovd %f0,%f14 +/* 0x0538 533 ( 0 3) */ ldd [%o0],%f8 +/* 0x053c 539 ( 0 1) */ add %i3,1,%g2 +/* 0x0540 540 ( 1 4) */ ld [%sp+2223],%f7 +/* 0x0544 541 ( 1 2) */ srl %g2,31,%g3 +/* 0x0548 545 ( 1 2) */ add %fp,-217,%g4 +/* 0x054c 546 ( 2 3) */ add %g2,%g3,%g2 +/* 0x0550 547 ( 2 3) */ or %g0,0,%g5 +/* 0x0554 548 ( 2 5) */ ldd [%o0+8],%f18 +/* 0x0558 549 ( 3 4) */ fmovs %f8,%f6 +/* 0x055c 550 ( 3 4) */ sra %g2,1,%o1 +/* 0x0560 551 ( 3 4) */ or %g0,0,%o0 +/* 0x0564 552 ( 4 5) */ subcc %o1,0,%g0 +/* 0x0568 553 ( 5 6) */ or %g0,%o1,%o3 +/* 0x056c 554 ( 5 8) */ fsubd %f6,%f8,%f16 +/* 0x0570 555 ( 5 6) */ ble,pt %icc,.L900000156 ! tprob=0.50 +/* 0x0574 ( 6 7) */ subcc %i3,0,%g0 +/* 0x0578 557 ( 6 7) */ sub %o1,1,%g2 +/* 0x057c 558 ( 7 8) */ or %g0,0,%i0 +/* 0x0580 559 ( 7 8) */ or %g0,1,%g3 +/* 0x0584 560 ( 8 9) */ subcc %o3,10,%g0 +/* 0x0588 561 ( 8 9) */ bl,pn %icc,.L77000077 ! tprob=0.50 +/* 0x058c ( 9 10) */ or %g0,0,%o1 +/* 0x0590 563 ( 9 12) */ ldd [%i2+8],%f0 +/* 0x0594 564 ( 9 10) */ sub %o3,3,%o3 +/* 0x0598 565 (10 13) */ ldd [%i2],%f2 +/* 0x059c 566 (10 11) */ or %g0,7,%o0 +/* 0x05a0 567 (10 11) */ or %g0,2,%i0 +/* 0x05a4 568 (11 13) */ fxnor %f14,%f0,%f8 +/* 0x05a8 569 (11 14) */ ldd [%i2+16],%f4 +/* 0x05ac 570 (11 12) */ or %g0,16,%o2 +/* 0x05b0 571 (12 14) */ fxnor %f14,%f2,%f2 +/* 0x05b4 572 (12 15) */ ldd [%i2+24],%f6 +/* 0x05b8 573 (12 13) */ or %g0,48,%o4 +/* 0x05bc 574 (13 16) */ fitod %f8,%f12 +/* 0x05c0 575 (13 14) */ or %g0,24,%o1 +/* 0x05c4 576 (13 14) */ or %g0,3,%g3 +/* 0x05c8 577 (14 17) */ fitod %f2,%f0 +/* 0x05cc 578 (15 18) */ fitod %f3,%f20 +/* 0x05d0 579 (15 18) */ ldd [%i2+32],%f2 +/* 0x05d4 580 (16 19) */ fitod %f9,%f10 +/* 0x05d8 581 (16 19) */ ldd [%i2+40],%f8 +/* 0x05dc 582 (17 20) */ fsubd %f18,%f0,%f0 +/* 0x05e0 583 (18 21) */ fsubd %f18,%f20,%f22 +/* 0x05e4 584 (19 22) */ fsubd %f18,%f12,%f20 +/* 0x05e8 585 (19 22) */ ldd [%i2+48],%f12 +/* 0x05ec 586 (20 23) 
*/ fsubd %f18,%f10,%f10 +/* 0x05f0 587 (20 23) */ fmuld %f0,%f16,%f0 +/* 0x05f4 588 (21 23) */ fxnor %f14,%f4,%f4 +/* 0x05f8 589 (21 24) */ fmuld %f22,%f16,%f22 +/* 0x05fc 590 (22 24) */ fxnor %f14,%f6,%f6 +/* 0x0600 591 (22 25) */ fmuld %f20,%f16,%f20 +/* 0x0604 592 (23 26) */ fdtox %f0,%f0 +/* 0x0608 593 (23 24) */ std %f0,[%fp-217] +/* 0x060c 594 (23 26) */ fmuld %f10,%f16,%f10 +/* 0x0610 595 (24 27) */ fdtox %f22,%f22 +/* 0x0614 596 (24 25) */ std %f22,[%fp-209] +/* 0x0618 597 (25 28) */ fitod %f5,%f0 +/* 0x061c 598 (26 29) */ fdtox %f10,%f10 +/* 0x0620 599 (27 30) */ fdtox %f20,%f20 +/* 0x0624 600 (27 28) */ std %f20,[%fp-201] +/* 0x0628 601 (28 31) */ fitod %f4,%f4 +/* 0x062c 602 (28 29) */ std %f10,[%fp-193] +/* 0x0630 603 (29 31) */ fxnor %f14,%f2,%f10 +/* 0x0634 604 (30 33) */ fitod %f7,%f2 +/* 0x0638 605 (31 34) */ fsubd %f18,%f0,%f0 +/* 0x063c 606 (32 35) */ fsubd %f18,%f4,%f4 +/* 0x0640 607 (33 35) */ fxnor %f14,%f8,%f8 + +! +! ENTRY .L900000144 +! + + .L900000144: /* frequency 1.0 confidence 0.0 */ +/* 0x0644 609 ( 0 3) */ fitod %f11,%f22 +/* 0x0648 610 ( 0 1) */ add %o0,3,%o0 +/* 0x064c 611 ( 0 1) */ add %g3,6,%g3 +/* 0x0650 612 ( 0 3) */ fmuld %f0,%f16,%f0 +/* 0x0654 613 ( 1 4) */ fmuld %f4,%f16,%f24 +/* 0x0658 614 ( 1 2) */ subcc %o0,%o3,%g0 +/* 0x065c 615 ( 1 2) */ add %i0,6,%i0 +/* 0x0660 616 ( 1 4) */ fsubd %f18,%f2,%f2 +/* 0x0664 617 ( 2 5) */ fitod %f6,%f4 +/* 0x0668 618 ( 3 6) */ fdtox %f0,%f0 +/* 0x066c 619 ( 3 4) */ add %o4,8,%i1 +/* 0x0670 620 ( 4 7) */ ldd [%i2+%i1],%f20 +/* 0x0674 621 ( 4 7) */ fdtox %f24,%f6 +/* 0x0678 622 ( 4 5) */ add %o2,16,%o4 +/* 0x067c 623 ( 5 8) */ fsubd %f18,%f4,%f4 +/* 0x0680 624 ( 5 6) */ std %f6,[%o4+%g4] +/* 0x0684 625 ( 5 6) */ add %o1,16,%o2 +/* 0x0688 626 ( 6 8) */ fxnor %f14,%f12,%f6 +/* 0x068c 627 ( 6 7) */ std %f0,[%o2+%g4] +/* 0x0690 628 ( 7 10) */ fitod %f9,%f0 +/* 0x0694 629 ( 7 10) */ fmuld %f2,%f16,%f2 +/* 0x0698 630 ( 8 11) */ fmuld %f4,%f16,%f24 +/* 0x069c 631 ( 8 11) */ fsubd %f18,%f22,%f12 +/* 
0x06a0 632 ( 9 12) */ fitod %f10,%f4 +/* 0x06a4 633 (10 13) */ fdtox %f2,%f2 +/* 0x06a8 634 (10 11) */ add %i1,8,%o1 +/* 0x06ac 635 (11 14) */ ldd [%i2+%o1],%f22 +/* 0x06b0 636 (11 14) */ fdtox %f24,%f10 +/* 0x06b4 637 (11 12) */ add %o4,16,%i4 +/* 0x06b8 638 (12 15) */ fsubd %f18,%f4,%f4 +/* 0x06bc 639 (12 13) */ std %f10,[%i4+%g4] +/* 0x06c0 640 (12 13) */ add %o2,16,%i1 +/* 0x06c4 641 (13 15) */ fxnor %f14,%f20,%f10 +/* 0x06c8 642 (13 14) */ std %f2,[%i1+%g4] +/* 0x06cc 643 (14 17) */ fitod %f7,%f2 +/* 0x06d0 644 (14 17) */ fmuld %f12,%f16,%f12 +/* 0x06d4 645 (15 18) */ fmuld %f4,%f16,%f24 +/* 0x06d8 646 (15 18) */ fsubd %f18,%f0,%f0 +/* 0x06dc 647 (16 19) */ fitod %f8,%f4 +/* 0x06e0 648 (17 20) */ fdtox %f12,%f20 +/* 0x06e4 649 (17 18) */ add %o1,8,%o4 +/* 0x06e8 650 (18 21) */ ldd [%i2+%o4],%f12 +/* 0x06ec 651 (18 21) */ fdtox %f24,%f8 +/* 0x06f0 652 (18 19) */ add %i4,16,%o2 +/* 0x06f4 653 (19 22) */ fsubd %f18,%f4,%f4 +/* 0x06f8 654 (19 20) */ std %f8,[%o2+%g4] +/* 0x06fc 655 (19 20) */ add %i1,16,%o1 +/* 0x0700 656 (20 22) */ fxnor %f14,%f22,%f8 +/* 0x0704 657 (20 21) */ ble,pt %icc,.L900000144 ! tprob=0.50 +/* 0x0708 (20 21) */ std %f20,[%o1+%g4] + +! +! ENTRY .L900000147 +! 
+ + .L900000147: /* frequency 1.0 confidence 0.0 */ +/* 0x070c 660 ( 0 3) */ fitod %f6,%f6 +/* 0x0710 661 ( 0 3) */ fmuld %f4,%f16,%f24 +/* 0x0714 662 ( 0 1) */ add %i4,32,%l4 +/* 0x0718 663 ( 1 4) */ fsubd %f18,%f2,%f2 +/* 0x071c 664 ( 1 4) */ fmuld %f0,%f16,%f22 +/* 0x0720 665 ( 1 2) */ add %i1,32,%l3 +/* 0x0724 666 ( 2 5) */ fitod %f10,%f28 +/* 0x0728 667 ( 2 3) */ sra %o0,0,%o2 +/* 0x072c 668 ( 2 3) */ add %i4,48,%l2 +/* 0x0730 669 ( 3 6) */ fsubd %f18,%f6,%f4 +/* 0x0734 670 ( 3 4) */ add %i1,48,%l1 +/* 0x0738 671 ( 3 4) */ add %i4,64,%l0 +/* 0x073c 672 ( 4 7) */ fitod %f11,%f26 +/* 0x0740 673 ( 4 5) */ sllx %o2,3,%o1 +/* 0x0744 674 ( 4 5) */ add %i1,64,%i5 +/* 0x0748 675 ( 5 8) */ fitod %f8,%f6 +/* 0x074c 676 ( 5 6) */ add %i4,80,%i4 +/* 0x0750 677 ( 5 6) */ add %i1,80,%i1 +/* 0x0754 678 ( 6 8) */ fxnor %f14,%f12,%f0 +/* 0x0758 679 ( 6 9) */ fmuld %f4,%f16,%f20 +/* 0x075c 680 ( 6 7) */ add %i4,16,%o4 +/* 0x0760 681 ( 7 10) */ fitod %f9,%f4 +/* 0x0764 682 ( 7 10) */ fmuld %f2,%f16,%f12 +/* 0x0768 683 ( 7 8) */ add %i1,16,%o3 +/* 0x076c 684 ( 8 11) */ fsubd %f18,%f28,%f10 +/* 0x0770 685 ( 8 9) */ subcc %o0,%g2,%g0 +/* 0x0774 686 ( 8 9) */ add %g3,12,%g3 +/* 0x0778 687 ( 9 12) */ fitod %f0,%f2 +/* 0x077c 688 (10 13) */ fsubd %f18,%f26,%f8 +/* 0x0780 689 (11 14) */ fitod %f1,%f0 +/* 0x0784 690 (11 14) */ fmuld %f10,%f16,%f10 +/* 0x0788 691 (12 15) */ fdtox %f24,%f24 +/* 0x078c 692 (12 13) */ std %f24,[%l4+%g4] +/* 0x0790 693 (12 13) */ add %i0,12,%i0 +/* 0x0794 694 (13 16) */ fsubd %f18,%f6,%f6 +/* 0x0798 695 (13 16) */ fmuld %f8,%f16,%f8 +/* 0x079c 696 (14 17) */ fdtox %f22,%f22 +/* 0x07a0 697 (14 15) */ std %f22,[%l3+%g4] +/* 0x07a4 698 (15 18) */ fsubd %f18,%f4,%f4 +/* 0x07a8 699 (16 19) */ fdtox %f20,%f20 +/* 0x07ac 700 (16 17) */ std %f20,[%l2+%g4] +/* 0x07b0 701 (16 19) */ fmuld %f6,%f16,%f6 +/* 0x07b4 702 (17 20) */ fsubd %f18,%f2,%f2 +/* 0x07b8 703 (18 21) */ fsubd %f18,%f0,%f0 +/* 0x07bc 704 (18 21) */ fmuld %f4,%f16,%f4 +/* 0x07c0 705 (19 22) */ fdtox 
%f12,%f12 +/* 0x07c4 706 (19 20) */ std %f12,[%l1+%g4] +/* 0x07c8 707 (20 23) */ fdtox %f10,%f10 +/* 0x07cc 708 (20 21) */ std %f10,[%l0+%g4] +/* 0x07d0 709 (20 23) */ fmuld %f2,%f16,%f2 +/* 0x07d4 710 (21 24) */ fdtox %f8,%f8 +/* 0x07d8 711 (21 22) */ std %f8,[%i5+%g4] +/* 0x07dc 712 (21 24) */ fmuld %f0,%f16,%f0 +/* 0x07e0 713 (22 25) */ fdtox %f6,%f6 +/* 0x07e4 714 (22 23) */ std %f6,[%i4+%g4] +/* 0x07e8 715 (23 26) */ fdtox %f4,%f4 +/* 0x07ec 716 (23 24) */ std %f4,[%i1+%g4] +/* 0x07f0 717 (24 27) */ fdtox %f2,%f2 +/* 0x07f4 718 (24 25) */ std %f2,[%o4+%g4] +/* 0x07f8 719 (25 28) */ fdtox %f0,%f0 +/* 0x07fc 720 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50 +/* 0x0800 (25 26) */ std %f0,[%o3+%g4] + +! +! ENTRY .L77000077 +! + + .L77000077: /* frequency 1.0 confidence 0.0 */ +/* 0x0804 723 ( 0 3) */ ldd [%i2+%o1],%f0 + +! +! ENTRY .L900000155 +! + + .L900000155: /* frequency 1.0 confidence 0.0 */ +/* 0x0808 725 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x080c 726 ( 0 1) */ sra %i0,0,%o1 +/* 0x0810 727 ( 0 1) */ add %o0,1,%o0 +/* 0x0814 728 ( 1 2) */ sllx %o1,3,%i4 +/* 0x0818 729 ( 1 2) */ add %i0,2,%i0 +/* 0x081c 730 ( 2 5) */ fitod %f0,%f2 +/* 0x0820 731 ( 2 3) */ sra %g3,0,%o1 +/* 0x0824 732 ( 2 3) */ add %g3,2,%g3 +/* 0x0828 733 ( 3 6) */ fitod %f1,%f0 +/* 0x082c 734 ( 3 4) */ sllx %o1,3,%i1 +/* 0x0830 735 ( 3 4) */ subcc %o0,%g2,%g0 +/* 0x0834 736 ( 4 5) */ sra %o0,0,%o2 +/* 0x0838 737 ( 5 8) */ fsubd %f18,%f2,%f2 +/* 0x083c 738 ( 5 6) */ sllx %o2,3,%o1 +/* 0x0840 739 ( 6 9) */ fsubd %f18,%f0,%f0 +/* 0x0844 740 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x0848 741 ( 9 12) */ fmuld %f0,%f16,%f0 +/* 0x084c 742 (11 14) */ fdtox %f2,%f2 +/* 0x0850 743 (11 12) */ std %f2,[%i4+%g4] +/* 0x0854 744 (12 15) */ fdtox %f0,%f0 +/* 0x0858 745 (12 13) */ std %f0,[%i1+%g4] +/* 0x085c 746 (12 13) */ ble,a,pt %icc,.L900000155 ! tprob=0.50 +/* 0x0860 (14 17) */ ldd [%i2+%o1],%f0 + +! +! ENTRY .L77000043 +! 
+ + .L77000043: /* frequency 1.0 confidence 0.0 */ +/* 0x0864 754 ( 0 1) */ subcc %i3,0,%g0 + +! +! ENTRY .L900000156 +! + + .L900000156: /* frequency 1.0 confidence 0.0 */ +/* 0x0868 756 ( 0 1) */ ble,a,pt %icc,.L77000061 ! tprob=0.50 +/* 0x086c ( 0 1) */ or %g0,%g5,%o3 +/* 0x0870 761 ( 0 2) */ ldx [%fp-209],%i1 +/* 0x0874 762 ( 1 2) */ sub %i3,1,%g3 +/* 0x0878 763 ( 1 2) */ or %g0,0,%i0 +/* 0x087c 764 ( 2 3) */ subcc %i3,5,%g0 +/* 0x0880 765 ( 2 3) */ bl,pn %icc,.L77000078 ! tprob=0.50 +/* 0x0884 ( 2 4) */ ldx [%fp-217],%i2 +/* 0x0888 767 ( 3 6) */ ld [%o5],%i3 +/* 0x088c 768 ( 3 4) */ or %g0,8,%g2 +/* 0x0890 769 ( 3 4) */ or %g0,16,%o4 +/* 0x0894 770 ( 4 5) */ sub %g3,1,%o3 +/* 0x0898 771 ( 4 5) */ or %g0,3,%i0 +/* 0x089c 772 ( 5 6) */ add %i2,%i3,%o1 +/* 0x08a0 773 ( 5 8) */ ld [%o5+4],%i2 +/* 0x08a4 774 ( 6 7) */ st %o1,[%o7] +/* 0x08a8 775 ( 6 7) */ srax %o1,32,%o1 +/* 0x08ac 776 ( 7 9) */ ldx [%fp-201],%o2 +/* 0x08b0 777 ( 7 8) */ add %i1,%i2,%o0 +/* 0x08b4 778 ( 7 8) */ or %g0,%o1,%i1 +/* 0x08b8 779 ( 8 11) */ ld [%o5+8],%o1 +/* 0x08bc 780 ( 8 9) */ add %o0,%i1,%o0 +/* 0x08c0 781 ( 9 10) */ st %o0,[%o7+4] +/* 0x08c4 782 ( 9 10) */ srax %o0,32,%o0 + +! +! ENTRY .L900000140 +! 
+ + .L900000140: /* frequency 1.0 confidence 0.0 */ +/* 0x08c8 784 ( 0 1) */ add %g2,4,%i1 +/* 0x08cc 785 ( 0 1) */ add %o4,8,%o4 +/* 0x08d0 786 ( 1 3) */ ldx [%o4+%g4],%i2 +/* 0x08d4 787 ( 1 2) */ sra %o0,0,%g5 +/* 0x08d8 788 ( 1 2) */ add %o2,%o1,%o1 +/* 0x08dc 789 ( 2 5) */ ld [%o5+%i1],%o0 +/* 0x08e0 790 ( 2 3) */ add %o1,%g5,%o1 +/* 0x08e4 791 ( 2 3) */ add %i0,2,%i0 +/* 0x08e8 792 ( 3 4) */ st %o1,[%o7+%g2] +/* 0x08ec 793 ( 3 4) */ srax %o1,32,%g5 +/* 0x08f0 794 ( 3 4) */ subcc %i0,%o3,%g0 +/* 0x08f4 795 ( 4 5) */ add %g2,8,%g2 +/* 0x08f8 796 ( 4 5) */ add %o4,8,%o4 +/* 0x08fc 797 ( 5 7) */ ldx [%o4+%g4],%o2 +/* 0x0900 798 ( 5 6) */ add %i2,%o0,%o0 +/* 0x0904 799 ( 6 9) */ ld [%o5+%g2],%o1 +/* 0x0908 800 ( 6 7) */ add %o0,%g5,%o0 +/* 0x090c 801 ( 7 8) */ st %o0,[%o7+%i1] +/* 0x0910 802 ( 7 8) */ ble,pt %icc,.L900000140 ! tprob=0.50 +/* 0x0914 ( 7 8) */ srax %o0,32,%o0 + +! +! ENTRY .L900000143 +! + + .L900000143: /* frequency 1.0 confidence 0.0 */ +/* 0x0918 805 ( 0 1) */ sra %o0,0,%o3 +/* 0x091c 806 ( 0 1) */ add %o2,%o1,%o0 +/* 0x0920 807 ( 1 2) */ add %o0,%o3,%o0 +/* 0x0924 808 ( 1 2) */ st %o0,[%o7+%g2] +/* 0x0928 809 ( 1 2) */ subcc %i0,%g3,%g0 +/* 0x092c 810 ( 2 3) */ srax %o0,32,%g5 +/* 0x0930 811 ( 2 3) */ bg,a,pn %icc,.L77000061 ! tprob=0.50 +/* 0x0934 ( 3 4) */ or %g0,%g5,%o3 + +! +! ENTRY .L77000078 +! + + .L77000078: /* frequency 1.0 confidence 0.0 */ +/* 0x0938 814 ( 0 1) */ sra %i0,0,%o0 + +! +! ENTRY .L900000154 +! 
+ + .L900000154: /* frequency 1.0 confidence 0.0 */ +/* 0x093c 816 ( 0 1) */ sllx %o0,2,%g2 +/* 0x0940 817 ( 0 1) */ add %i0,1,%i0 +/* 0x0944 818 ( 1 2) */ sllx %o0,3,%o4 +/* 0x0948 819 ( 1 4) */ ld [%o5+%g2],%o2 +/* 0x094c 820 ( 1 2) */ subcc %i0,%g3,%g0 +/* 0x0950 821 ( 2 4) */ ldx [%o4+%g4],%o0 +/* 0x0954 822 ( 2 3) */ sra %g5,0,%o1 +/* 0x0958 823 ( 4 5) */ add %o0,%o2,%o0 +/* 0x095c 824 ( 5 6) */ add %o0,%o1,%o0 +/* 0x0960 825 ( 5 6) */ st %o0,[%o7+%g2] +/* 0x0964 826 ( 6 7) */ srax %o0,32,%g5 +/* 0x0968 827 ( 6 7) */ ble,pt %icc,.L900000154 ! tprob=0.50 +/* 0x096c ( 7 8) */ sra %i0,0,%o0 + +! +! ENTRY .L77000047 +! + + .L77000047: /* frequency 1.0 confidence 0.0 */ +/* 0x0970 834 ( 0 1) */ or %g0,%g5,%o3 + +! +! ENTRY .L77000061 +! + + .L77000061: /* frequency 1.0 confidence 0.0 */ + +/* 0x0974 835 ( 1 2) */ srl %o3,0,%i0 +/* 0x0978 ( 2 4) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x097c ( 4 5) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000048 +! + + .L77000048: /* frequency 1.0 confidence 0.0 */ +/* 0x0980 844 ( 0 1) */ bne,pn %icc,.L77000050 ! 
tprob=0.50 +/* 0x0984 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0988 854 ( 0 3) */ ldd [%o2],%f4 +/* 0x098c 855 ( 1 4) */ ldd [%o0],%f6 +/* 0x0990 856 ( 1 2) */ srl %i4,19,%g3 +/* 0x0994 857 ( 1 2) */ andn %i4,%g2,%g2 +/* 0x0998 858 ( 2 3) */ st %g3,[%sp+2351] +/* 0x099c 859 ( 2 4) */ fxnor %f0,%f4,%f4 +/* 0x09a0 860 ( 3 4) */ st %g2,[%sp+2355] +/* 0x09a4 861 ( 4 7) */ ldd [%o2+8],%f12 +/* 0x09a8 862 ( 4 7) */ fitod %f4,%f10 +/* 0x09ac 863 ( 5 8) */ ldd [%o0+8],%f16 +/* 0x09b0 864 ( 5 8) */ fitod %f5,%f4 +/* 0x09b4 865 ( 6 9) */ ldd [%o2+16],%f18 +/* 0x09b8 866 ( 6 8) */ fxnor %f0,%f12,%f12 +/* 0x09bc 867 ( 7 10) */ ld [%sp+2351],%f9 +/* 0x09c0 868 ( 7 10) */ fsubd %f16,%f10,%f10 +/* 0x09c4 869 ( 8 11) */ ld [%sp+2355],%f15 +/* 0x09c8 870 ( 8 11) */ fitod %f12,%f22 +/* 0x09cc 871 ( 9 12) */ ldd [%o2+24],%f20 +/* 0x09d0 872 ( 9 12) */ fitod %f13,%f12 +/* 0x09d4 876 (10 13) */ ld [%i1],%g2 +/* 0x09d8 877 (10 13) */ fsubd %f16,%f4,%f4 +/* 0x09dc 878 (11 14) */ ld [%i1+4],%g3 +/* 0x09e0 879 (11 14) */ fsubd %f16,%f22,%f22 +/* 0x09e4 880 (12 15) */ ld [%i1+8],%g4 +/* 0x09e8 881 (12 14) */ fxnor %f0,%f18,%f18 +/* 0x09ec 882 (13 16) */ ld [%i1+12],%g5 +/* 0x09f0 883 (13 16) */ fsubd %f16,%f12,%f12 +/* 0x09f4 884 (14 17) */ ld [%i1+16],%o0 +/* 0x09f8 885 (14 17) */ fitod %f18,%f26 +/* 0x09fc 886 (15 18) */ ld [%i1+20],%o1 +/* 0x0a00 887 (15 17) */ fxnor %f0,%f20,%f20 +/* 0x0a04 888 (16 19) */ ld [%i1+24],%o2 +/* 0x0a08 889 (17 20) */ ld [%i1+28],%o3 +/* 0x0a0c 890 (19 20) */ fmovs %f6,%f8 +/* 0x0a10 891 (20 21) */ fmovs %f6,%f14 +/* 0x0a14 892 (22 25) */ fsubd %f8,%f6,%f8 +/* 0x0a18 893 (23 26) */ fsubd %f14,%f6,%f6 +/* 0x0a1c 894 (25 28) */ fmuld %f10,%f8,%f14 +/* 0x0a20 895 (26 29) */ fmuld %f10,%f6,%f10 +/* 0x0a24 896 (27 30) */ fmuld %f4,%f8,%f24 +/* 0x0a28 897 (28 31) */ fdtox %f14,%f14 +/* 0x0a2c 898 (28 29) */ std %f14,[%sp+2335] +/* 0x0a30 899 (28 31) */ fmuld %f22,%f8,%f28 +/* 0x0a34 900 (29 32) */ fitod %f19,%f14 +/* 0x0a38 901 (29 32) */ fmuld %f22,%f6,%f18 +/* 
0x0a3c 902 (30 33) */ fdtox %f10,%f10 +/* 0x0a40 903 (30 31) */ std %f10,[%sp+2343] +/* 0x0a44 904 (30 33) */ fmuld %f4,%f6,%f4 +/* 0x0a48 905 (31 34) */ fmuld %f12,%f8,%f22 +/* 0x0a4c 906 (32 35) */ fdtox %f18,%f18 +/* 0x0a50 907 (32 33) */ std %f18,[%sp+2311] +/* 0x0a54 908 (32 35) */ fmuld %f12,%f6,%f10 +/* 0x0a58 909 (33 35) */ ldx [%sp+2335],%o4 +/* 0x0a5c 910 (33 36) */ fdtox %f24,%f12 +/* 0x0a60 911 (34 35) */ std %f12,[%sp+2319] +/* 0x0a64 912 (34 37) */ fsubd %f16,%f26,%f12 +/* 0x0a68 913 (35 37) */ ldx [%sp+2343],%o5 +/* 0x0a6c 914 (35 36) */ sllx %o4,19,%o4 +/* 0x0a70 915 (35 38) */ fdtox %f4,%f4 +/* 0x0a74 916 (36 37) */ std %f4,[%sp+2327] +/* 0x0a78 917 (36 39) */ fdtox %f28,%f24 +/* 0x0a7c 918 (37 38) */ std %f24,[%sp+2303] +/* 0x0a80 919 (37 40) */ fitod %f20,%f4 +/* 0x0a84 920 (37 38) */ add %o5,%o4,%o4 +/* 0x0a88 921 (37 40) */ fmuld %f12,%f8,%f24 +/* 0x0a8c 922 (38 40) */ ldx [%sp+2319],%o7 +/* 0x0a90 923 (38 41) */ fsubd %f16,%f14,%f14 +/* 0x0a94 924 (38 39) */ add %o4,%g2,%o4 +/* 0x0a98 925 (38 41) */ fmuld %f12,%f6,%f12 +/* 0x0a9c 926 (39 41) */ ldx [%sp+2327],%o5 +/* 0x0aa0 927 (39 42) */ fitod %f21,%f18 +/* 0x0aa4 928 (40 41) */ st %o4,[%i0] +/* 0x0aa8 929 (40 41) */ sllx %o7,19,%o7 +/* 0x0aac 930 (40 43) */ fdtox %f22,%f20 +/* 0x0ab0 931 (41 42) */ std %f20,[%sp+2287] +/* 0x0ab4 932 (41 44) */ fdtox %f10,%f10 +/* 0x0ab8 933 (41 42) */ add %o5,%o7,%o5 +/* 0x0abc 934 (41 44) */ fmuld %f14,%f8,%f20 +/* 0x0ac0 935 (42 43) */ std %f10,[%sp+2295] +/* 0x0ac4 936 (42 43) */ srlx %o4,32,%o7 +/* 0x0ac8 937 (42 45) */ fsubd %f16,%f4,%f4 +/* 0x0acc 938 (42 45) */ fmuld %f14,%f6,%f14 +/* 0x0ad0 939 (43 45) */ ldx [%sp+2311],%g2 +/* 0x0ad4 940 (43 46) */ fdtox %f24,%f10 +/* 0x0ad8 941 (43 44) */ add %o5,%g3,%g3 +/* 0x0adc 942 (44 45) */ std %f10,[%sp+2271] +/* 0x0ae0 943 (44 45) */ add %g3,%o7,%g3 +/* 0x0ae4 944 (44 47) */ fdtox %f12,%f12 +/* 0x0ae8 945 (45 47) */ ldx [%sp+2303],%l0 +/* 0x0aec 946 (45 48) */ fsubd %f16,%f18,%f10 +/* 0x0af0 947 (45 48) */ 
fmuld %f4,%f8,%f16 +/* 0x0af4 948 (46 47) */ std %f12,[%sp+2279] +/* 0x0af8 949 (46 49) */ fdtox %f20,%f12 +/* 0x0afc 950 (46 49) */ fmuld %f4,%f6,%f4 +/* 0x0b00 951 (47 48) */ std %f12,[%sp+2255] +/* 0x0b04 952 (47 48) */ sllx %l0,19,%l0 +/* 0x0b08 953 (47 50) */ fdtox %f14,%f12 +/* 0x0b0c 954 (48 50) */ ldx [%sp+2287],%o5 +/* 0x0b10 955 (48 49) */ add %g2,%l0,%g2 +/* 0x0b14 956 (48 51) */ fmuld %f10,%f8,%f8 +/* 0x0b18 957 (49 51) */ ldx [%sp+2295],%l1 +/* 0x0b1c 958 (49 50) */ srlx %g3,32,%l0 +/* 0x0b20 959 (49 50) */ add %g2,%g4,%g4 +/* 0x0b24 960 (49 52) */ fmuld %f10,%f6,%f6 +/* 0x0b28 961 (50 51) */ std %f12,[%sp+2263] +/* 0x0b2c 962 (50 51) */ sllx %o5,19,%g2 +/* 0x0b30 963 (50 51) */ add %g4,%l0,%g4 +/* 0x0b34 964 (51 53) */ ldx [%sp+2279],%l0 +/* 0x0b38 965 (51 52) */ srlx %g4,32,%o5 +/* 0x0b3c 966 (51 52) */ add %l1,%g2,%g2 +/* 0x0b40 967 (52 53) */ st %g3,[%i0+4] +/* 0x0b44 968 (52 53) */ add %g2,%g5,%g2 +/* 0x0b48 969 (52 55) */ fdtox %f16,%f10 +/* 0x0b4c 970 (53 55) */ ldx [%sp+2271],%o7 +/* 0x0b50 971 (53 54) */ add %g2,%o5,%g2 +/* 0x0b54 972 (53 56) */ fdtox %f4,%f4 +/* 0x0b58 973 (54 55) */ std %f10,[%sp+2239] +/* 0x0b5c 974 (55 56) */ sllx %o7,19,%o7 +/* 0x0b60 975 (55 56) */ std %f4,[%sp+2247] +/* 0x0b64 976 (55 58) */ fdtox %f8,%f4 +/* 0x0b68 977 (56 57) */ add %l0,%o7,%o7 +/* 0x0b6c 978 (56 58) */ ldx [%sp+2263],%o5 +/* 0x0b70 979 (57 58) */ add %o7,%o0,%o0 +/* 0x0b74 980 (57 58) */ std %f4,[%sp+2223] +/* 0x0b78 981 (57 60) */ fdtox %f6,%f4 +/* 0x0b7c 982 (58 60) */ ldx [%sp+2255],%g5 +/* 0x0b80 983 (58 59) */ srlx %g2,32,%o7 +/* 0x0b84 984 (59 60) */ std %f4,[%sp+2231] +/* 0x0b88 985 (59 60) */ add %o0,%o7,%o0 +/* 0x0b8c 986 (60 61) */ sllx %g5,19,%g5 +/* 0x0b90 987 (60 62) */ ldx [%sp+2247],%l1 +/* 0x0b94 988 (61 62) */ add %o5,%g5,%g5 +/* 0x0b98 989 (61 62) */ st %g2,[%i0+12] +/* 0x0b9c 990 (62 64) */ ldx [%sp+2239],%l0 +/* 0x0ba0 991 (62 63) */ srlx %o0,32,%o4 +/* 0x0ba4 992 (62 63) */ add %g5,%o1,%o1 +/* 0x0ba8 993 (63 64) */ add 
%o1,%o4,%o1 +/* 0x0bac 994 (63 65) */ ldx [%sp+2223],%o7 +/* 0x0bb0 995 (64 65) */ sllx %l0,19,%g3 +/* 0x0bb4 996 (64 66) */ ldx [%sp+2231],%o5 +/* 0x0bb8 997 (65 66) */ add %l1,%g3,%o4 +/* 0x0bbc 998 (65 66) */ st %o0,[%i0+16] +/* 0x0bc0 999 (66 67) */ add %o4,%o2,%o2 +/* 0x0bc4 1000 (66 67) */ st %o1,[%i0+20] +/* 0x0bc8 1001 (67 68) */ srlx %o1,32,%o4 +/* 0x0bcc 1002 (67 68) */ st %g4,[%i0+8] +/* 0x0bd0 1003 (68 69) */ sllx %o7,19,%g2 +/* 0x0bd4 1004 (68 69) */ add %o2,%o4,%o4 +/* 0x0bd8 1005 (68 69) */ st %o4,[%i0+24] +/* 0x0bdc 1006 (69 70) */ add %o5,%g2,%g2 +/* 0x0be0 1007 (70 71) */ srlx %o4,32,%g3 +/* 0x0be4 1008 (70 71) */ add %g2,%o3,%g2 +/* 0x0be8 1009 (71 72) */ add %g2,%g3,%g2 +/* 0x0bec 1010 (71 72) */ st %g2,[%i0+28] +/* 0x0bf0 1014 (72 73) */ srlx %g2,32,%o3 +/* 0x0bf4 1015 (73 74) */ srl %o3,0,%i0 +/* 0x0bf8 (74 76) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0bfc (76 77) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000050 +! + + .L77000050: /* frequency 1.0 confidence 0.0 */ +/* 0x0c00 1022 ( 0 1) */ subcc %i3,16,%g0 +/* 0x0c04 1023 ( 0 1) */ bne,pn %icc,.L77000073 ! 
tprob=0.50 +/* 0x0c08 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0c0c 1034 ( 1 4) */ ldd [%o2],%f4 +/* 0x0c10 1035 ( 1 2) */ andn %i4,%g2,%g2 +/* 0x0c14 1036 ( 2 3) */ st %g2,[%sp+2483] +/* 0x0c18 1037 ( 2 3) */ srl %i4,19,%g2 +/* 0x0c1c 1038 ( 3 4) */ st %g2,[%sp+2479] +/* 0x0c20 1039 ( 3 5) */ fxnor %f0,%f4,%f4 +/* 0x0c24 1040 ( 4 7) */ ldd [%o0],%f8 +/* 0x0c28 1041 ( 5 8) */ fitod %f4,%f10 +/* 0x0c2c 1042 ( 5 8) */ ldd [%o0+8],%f16 +/* 0x0c30 1043 ( 6 9) */ ldd [%o2+8],%f14 +/* 0x0c34 1044 ( 6 9) */ fitod %f5,%f4 +/* 0x0c38 1045 ( 7 10) */ ld [%sp+2483],%f13 +/* 0x0c3c 1046 ( 8 11) */ ld [%sp+2479],%f7 +/* 0x0c40 1047 ( 8 11) */ fsubd %f16,%f10,%f10 +/* 0x0c44 1048 ( 9 11) */ fxnor %f0,%f14,%f14 +/* 0x0c48 1049 (10 13) */ fsubd %f16,%f4,%f4 +/* 0x0c4c 1050 (14 15) */ fmovs %f8,%f12 +/* 0x0c50 1051 (15 16) */ fmovs %f8,%f6 +/* 0x0c54 1052 (17 20) */ fsubd %f12,%f8,%f12 +/* 0x0c58 1053 (18 21) */ fsubd %f6,%f8,%f6 +/* 0x0c5c 1054 (19 22) */ fitod %f14,%f8 +/* 0x0c60 1055 (20 23) */ fmuld %f10,%f12,%f18 +/* 0x0c64 1056 (20 23) */ fitod %f15,%f14 +/* 0x0c68 1057 (21 24) */ fmuld %f10,%f6,%f10 +/* 0x0c6c 1058 (22 25) */ fsubd %f16,%f8,%f8 +/* 0x0c70 1059 (22 25) */ fmuld %f4,%f12,%f20 +/* 0x0c74 1060 (23 26) */ fmuld %f4,%f6,%f4 +/* 0x0c78 1061 (23 26) */ fsubd %f16,%f14,%f14 +/* 0x0c7c 1062 (24 27) */ fdtox %f10,%f10 +/* 0x0c80 1063 (24 25) */ std %f10,[%sp+2463] +/* 0x0c84 1064 (25 28) */ fmuld %f8,%f12,%f10 +/* 0x0c88 1065 (25 28) */ fdtox %f18,%f18 +/* 0x0c8c 1066 (25 26) */ std %f18,[%sp+2471] +/* 0x0c90 1067 (26 29) */ fmuld %f8,%f6,%f8 +/* 0x0c94 1068 (26 29) */ fdtox %f4,%f4 +/* 0x0c98 1069 (26 27) */ std %f4,[%sp+2447] +/* 0x0c9c 1070 (27 30) */ fmuld %f14,%f12,%f4 +/* 0x0ca0 1071 (27 30) */ fdtox %f20,%f18 +/* 0x0ca4 1072 (27 28) */ std %f18,[%sp+2455] +/* 0x0ca8 1073 (28 31) */ fdtox %f10,%f10 +/* 0x0cac 1074 (28 29) */ std %f10,[%sp+2439] +/* 0x0cb0 1075 (28 31) */ fmuld %f14,%f6,%f14 +/* 0x0cb4 1076 (29 32) */ fdtox %f8,%f8 +/* 0x0cb8 1077 (29 30) */ std 
%f8,[%sp+2431] +/* 0x0cbc 1078 (30 33) */ ldd [%o2+16],%f10 +/* 0x0cc0 1079 (30 33) */ fdtox %f4,%f4 +/* 0x0cc4 1080 (31 34) */ ldd [%o2+24],%f8 +/* 0x0cc8 1081 (31 34) */ fdtox %f14,%f14 +/* 0x0ccc 1082 (32 33) */ std %f4,[%sp+2423] +/* 0x0cd0 1083 (32 34) */ fxnor %f0,%f10,%f10 +/* 0x0cd4 1084 (33 35) */ fxnor %f0,%f8,%f4 +/* 0x0cd8 1085 (33 34) */ std %f14,[%sp+2415] +/* 0x0cdc 1086 (34 37) */ fitod %f10,%f8 +/* 0x0ce0 1087 (35 38) */ fitod %f11,%f10 +/* 0x0ce4 1088 (36 39) */ fitod %f4,%f14 +/* 0x0ce8 1089 (37 40) */ fsubd %f16,%f8,%f8 +/* 0x0cec 1090 (38 41) */ fsubd %f16,%f10,%f10 +/* 0x0cf0 1091 (39 42) */ fsubd %f16,%f14,%f14 +/* 0x0cf4 1092 (40 43) */ fmuld %f8,%f12,%f18 +/* 0x0cf8 1093 (40 43) */ fitod %f5,%f4 +/* 0x0cfc 1094 (41 44) */ fmuld %f8,%f6,%f8 +/* 0x0d00 1095 (42 45) */ fmuld %f10,%f12,%f20 +/* 0x0d04 1096 (43 46) */ fmuld %f10,%f6,%f10 +/* 0x0d08 1097 (43 46) */ fsubd %f16,%f4,%f4 +/* 0x0d0c 1098 (44 47) */ fdtox %f8,%f8 +/* 0x0d10 1099 (44 45) */ std %f8,[%sp+2399] +/* 0x0d14 1100 (45 48) */ fmuld %f14,%f12,%f8 +/* 0x0d18 1101 (45 48) */ fdtox %f18,%f18 +/* 0x0d1c 1102 (45 46) */ std %f18,[%sp+2407] +/* 0x0d20 1103 (46 49) */ fdtox %f10,%f10 +/* 0x0d24 1104 (46 47) */ std %f10,[%sp+2383] +/* 0x0d28 1105 (46 49) */ fmuld %f14,%f6,%f14 +/* 0x0d2c 1106 (47 50) */ fmuld %f4,%f12,%f10 +/* 0x0d30 1107 (47 50) */ fdtox %f20,%f18 +/* 0x0d34 1108 (47 48) */ std %f18,[%sp+2391] +/* 0x0d38 1109 (48 51) */ fdtox %f8,%f8 +/* 0x0d3c 1110 (48 49) */ std %f8,[%sp+2375] +/* 0x0d40 1111 (48 51) */ fmuld %f4,%f6,%f4 +/* 0x0d44 1112 (49 52) */ fdtox %f14,%f14 +/* 0x0d48 1113 (49 50) */ std %f14,[%sp+2367] +/* 0x0d4c 1117 (50 53) */ ldd [%o2+32],%f8 +/* 0x0d50 1118 (50 53) */ fdtox %f10,%f10 +/* 0x0d54 1119 (51 54) */ fdtox %f4,%f4 +/* 0x0d58 1120 (51 52) */ std %f4,[%sp+2351] +/* 0x0d5c 1121 (52 54) */ fxnor %f0,%f8,%f8 +/* 0x0d60 1122 (52 55) */ ldd [%o2+40],%f14 +/* 0x0d64 1123 (53 54) */ std %f10,[%sp+2359] +/* 0x0d68 1124 (54 57) */ fitod %f8,%f4 +/* 0x0d6c 
1125 (55 57) */ fxnor %f0,%f14,%f10 +/* 0x0d70 1126 (56 59) */ fitod %f9,%f8 +/* 0x0d74 1127 (57 60) */ fsubd %f16,%f4,%f4 +/* 0x0d78 1128 (58 61) */ fitod %f10,%f14 +/* 0x0d7c 1129 (59 62) */ fsubd %f16,%f8,%f8 +/* 0x0d80 1130 (60 63) */ fmuld %f4,%f12,%f18 +/* 0x0d84 1131 (60 63) */ fitod %f11,%f10 +/* 0x0d88 1132 (61 64) */ fmuld %f4,%f6,%f4 +/* 0x0d8c 1133 (61 64) */ fsubd %f16,%f14,%f14 +/* 0x0d90 1134 (62 65) */ fmuld %f8,%f12,%f20 +/* 0x0d94 1135 (63 66) */ fmuld %f8,%f6,%f8 +/* 0x0d98 1136 (63 66) */ fsubd %f16,%f10,%f10 +/* 0x0d9c 1137 (64 67) */ fdtox %f4,%f4 +/* 0x0da0 1138 (64 65) */ std %f4,[%sp+2335] +/* 0x0da4 1139 (65 68) */ fmuld %f14,%f12,%f4 +/* 0x0da8 1140 (65 68) */ fdtox %f18,%f18 +/* 0x0dac 1141 (65 66) */ std %f18,[%sp+2343] +/* 0x0db0 1142 (66 69) */ fdtox %f8,%f8 +/* 0x0db4 1143 (66 67) */ std %f8,[%sp+2319] +/* 0x0db8 1144 (66 69) */ fmuld %f14,%f6,%f14 +/* 0x0dbc 1145 (67 70) */ fmuld %f10,%f12,%f8 +/* 0x0dc0 1146 (67 70) */ fdtox %f20,%f18 +/* 0x0dc4 1147 (67 68) */ std %f18,[%sp+2327] +/* 0x0dc8 1148 (68 71) */ fdtox %f4,%f4 +/* 0x0dcc 1149 (68 69) */ std %f4,[%sp+2311] +/* 0x0dd0 1150 (68 71) */ fmuld %f10,%f6,%f10 +/* 0x0dd4 1151 (69 72) */ fdtox %f14,%f14 +/* 0x0dd8 1152 (69 70) */ std %f14,[%sp+2303] +/* 0x0ddc 1153 (70 73) */ ldd [%o2+48],%f4 +/* 0x0de0 1154 (70 73) */ fdtox %f8,%f8 +/* 0x0de4 1155 (71 74) */ fdtox %f10,%f10 +/* 0x0de8 1156 (71 72) */ std %f10,[%sp+2287] +/* 0x0dec 1157 (72 74) */ fxnor %f0,%f4,%f4 +/* 0x0df0 1158 (72 75) */ ldd [%o2+56],%f14 +/* 0x0df4 1159 (73 74) */ std %f8,[%sp+2295] +/* 0x0df8 1160 (74 77) */ fitod %f4,%f10 +/* 0x0dfc 1161 (75 78) */ fitod %f5,%f4 +/* 0x0e00 1162 (76 78) */ fxnor %f0,%f14,%f8 +/* 0x0e04 1163 (77 80) */ fsubd %f16,%f10,%f10 +/* 0x0e08 1164 (78 81) */ fsubd %f16,%f4,%f4 +/* 0x0e0c 1165 (79 82) */ fitod %f8,%f14 +/* 0x0e10 1166 (80 83) */ fmuld %f10,%f12,%f18 +/* 0x0e14 1167 (80 83) */ fitod %f9,%f8 +/* 0x0e18 1168 (81 84) */ fmuld %f10,%f6,%f10 +/* 0x0e1c 1169 (82 85) */ fmuld 
%f4,%f12,%f20 +/* 0x0e20 1170 (82 85) */ fsubd %f16,%f14,%f14 +/* 0x0e24 1171 (83 86) */ fdtox %f18,%f18 +/* 0x0e28 1172 (83 84) */ std %f18,[%sp+2279] +/* 0x0e2c 1173 (83 86) */ fmuld %f4,%f6,%f4 +/* 0x0e30 1174 (84 87) */ fdtox %f10,%f10 +/* 0x0e34 1175 (84 85) */ std %f10,[%sp+2271] +/* 0x0e38 1176 (85 88) */ fdtox %f20,%f10 +/* 0x0e3c 1177 (85 86) */ std %f10,[%sp+2263] +/* 0x0e40 1178 (86 89) */ fdtox %f4,%f4 +/* 0x0e44 1179 (86 87) */ std %f4,[%sp+2255] +/* 0x0e48 1180 (86 89) */ fmuld %f14,%f12,%f10 +/* 0x0e4c 1181 (87 90) */ fmuld %f14,%f6,%f4 +/* 0x0e50 1182 (89 92) */ fdtox %f10,%f10 +/* 0x0e54 1183 (89 90) */ std %f10,[%sp+2247] +/* 0x0e58 1184 (90 93) */ fdtox %f4,%f4 +/* 0x0e5c 1185 (90 91) */ std %f4,[%sp+2239] +/* 0x0e60 1189 (91 93) */ ldx [%sp+2463],%g2 +/* 0x0e64 1190 (91 94) */ fsubd %f16,%f8,%f4 +/* 0x0e68 1191 (92 94) */ ldx [%sp+2471],%g3 +/* 0x0e6c 1192 (93 96) */ ld [%i1],%g4 +/* 0x0e70 1193 (93 94) */ sllx %g2,19,%g2 +/* 0x0e74 1194 (94 96) */ ldx [%sp+2455],%g5 +/* 0x0e78 1195 (94 95) */ add %g3,%g2,%g2 +/* 0x0e7c 1196 (94 97) */ fmuld %f4,%f6,%f6 +/* 0x0e80 1197 (95 97) */ ldx [%sp+2447],%g3 +/* 0x0e84 1198 (95 96) */ add %g2,%g4,%g4 +/* 0x0e88 1199 (95 98) */ fmuld %f4,%f12,%f4 +/* 0x0e8c 1200 (96 97) */ st %g4,[%i0] +/* 0x0e90 1201 (96 97) */ srlx %g4,32,%g4 +/* 0x0e94 1202 (97 100) */ ld [%i1+8],%o0 +/* 0x0e98 1203 (97 98) */ sllx %g3,19,%g2 +/* 0x0e9c 1204 (97 100) */ fdtox %f6,%f6 +/* 0x0ea0 1205 (98 101) */ ld [%i1+4],%g3 +/* 0x0ea4 1206 (98 99) */ add %g5,%g2,%g2 +/* 0x0ea8 1207 (98 101) */ fdtox %f4,%f4 +/* 0x0eac 1208 (99 101) */ ldx [%sp+2439],%g5 +/* 0x0eb0 1209 (100 103) */ ld [%i1+12],%o1 +/* 0x0eb4 1210 (100 101) */ add %g2,%g3,%g2 +/* 0x0eb8 1211 (101 103) */ ldx [%sp+2431],%g3 +/* 0x0ebc 1212 (101 102) */ add %g2,%g4,%g4 +/* 0x0ec0 1213 (102 103) */ st %g4,[%i0+4] +/* 0x0ec4 1214 (103 104) */ std %f6,[%sp+2223] +/* 0x0ec8 1215 (103 104) */ sllx %g3,19,%g2 +/* 0x0ecc 1216 (104 106) */ ldx [%sp+2423],%g3 +/* 0x0ed0 1217 (104 
105) */ add %g5,%g2,%g2 +/* 0x0ed4 1218 (105 107) */ ldx [%sp+2415],%g5 +/* 0x0ed8 1219 (105 106) */ add %g2,%o0,%g2 +/* 0x0edc 1220 (106 107) */ std %f4,[%sp+2231] +/* 0x0ee0 1221 (106 107) */ srlx %g4,32,%o0 +/* 0x0ee4 1222 (107 109) */ ldx [%sp+2407],%g4 +/* 0x0ee8 1223 (107 108) */ sllx %g5,19,%g5 +/* 0x0eec 1224 (107 108) */ add %g2,%o0,%g2 +/* 0x0ef0 1225 (108 109) */ st %g2,[%i0+8] +/* 0x0ef4 1226 (108 109) */ srlx %g2,32,%o0 +/* 0x0ef8 1227 (108 109) */ add %g3,%g5,%g3 +/* 0x0efc 1228 (109 111) */ ldx [%sp+2399],%g5 +/* 0x0f00 1229 (109 110) */ add %g3,%o1,%g3 +/* 0x0f04 1230 (110 113) */ ld [%i1+16],%o1 +/* 0x0f08 1231 (110 111) */ add %g3,%o0,%g3 +/* 0x0f0c 1232 (111 112) */ st %g3,[%i0+12] +/* 0x0f10 1233 (111 112) */ sllx %g5,19,%g5 +/* 0x0f14 1234 (112 113) */ srlx %g3,32,%o0 +/* 0x0f18 1235 (112 113) */ add %g4,%g5,%g2 +/* 0x0f1c 1236 (112 114) */ ldx [%sp+2383],%g5 +/* 0x0f20 1237 (113 115) */ ldx [%sp+2391],%g4 +/* 0x0f24 1238 (113 114) */ add %g2,%o1,%g2 +/* 0x0f28 1239 (114 117) */ ld [%i1+20],%o1 +/* 0x0f2c 1240 (114 115) */ sllx %g5,19,%g5 +/* 0x0f30 1241 (114 115) */ add %g2,%o0,%g2 +/* 0x0f34 1242 (115 116) */ st %g2,[%i0+16] +/* 0x0f38 1243 (115 116) */ srlx %g2,32,%o0 +/* 0x0f3c 1244 (115 116) */ add %g4,%g5,%g3 +/* 0x0f40 1245 (116 118) */ ldx [%sp+2367],%g5 +/* 0x0f44 1246 (116 117) */ add %g3,%o1,%g3 +/* 0x0f48 1247 (117 119) */ ldx [%sp+2375],%g4 +/* 0x0f4c 1248 (117 118) */ add %g3,%o0,%g3 +/* 0x0f50 1249 (118 121) */ ld [%i1+24],%o1 +/* 0x0f54 1250 (118 119) */ sllx %g5,19,%g5 +/* 0x0f58 1251 (119 120) */ st %g3,[%i0+20] +/* 0x0f5c 1252 (119 120) */ add %g4,%g5,%g2 +/* 0x0f60 1253 (120 122) */ ldx [%sp+2351],%g5 +/* 0x0f64 1254 (120 121) */ srlx %g3,32,%o0 +/* 0x0f68 1255 (120 121) */ add %g2,%o1,%g2 +/* 0x0f6c 1256 (121 123) */ ldx [%sp+2359],%g4 +/* 0x0f70 1257 (121 122) */ add %g2,%o0,%g2 +/* 0x0f74 1258 (122 125) */ ld [%i1+28],%o1 +/* 0x0f78 1259 (122 123) */ sllx %g5,19,%g5 +/* 0x0f7c 1260 (123 124) */ st %g2,[%i0+24] +/* 0x0f80 
1261 (123 124) */ add %g4,%g5,%g3 +/* 0x0f84 1265 (124 126) */ ldx [%sp+2335],%g5 +/* 0x0f88 1266 (124 125) */ srlx %g2,32,%o0 +/* 0x0f8c 1267 (124 125) */ add %g3,%o1,%g3 +/* 0x0f90 1268 (125 127) */ ldx [%sp+2343],%g4 +/* 0x0f94 1269 (125 126) */ add %g3,%o0,%g3 +/* 0x0f98 1270 (126 127) */ sllx %g5,19,%g5 +/* 0x0f9c 1271 (126 129) */ ld [%i1+32],%o1 +/* 0x0fa0 1272 (127 128) */ add %g4,%g5,%g2 +/* 0x0fa4 1273 (127 129) */ ldx [%sp+2319],%g5 +/* 0x0fa8 1274 (128 130) */ ldx [%sp+2327],%g4 +/* 0x0fac 1275 (128 129) */ srlx %g3,32,%o0 +/* 0x0fb0 1276 (128 129) */ add %g2,%o1,%g2 +/* 0x0fb4 1277 (129 130) */ st %g3,[%i0+28] +/* 0x0fb8 1278 (129 130) */ sllx %g5,19,%g5 +/* 0x0fbc 1279 (129 130) */ add %g2,%o0,%g2 +/* 0x0fc0 1280 (130 133) */ ld [%i1+36],%o1 +/* 0x0fc4 1281 (130 131) */ add %g4,%g5,%g3 +/* 0x0fc8 1282 (131 133) */ ldx [%sp+2303],%g5 +/* 0x0fcc 1283 (131 132) */ srlx %g2,32,%o0 +/* 0x0fd0 1284 (132 134) */ ldx [%sp+2311],%g4 +/* 0x0fd4 1285 (132 133) */ add %g3,%o1,%g3 +/* 0x0fd8 1286 (133 134) */ sllx %g5,19,%g5 +/* 0x0fdc 1287 (133 134) */ st %g2,[%i0+32] +/* 0x0fe0 1288 (133 134) */ add %g3,%o0,%g3 +/* 0x0fe4 1289 (134 135) */ add %g4,%g5,%g2 +/* 0x0fe8 1290 (134 136) */ ldx [%sp+2287],%g5 +/* 0x0fec 1291 (135 137) */ ldx [%sp+2295],%g4 +/* 0x0ff0 1292 (135 136) */ srlx %g3,32,%o0 +/* 0x0ff4 1293 (136 139) */ ld [%i1+40],%o1 +/* 0x0ff8 1294 (136 137) */ sllx %g5,19,%g5 +/* 0x0ffc 1295 (137 138) */ st %g3,[%i0+36] +/* 0x1000 1296 (137 138) */ add %g4,%g5,%g3 +/* 0x1004 1297 (138 140) */ ldx [%sp+2271],%g5 +/* 0x1008 1298 (138 139) */ add %g2,%o1,%g2 +/* 0x100c 1299 (139 141) */ ldx [%sp+2279],%g4 +/* 0x1010 1300 (139 140) */ add %g2,%o0,%g2 +/* 0x1014 1301 (140 143) */ ld [%i1+44],%o1 +/* 0x1018 1302 (140 141) */ sllx %g5,19,%g5 +/* 0x101c 1303 (141 142) */ st %g2,[%i0+40] +/* 0x1020 1304 (141 142) */ srlx %g2,32,%o0 +/* 0x1024 1305 (141 142) */ add %g4,%g5,%g2 +/* 0x1028 1306 (142 144) */ ldx [%sp+2255],%g5 +/* 0x102c 1307 (142 143) */ add 
%g3,%o1,%g3 +/* 0x1030 1308 (143 145) */ ldx [%sp+2263],%g4 +/* 0x1034 1309 (143 144) */ add %g3,%o0,%g3 +/* 0x1038 1310 (144 147) */ ld [%i1+48],%o1 +/* 0x103c 1311 (144 145) */ sllx %g5,19,%g5 +/* 0x1040 1312 (145 146) */ srlx %g3,32,%o0 +/* 0x1044 1313 (145 146) */ st %g3,[%i0+44] +/* 0x1048 1314 (145 146) */ add %g4,%g5,%g3 +/* 0x104c 1315 (146 148) */ ldx [%sp+2239],%g5 +/* 0x1050 1316 (146 147) */ add %g2,%o1,%g2 +/* 0x1054 1317 (147 150) */ ld [%i1+52],%o1 +/* 0x1058 1318 (147 148) */ add %g2,%o0,%g2 +/* 0x105c 1319 (148 150) */ ldx [%sp+2247],%g4 +/* 0x1060 1320 (148 149) */ sllx %g5,19,%g5 +/* 0x1064 1321 (149 150) */ srlx %g2,32,%o0 +/* 0x1068 1322 (149 150) */ st %g2,[%i0+48] +/* 0x106c 1323 (149 150) */ add %g3,%o1,%g3 +/* 0x1070 1324 (150 153) */ ld [%i1+56],%o1 +/* 0x1074 1325 (150 151) */ add %g4,%g5,%g2 +/* 0x1078 1326 (150 151) */ add %g3,%o0,%g3 +/* 0x107c 1327 (151 153) */ ldx [%sp+2223],%g5 +/* 0x1080 1328 (151 152) */ srlx %g3,32,%o0 +/* 0x1084 1329 (152 154) */ ldx [%sp+2231],%g4 +/* 0x1088 1330 (152 153) */ add %g2,%o1,%g2 +/* 0x108c 1331 (153 154) */ sllx %g5,19,%g5 +/* 0x1090 1332 (153 156) */ ld [%i1+60],%o1 +/* 0x1094 1333 (153 154) */ add %g2,%o0,%g2 +/* 0x1098 1334 (154 155) */ st %g3,[%i0+52] +/* 0x109c 1335 (154 155) */ add %g4,%g5,%g3 +/* 0x10a0 1336 (155 156) */ st %g2,[%i0+56] +/* 0x10a4 1337 (155 156) */ srlx %g2,32,%g2 +/* 0x10a8 1338 (155 156) */ add %g3,%o1,%g3 +/* 0x10ac 1339 (156 157) */ add %g3,%g2,%g2 +/* 0x10b0 1340 (156 157) */ st %g2,[%i0+60] +/* 0x10b4 1344 (157 158) */ srlx %g2,32,%o3 +/* 0x10b8 1345 (158 159) */ srl %o3,0,%i0 +/* 0x10bc (159 161) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x10c0 (161 162) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000073 +! + + .L77000073: /* frequency 1.0 confidence 0.0 */ + + + or %g0, %i4, %o2 + or %g0, %o0, %o1 + or %g0, %i3, %o0 + +! +! ENTRY .L77000052 +! 
+ + .L77000052: /* frequency 1.0 confidence 0.0 */ +/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2 +/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+2227] +/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3 +/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14 +/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2 +/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+2223] +/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5 +/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2 +/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6 +/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1000),%g1 +/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2 +/* 0x1054 1337 ( 3 4) */ xor %g1,-625,%g1 +/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20 +/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3 +/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8 +/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3 +/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10 +/* 0x106c 1343 ( 5 7) */ ld [%sp+2227],%f9 +/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0 +/* 0x1074 1345 ( 6 8) */ ld [%sp+2223],%f11 +/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1000),%g1 +/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1 +/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18 +/* 0x1084 1349 ( 7 8) */ xor %g1,-617,%g1 +/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4 +/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16 +/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50 +/* 0x1094 ( 8 9) */ subcc %o0,0,%g0 +/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2 +/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1000),%g1 +/* 0x10a0 1356 (10 11) */ xor %g1,-609,%g1 +/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0 +/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7 +/* 0x10ac 1359 (11 12) */ sethi %hi(0x1000),%g1 +/* 0x10b0 1360 (12 13) */ xor %g1,-601,%g1 +/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4 +/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! 
tprob=0.50 +/* 0x10bc (13 14) */ sub %o3,2,%o2 +/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2 +/* 0x10c4 1365 (14 15) */ add %o1,16,%g5 +/* 0x10c8 1366 (14 15) */ or %g0,4,%g4 +/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0 +/* 0x10d0 1368 (15 16) */ add %o1,8,%o1 +/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6 +/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4 +/* 0x10dc 1371 (16 17) */ add %o1,16,%o1 +/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12 +/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0 +/* 0x10e8 1374 (17 18) */ add %o1,8,%o1 +/* 0x10ec 1375 (18 21) */ fitod %f7,%f2 +/* 0x10f0 1376 (19 22) */ fitod %f6,%f6 +/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10 +/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8 +/* 0x1100 1380 (23 26) */ fitod %f13,%f4 +/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6 +/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000154 +! + + .L990000154: /* frequency 1.0 confidence 0.0 */ +/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24 +/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4 +/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4 +/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22 +/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26 +/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0 +/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7 +/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28 +/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6 +/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2 +/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3 +/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0 +/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4 +/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2 +/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12 +/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6 +/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96] +/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96] +/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2 +/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6 +/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96] +/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1 +/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12 +/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4 +/* 0x116c 1408 (10 
11) */ std %f0,[%o4-96] +/* 0x1170 1409 (11 14) */ ldd [%o1],%f0 +/* 0x1174 1410 (11 14) */ fitod %f9,%f2 +/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28 +/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24 +/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22 +/* 0x1184 1414 (13 16) */ fdtox %f4,%f4 +/* 0x1188 1415 (14 17) */ fitod %f10,%f6 +/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10 +/* 0x1190 1417 (15 18) */ fdtox %f24,%f24 +/* 0x1194 1418 (16 19) */ fdtox %f22,%f22 +/* 0x1198 1419 (16 17) */ std %f24,[%g3-64] +/* 0x119c 1420 (17 18) */ std %f22,[%g2-64] +/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10 +/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6 +/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64] +/* 0x11ac 1424 (18 19) */ add %o1,8,%o1 +/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10 +/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0 +/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64] +/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22 +/* 0x11c0 1429 (20 23) */ fitod %f13,%f4 +/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26 +/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24 +/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0 +/* 0x11d4 1434 (23 26) */ fitod %f8,%f6 +/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8 +/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26 +/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24 +/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32] +/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32] +/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8 +/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6 +/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32] +/* 0x11f8 1443 (27 28) */ add %o1,8,%o1 +/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8 +/* 0x1200 1445 (28 29) */ std %f0,[%o4-32] +/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50 +/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000157 +! 
+ + .L990000157: /* frequency 1.0 confidence 0.0 */ +/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28 +/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24 +/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3 +/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12 +/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26 +/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2 +/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4 +/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22 +/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7 +/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6 +/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128] +/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4 +/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2 +/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0 +/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6 +/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24 +/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10 +/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128] +/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10 +/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128] +/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26 +/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10 +/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22 +/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12 +/* 0x1270 1474 (10 13) */ fdtox %f0,%f0 +/* 0x1274 1475 (10 11) */ std %f0,[%o4-128] +/* 0x1278 1476 (11 14) */ fitod %f8,%f4 +/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6 +/* 0x1280 1478 (12 15) */ fdtox %f26,%f0 +/* 0x1284 1479 (12 13) */ std %f0,[%g3-96] +/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10 +/* 0x128c 1481 (13 16) */ fdtox %f2,%f2 +/* 0x1290 1482 (13 14) */ std %f2,[%g2-96] +/* 0x1294 1483 (14 17) */ fitod %f9,%f0 +/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2 +/* 0x129c 1485 (15 18) */ fdtox %f24,%f8 +/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96] +/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4 +/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8 +/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12 +/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96] +/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0 +/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6 +/* 
0x12bc 1493 (19 20) */ std %f6,[%g3-64] +/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10 +/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64] +/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6 +/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2 +/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64] +/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4 +/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2 +/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8 +/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64] +/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6 +/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32] +/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0 +/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4 +/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32] +/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2 +/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32] +/* 0x1300 1510 (26 29) */ fdtox %f0,%f0 +/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50 +/* 0x1308 (26 27) */ std %f0,[%o4-32] + +! +! ENTRY .L77000054 +! + + .L77000054: /* frequency 1.0 confidence 0.0 */ +/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0 + +! +! ENTRY .L990000161 +! 
+ + .L990000161: /* frequency 1.0 confidence 0.0 */ +/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4 +/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1 +/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0 +/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2 +/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0 +/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2 +/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0 +/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6 +/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4 +/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2 +/* 0x133c 1527 (11 14) */ fdtox %f6,%f6 +/* 0x1340 1528 (11 12) */ std %f6,[%g3] +/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0 +/* 0x1348 1530 (12 15) */ fdtox %f4,%f4 +/* 0x134c 1531 (12 13) */ std %f4,[%g2] +/* 0x1350 1532 (12 13) */ add %g2,32,%g2 +/* 0x1354 1533 (13 16) */ fdtox %f2,%f2 +/* 0x1358 1534 (13 14) */ std %f2,[%o7] +/* 0x135c 1535 (13 14) */ add %o7,32,%o7 +/* 0x1360 1536 (14 17) */ fdtox %f0,%f0 +/* 0x1364 1537 (14 15) */ std %f0,[%o4] +/* 0x1368 1538 (14 15) */ add %o4,32,%o4 +/* 0x136c 1539 (15 16) */ add %g3,32,%g3 +/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50 +/* 0x1374 (16 19) */ ldd [%o1],%f0 + +! +! ENTRY .L77000056 +! + + .L77000056: /* frequency 1.0 confidence 0.0 */ +/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0 + +! +! ENTRY .L990000162 +! + + .L990000162: /* frequency 1.0 confidence 0.0 */ +/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50 +/* 0x1380 ( 0 1) */ nop +/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1000),%g1 +/* 0x1388 1556 ( 1 2) */ xor %g1,-625,%g1 +/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4 +/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5 +/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1000),%g1 +/* 0x1398 1560 ( 3 4) */ xor %g1,-617,%g1 +/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7 +/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2 +/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2 +/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3 +/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0 +/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! 
tprob=0.50 +/* 0x13b4 ( 6 7) */ sethi %hi(0x1000),%g1 +/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2 +/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3 +/* 0x13c0 1570 ( 7 8) */ xor %g1,-585,%g1 +/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4 +/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2 +/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1000),%g1 +/* 0x13d0 1574 ( 9 10) */ xor %g1,-593,%g1 +/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2 +/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5 +/* 0x13dc 1577 (10 11) */ sethi %hi(0x1000),%g1 +/* 0x13e0 1578 (11 12) */ xor %g1,-617,%g1 +/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1 +/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1 +/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0 +/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1 +/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3 +/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0 +/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1 +/* 0x1400 1586 (16 17) */ add %g4,8,%g4 +/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3 +/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0 +/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2 +/* 0x1410 1590 (18 19) */ st %o0,[%g3-4] +/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000142 +! 
+ + .L990000142: /* frequency 1.0 confidence 0.0 */ +/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2 +/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2 +/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3 +/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5 +/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1 +/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0 +/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2 +/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0 +/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1 +/* 0x143c 1602 ( 4 5) */ st %o1,[%g3] +/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5 +/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0 +/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1 +/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0 +/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3 +/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2 +/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0 +/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3 +/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1 +/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0 +/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12] +/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5 +/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4 +/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0 +/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1 +/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2 +/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3 +/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2 +/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1 +/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0 +/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2 +/* 0x1494 1624 (12 13) */ st %o2,[%g3-8] +/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5 +/* 0x149c 1626 (12 13) */ add %g5,64,%g5 +/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2 +/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0 +/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1 +/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0 +/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3 +/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2 +/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0 +/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4] +/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50 +/* 0x14c4 (16 17) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000145 +! 
+ + .L990000145: /* frequency 1.0 confidence 0.0 */ +/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3 +/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3 +/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2 +/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0 +/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0 +/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4] +/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0 +/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50 +/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5 + +! +! ENTRY .L77000058 +! + + .L77000058: /* frequency 1.0 confidence 0.0 */ +/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2 + +! +! ENTRY .L990000160 +! + + .L990000160: /* frequency 1.0 confidence 0.0 */ +/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3 +/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0 +/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2 +/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1 +/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2 +/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2 +/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0 +/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5 +/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0 +/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4 +/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0 +/* 0x151c 1661 ( 4 5) */ st %o0,[%g3] +/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0 +/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5 +/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3 +/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50 +/* 0x1530 ( 6 8) */ ldx [%g2],%o2 + +! +! ENTRY .L77770061 +! + + .L77770061: /* frequency 1.0 confidence 0.0 */ +/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0 + + +/* 0x124c 1476 ( 0 0) */ .type mul_add,2 +/* 0x124c 1477 ( 0 0) */ .size mul_add,(.-mul_add) +/* 0x124c 1480 ( 0 0) */ .align 8 +/* 0x1250 1486 ( 0 0) */ .global mul_add_inp + +! +! ENTRY mul_add_inp +! 
+ + .global mul_add_inp + mul_add_inp: /* frequency 1.0 confidence 0.0 */ +/* mul_add_inp: wrapper that calls mul_add with the incoming first argument (%i0) passed as BOTH the first and the second outgoing argument (%o0 and %o1). %i1 is forwarded as the third argument, %i2 sign-extended from 32 bits (sra ...,0) as the fourth, and %i3 zero-extended from 32 bits (srl ...,0, executed in the delay slot of the call) as the fifth. The low 32 bits of the value mul_add leaves in %o0 are zero-extended into %i0, which is the caller's %o0 once the register window is restored. */ +/* 0x1250 1488 ( 0 1) */ save %sp,-176,%sp +/* 0x1254 1500 ( 1 2) */ sra %i2,0,%o3 +/* 0x1258 1501 ( 1 2) */ or %g0,%i1,%o2 +/* 0x125c 1502 ( 2 3) */ or %g0,%i0,%o0 +/* 0x1260 1503 ( 2 3) */ or %g0,%i0,%o1 +/* 0x1264 1504 ( 3 5) */ call mul_add ! params = ! Result = +/* 0x1268 ( 4 5) */ srl %i3,0,%o4 +/* 0x126c 1506 ( 5 6) */ srl %o0,0,%i0 +/* 0x1270 ( 6 8) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x1274 ( 8 9) */ restore %g0,%g0,%g0 +/* 0x1278 1509 ( 0 0) */ .type mul_add_inp,2 +/* 0x1278 1510 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp) + + .section ".data",#alloc,#write +/* 0x1278 6 ( 0 0) */ .align 8 + +! +! ENTRY mask_cnst +! + + mask_cnst: /* frequency 1.0 confidence 0.0 */ +/* mask_cnst: one 8-byte, 8-byte-aligned data object in the writable .data section. The value -9223372034707292160 is the 64-bit pattern 0x8000000080000000, i.e. only the top bit of each 32-bit half is set. */ +/* 0x1278 8 ( 0 0) */ .xword -9223372034707292160 +/* 0x1280 9 ( 0 0) */ .type mask_cnst,#object +/* 0x1280 10 ( 0 0) */ .size mask_cnst,8 + diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c new file mode 100644 index 0000000000..94e86eedb9 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpvalpha.c @@ -0,0 +1,183 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi-priv.h" +/* NOTE(review): the header name of the next #include is missing in this copy. The asm("...", a, b) intrinsic form used by MP_MUL_DxD presumably needs the compiler's asm-intrinsics header (likely <c_asm.h>) -- confirm against upstream before building. */ +#include + +/* MP_MUL_DxD: full product of the two digits a and b. Plo receives the result of the Alpha mulq instruction (low half of the product) and Phi the result of umulh (unsigned high half) on the same operands. */ +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + Plo = asm("mulq %a0, %a1, %v0", a, b); \ + Phi = asm("umulh %a0, %a1, %v0", a, b); \ + } + +/* This is empty for the loop in s_mpv_mul_d */ +#define CARRY_ADD + +/* ONE_MUL: one multiply-accumulate step. Reads the next digit of a, multiplies it by b, adds the running carry to the low word (bumping the high word when that addition wraps), applies whatever CARRY_ADD currently expands to, stores the low word through c and keeps the high word as the new carry. Expects a, b, c, carry, a_i, a0b0 and a1b1 to be in scope at the expansion site. */ +#define ONE_MUL \ + a_i = *a++; \ + MP_MUL_DxD(a_i, b, a1b1, a0b0); \ + a0b0 += carry; \ + if (a0b0 < carry) \ + ++a1b1; \ + CARRY_ADD \ + *c++ = a0b0; \ + carry = a1b1; + +/* FOUR_MUL, SIXTEEN_MUL, THIRTYTWO_MUL and ONETWENTYEIGHT_MUL are straight-line repetitions of ONE_MUL: 4, 16, 32 and 128 steps respectively. */ +#define FOUR_MUL \ + ONE_MUL \ + ONE_MUL \ + ONE_MUL \ + ONE_MUL + +#define SIXTEEN_MUL \ + FOUR_MUL \ + FOUR_MUL \ + FOUR_MUL \ + FOUR_MUL + +#define THIRTYTWO_MUL \ + SIXTEEN_MUL \ + SIXTEEN_MUL + +#define ONETWENTYEIGHT_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL + +/* EXPAND_256(CALL): function-body fragment. Declares carry (initially 0), a_i, a0b0 and a1b1, then processes the low eight bits' worth of a_len (a_len mod 256 digits) by testing each bit and running the matching number of unrolled ONE_MUL steps. a_len is then rounded down to a multiple of 256; if anything is left, CALL(a, a_len, b, c, carry) handles it and c is advanced by a_len. The final carry is left in the local variable carry for the enclosing function to use. */ +#define EXPAND_256(CALL) \ + mp_digit carry = 0; \ + mp_digit a_i; \ + mp_digit a0b0, a1b1; \ + if (a_len & 255) { \ + if (a_len & 1) { \ + ONE_MUL \ + } \ + if (a_len & 2) { \ + ONE_MUL \ + ONE_MUL \ + } \ + if (a_len & 4) { \ + FOUR_MUL \ + } \ + if (a_len & 8) { \ + FOUR_MUL \ + FOUR_MUL \ + } \ + if (a_len & 16) { \ + SIXTEEN_MUL \ + } \ + if (a_len & 32) { \ + THIRTYTWO_MUL \ + } \ + if (a_len & 64) { \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + } \ + if (a_len & 128) { \ + ONETWENTYEIGHT_MUL \ + } \ + a_len = a_len & (-256); \ + } \ + if (a_len >= 256) { \ + carry = CALL(a, a_len, b, c, carry); \ + c += a_len; \ + } + +/* FUNC_NAME(NAME): the common signature (declarator only) of the 256-digit block helpers: takes the digit array a, its length, the multiplier digit b, the output pointer c and an incoming carry, and returns an mp_digit. */ +#define FUNC_NAME(NAME) \ + mp_digit NAME(const mp_digit *a, \ + mp_size a_len, \ + mp_digit b, mp_digit *c, \ + mp_digit carry) + +/* DECLARE_MUL_256(FNAME): defines a block helper with the FUNC_NAME signature. Each loop iteration runs 256 unrolled ONE_MUL steps and subtracts 256 from a_len; the loop ends only when a_len reaches exactly 0, so a_len must be a multiple of 256 (EXPAND_256 rounds it down before calling). Returns the final carry. */ +#define DECLARE_MUL_256(FNAME) \ + FUNC_NAME(FNAME) \ + { \ + mp_digit a_i; \ + mp_digit a0b0, a1b1; \ + while (a_len) { \ + ONETWENTYEIGHT_MUL \ + ONETWENTYEIGHT_MUL \ + a_len -= 256; \ + } \ + return carry; \ + } + +/* Expanding the loop in s_mpv_mul_d appeared to slow down the + (admittedly) small number of tests (i.e., timetest) used to + measure performance, so this define disables that optimization. 
*/ +#define DO_NOT_EXPAND 1 + +/* Need forward declaration so it can be instantiated after + the routine that uses it; this helps locality somewhat */ +#if !defined(DO_NOT_EXPAND) +FUNC_NAME(s_mpv_mul_d_MUL256); +#endif + +/* c = a * b: multiplies the a_len digits at a by the single digit b, + * stores the a_len low result digits at c and the final carry digit + * immediately after them. With DO_NOT_EXPAND defined (as it is above) + * the plain one-digit-per-iteration loop is compiled; otherwise the + * unrolled EXPAND_256 form is used. */ +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ +#if defined(DO_NOT_EXPAND) + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) /* low word wrapped when the carry was added */ + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } +#else + EXPAND_256(s_mpv_mul_d_MUL256) +#endif + *c = carry; +} + +#if !defined(DO_NOT_EXPAND) +DECLARE_MUL_256(s_mpv_mul_d_MUL256) +#endif + +#undef CARRY_ADD +/* This is redefined for the loop in s_mpv_mul_d_add: it adds the digit + * currently at *c into the low word (reusing a_i as a temporary) and + * bumps the high word when that addition wraps. */ +#define CARRY_ADD \ + a0b0 += a_i = *c; \ + if (a0b0 < a_i) \ + ++a1b1; + +/* Need forward declaration so it can be instantiated between the + two routines that use it; this helps locality somewhat */ +FUNC_NAME(s_mpv_mul_d_add_MUL256); + +/* c += a * b over a_len digits. The final carry is stored into (not + * added to) the digit of c that follows those a_len digits. */ +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + EXPAND_256(s_mpv_mul_d_add_MUL256) + *c = carry; +} + +/* Instantiate multiply 256 routine here */ +DECLARE_MUL_256(s_mpv_mul_d_add_MUL256) + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b over a_len digits, then the carry is added into the + * following digits of c, one digit at a time, until it becomes zero. */ +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + EXPAND_256(s_mpv_mul_d_add_MUL256) + while (carry) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = carry < c_i; /* 1 if the addition wrapped, else 0 */ + } +} diff --git a/security/nss/lib/freebl/mpi/mulsqr.c b/security/nss/lib/freebl/mpi/mulsqr.c new file mode 100644 index 0000000000..461d40ab36 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mulsqr.c @@ -0,0 +1,84 @@ +/* + * Test whether to include squaring code given the current settings + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#define MP_SQUARE 1 /* make sure squaring code is included */ + +#include "mpi.h" +#include "mpprime.h" + +/* Timing harness comparing mp_mul(a, a) against mp_sqr(a). + * argv[1] is the number of iterations and argv[2] the operand size in + * bits (at least CHAR_BIT; rounded up to whole digits). Both loops are + * run after srand() with the same seed, presumably so they see the same + * operand sequence. Prints both CPU times measured with clock(), the + * relative speedup and which routine to prefer. Returns 1 on bad + * arguments, 0 otherwise. + * NOTE(review): the results of mp_init_size, mpp_random_size, mp_mul and + * mp_sqr are not checked. */ +int +main(int argc, char *argv[]) +{ + int ntests, prec, ix; + unsigned int seed; + clock_t start, stop; + double multime, sqrtime; + mp_int a, c; + + seed = (unsigned int)time(NULL); + + if (argc < 3) { + fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]); + return 1; + } + + if ((ntests = abs(atoi(argv[1]))) == 0) { + fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]); + return 1; + } + if ((prec = abs(atoi(argv[2]))) < CHAR_BIT) { + fprintf(stderr, "%s: must request at least %d bits.\n", argv[0], + CHAR_BIT); + return 1; + } + + prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT; /* bits -> digits, rounding up */ + + mp_init_size(&a, prec); + mp_init_size(&c, 2 * prec); + + /* Test multiplication by self */ + srand(seed); + start = clock(); + for (ix = 0; ix < ntests; ix++) { + mpp_random_size(&a, prec); + mp_mul(&a, &a, &c); + } + stop = clock(); + + multime = (double)(stop - start) / CLOCKS_PER_SEC; + + /* Test squaring */ + srand(seed); + start = clock(); + for (ix = 0; ix < ntests; ix++) { + mpp_random_size(&a, prec); + mp_sqr(&a, &c); + } + stop = clock(); + + sqrtime = (double)(stop - start) / CLOCKS_PER_SEC; + + printf("Multiply: %.4f\n", multime); + printf("Square: %.4f\n", sqrtime); + if (multime < sqrtime) { + printf("Speedup: %.1f%%\n", 100.0 * (1.0 - multime / sqrtime)); + printf("Prefer: multiply\n"); + } else { + printf("Speedup: %.1f%%\n", 100.0 * (1.0 - sqrtime / multime)); + printf("Prefer: square\n"); + } + + mp_clear(&a); + mp_clear(&c); + return 0; +} diff --git a/security/nss/lib/freebl/mpi/primes.c b/security/nss/lib/freebl/mpi/primes.c new file mode 100644 index 0000000000..3e64a2acaa --- /dev/null +++ b/security/nss/lib/freebl/mpi/primes.c @@ -0,0 +1,841 @@ +/* + * These tables of primes were generated using the 
'sieve' program + * (sieve.c) and converted to this format with 'ptab.pl'. + * + * The 'small' table is just the first 128 primes. The 'large' table + * is a table of all the prime values that will fit into a single + * mp_digit (given the current size of an mp_digit, which is two bytes). + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#if SMALL_TABLE +#define MP_PRIME_TAB_SIZE 128 +#else +#define MP_PRIME_TAB_SIZE 6542 +#endif + +const int prime_tab_size = MP_PRIME_TAB_SIZE; +const mp_digit prime_tab[] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, +#if !SMALL_TABLE + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 
0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653, + 0x0655, 0x065B, 0x0665, 0x0679, 0x067F, 0x0683, 0x0685, 0x069D, + 0x06A1, 0x06A3, 0x06AD, 0x06B9, 0x06BB, 0x06C5, 0x06CD, 0x06D3, + 0x06D9, 0x06DF, 0x06F1, 0x06F7, 0x06FB, 0x06FD, 0x0709, 0x0713, + 0x071F, 0x0727, 0x0737, 0x0745, 0x074B, 0x074F, 0x0751, 0x0755, + 0x0757, 0x0761, 0x076D, 0x0773, 0x0779, 0x078B, 0x078D, 0x079D, + 0x079F, 0x07B5, 0x07BB, 0x07C3, 0x07C9, 0x07CD, 0x07CF, 0x07D3, + 0x07DB, 0x07E1, 0x07EB, 0x07ED, 0x07F7, 0x0805, 0x080F, 0x0815, + 0x0821, 0x0823, 0x0827, 0x0829, 0x0833, 0x083F, 0x0841, 0x0851, + 0x0853, 0x0859, 0x085D, 0x085F, 0x0869, 0x0871, 0x0883, 0x089B, + 0x089F, 0x08A5, 0x08AD, 0x08BD, 0x08BF, 0x08C3, 0x08CB, 0x08DB, + 0x08DD, 0x08E1, 0x08E9, 0x08EF, 0x08F5, 0x08F9, 0x0905, 0x0907, + 0x091D, 0x0923, 0x0925, 0x092B, 0x092F, 0x0935, 0x0943, 0x0949, + 0x094D, 0x094F, 0x0955, 0x0959, 0x095F, 0x096B, 0x0971, 0x0977, + 0x0985, 0x0989, 0x098F, 0x099B, 0x09A3, 0x09A9, 0x09AD, 0x09C7, + 0x09D9, 0x09E3, 0x09EB, 0x09EF, 0x09F5, 0x09F7, 0x09FD, 0x0A13, + 0x0A1F, 0x0A21, 0x0A31, 0x0A39, 0x0A3D, 0x0A49, 0x0A57, 0x0A61, + 0x0A63, 0x0A67, 0x0A6F, 0x0A75, 0x0A7B, 0x0A7F, 0x0A81, 0x0A85, + 0x0A8B, 0x0A93, 0x0A97, 0x0A99, 0x0A9F, 0x0AA9, 0x0AAB, 
0x0AB5, + 0x0ABD, 0x0AC1, 0x0ACF, 0x0AD9, 0x0AE5, 0x0AE7, 0x0AED, 0x0AF1, + 0x0AF3, 0x0B03, 0x0B11, 0x0B15, 0x0B1B, 0x0B23, 0x0B29, 0x0B2D, + 0x0B3F, 0x0B47, 0x0B51, 0x0B57, 0x0B5D, 0x0B65, 0x0B6F, 0x0B7B, + 0x0B89, 0x0B8D, 0x0B93, 0x0B99, 0x0B9B, 0x0BB7, 0x0BB9, 0x0BC3, + 0x0BCB, 0x0BCF, 0x0BDD, 0x0BE1, 0x0BE9, 0x0BF5, 0x0BFB, 0x0C07, + 0x0C0B, 0x0C11, 0x0C25, 0x0C2F, 0x0C31, 0x0C41, 0x0C5B, 0x0C5F, + 0x0C61, 0x0C6D, 0x0C73, 0x0C77, 0x0C83, 0x0C89, 0x0C91, 0x0C95, + 0x0C9D, 0x0CB3, 0x0CB5, 0x0CB9, 0x0CBB, 0x0CC7, 0x0CE3, 0x0CE5, + 0x0CEB, 0x0CF1, 0x0CF7, 0x0CFB, 0x0D01, 0x0D03, 0x0D0F, 0x0D13, + 0x0D1F, 0x0D21, 0x0D2B, 0x0D2D, 0x0D3D, 0x0D3F, 0x0D4F, 0x0D55, + 0x0D69, 0x0D79, 0x0D81, 0x0D85, 0x0D87, 0x0D8B, 0x0D8D, 0x0DA3, + 0x0DAB, 0x0DB7, 0x0DBD, 0x0DC7, 0x0DC9, 0x0DCD, 0x0DD3, 0x0DD5, + 0x0DDB, 0x0DE5, 0x0DE7, 0x0DF3, 0x0DFD, 0x0DFF, 0x0E09, 0x0E17, + 0x0E1D, 0x0E21, 0x0E27, 0x0E2F, 0x0E35, 0x0E3B, 0x0E4B, 0x0E57, + 0x0E59, 0x0E5D, 0x0E6B, 0x0E71, 0x0E75, 0x0E7D, 0x0E87, 0x0E8F, + 0x0E95, 0x0E9B, 0x0EB1, 0x0EB7, 0x0EB9, 0x0EC3, 0x0ED1, 0x0ED5, + 0x0EDB, 0x0EED, 0x0EEF, 0x0EF9, 0x0F07, 0x0F0B, 0x0F0D, 0x0F17, + 0x0F25, 0x0F29, 0x0F31, 0x0F43, 0x0F47, 0x0F4D, 0x0F4F, 0x0F53, + 0x0F59, 0x0F5B, 0x0F67, 0x0F6B, 0x0F7F, 0x0F95, 0x0FA1, 0x0FA3, + 0x0FA7, 0x0FAD, 0x0FB3, 0x0FB5, 0x0FBB, 0x0FD1, 0x0FD3, 0x0FD9, + 0x0FE9, 0x0FEF, 0x0FFB, 0x0FFD, 0x1003, 0x100F, 0x101F, 0x1021, + 0x1025, 0x102B, 0x1039, 0x103D, 0x103F, 0x1051, 0x1069, 0x1073, + 0x1079, 0x107B, 0x1085, 0x1087, 0x1091, 0x1093, 0x109D, 0x10A3, + 0x10A5, 0x10AF, 0x10B1, 0x10BB, 0x10C1, 0x10C9, 0x10E7, 0x10F1, + 0x10F3, 0x10FD, 0x1105, 0x110B, 0x1115, 0x1127, 0x112D, 0x1139, + 0x1145, 0x1147, 0x1159, 0x115F, 0x1163, 0x1169, 0x116F, 0x1181, + 0x1183, 0x118D, 0x119B, 0x11A1, 0x11A5, 0x11A7, 0x11AB, 0x11C3, + 0x11C5, 0x11D1, 0x11D7, 0x11E7, 0x11EF, 0x11F5, 0x11FB, 0x120D, + 0x121D, 0x121F, 0x1223, 0x1229, 0x122B, 0x1231, 0x1237, 0x1241, + 0x1247, 0x1253, 0x125F, 0x1271, 0x1273, 0x1279, 0x127D, 0x128F, + 0x1297, 
0x12AF, 0x12B3, 0x12B5, 0x12B9, 0x12BF, 0x12C1, 0x12CD, + 0x12D1, 0x12DF, 0x12FD, 0x1307, 0x130D, 0x1319, 0x1327, 0x132D, + 0x1337, 0x1343, 0x1345, 0x1349, 0x134F, 0x1357, 0x135D, 0x1367, + 0x1369, 0x136D, 0x137B, 0x1381, 0x1387, 0x138B, 0x1391, 0x1393, + 0x139D, 0x139F, 0x13AF, 0x13BB, 0x13C3, 0x13D5, 0x13D9, 0x13DF, + 0x13EB, 0x13ED, 0x13F3, 0x13F9, 0x13FF, 0x141B, 0x1421, 0x142F, + 0x1433, 0x143B, 0x1445, 0x144D, 0x1459, 0x146B, 0x146F, 0x1471, + 0x1475, 0x148D, 0x1499, 0x149F, 0x14A1, 0x14B1, 0x14B7, 0x14BD, + 0x14CB, 0x14D5, 0x14E3, 0x14E7, 0x1505, 0x150B, 0x1511, 0x1517, + 0x151F, 0x1525, 0x1529, 0x152B, 0x1537, 0x153D, 0x1541, 0x1543, + 0x1549, 0x155F, 0x1565, 0x1567, 0x156B, 0x157D, 0x157F, 0x1583, + 0x158F, 0x1591, 0x1597, 0x159B, 0x15B5, 0x15BB, 0x15C1, 0x15C5, + 0x15CD, 0x15D7, 0x15F7, 0x1607, 0x1609, 0x160F, 0x1613, 0x1615, + 0x1619, 0x161B, 0x1625, 0x1633, 0x1639, 0x163D, 0x1645, 0x164F, + 0x1655, 0x1669, 0x166D, 0x166F, 0x1675, 0x1693, 0x1697, 0x169F, + 0x16A9, 0x16AF, 0x16B5, 0x16BD, 0x16C3, 0x16CF, 0x16D3, 0x16D9, + 0x16DB, 0x16E1, 0x16E5, 0x16EB, 0x16ED, 0x16F7, 0x16F9, 0x1709, + 0x170F, 0x1723, 0x1727, 0x1733, 0x1741, 0x175D, 0x1763, 0x1777, + 0x177B, 0x178D, 0x1795, 0x179B, 0x179F, 0x17A5, 0x17B3, 0x17B9, + 0x17BF, 0x17C9, 0x17CB, 0x17D5, 0x17E1, 0x17E9, 0x17F3, 0x17F5, + 0x17FF, 0x1807, 0x1813, 0x181D, 0x1835, 0x1837, 0x183B, 0x1843, + 0x1849, 0x184D, 0x1855, 0x1867, 0x1871, 0x1877, 0x187D, 0x187F, + 0x1885, 0x188F, 0x189B, 0x189D, 0x18A7, 0x18AD, 0x18B3, 0x18B9, + 0x18C1, 0x18C7, 0x18D1, 0x18D7, 0x18D9, 0x18DF, 0x18E5, 0x18EB, + 0x18F5, 0x18FD, 0x1915, 0x191B, 0x1931, 0x1933, 0x1945, 0x1949, + 0x1951, 0x195B, 0x1979, 0x1981, 0x1993, 0x1997, 0x1999, 0x19A3, + 0x19A9, 0x19AB, 0x19B1, 0x19B5, 0x19C7, 0x19CF, 0x19DB, 0x19ED, + 0x19FD, 0x1A03, 0x1A05, 0x1A11, 0x1A17, 0x1A21, 0x1A23, 0x1A2D, + 0x1A2F, 0x1A35, 0x1A3F, 0x1A4D, 0x1A51, 0x1A69, 0x1A6B, 0x1A7B, + 0x1A7D, 0x1A87, 0x1A89, 0x1A93, 0x1AA7, 0x1AAB, 0x1AAD, 0x1AB1, + 0x1AB9, 0x1AC9, 0x1ACF, 
0x1AD5, 0x1AD7, 0x1AE3, 0x1AF3, 0x1AFB, + 0x1AFF, 0x1B05, 0x1B23, 0x1B25, 0x1B2F, 0x1B31, 0x1B37, 0x1B3B, + 0x1B41, 0x1B47, 0x1B4F, 0x1B55, 0x1B59, 0x1B65, 0x1B6B, 0x1B73, + 0x1B7F, 0x1B83, 0x1B91, 0x1B9D, 0x1BA7, 0x1BBF, 0x1BC5, 0x1BD1, + 0x1BD7, 0x1BD9, 0x1BEF, 0x1BF7, 0x1C09, 0x1C13, 0x1C19, 0x1C27, + 0x1C2B, 0x1C2D, 0x1C33, 0x1C3D, 0x1C45, 0x1C4B, 0x1C4F, 0x1C55, + 0x1C73, 0x1C81, 0x1C8B, 0x1C8D, 0x1C99, 0x1CA3, 0x1CA5, 0x1CB5, + 0x1CB7, 0x1CC9, 0x1CE1, 0x1CF3, 0x1CF9, 0x1D09, 0x1D1B, 0x1D21, + 0x1D23, 0x1D35, 0x1D39, 0x1D3F, 0x1D41, 0x1D4B, 0x1D53, 0x1D5D, + 0x1D63, 0x1D69, 0x1D71, 0x1D75, 0x1D7B, 0x1D7D, 0x1D87, 0x1D89, + 0x1D95, 0x1D99, 0x1D9F, 0x1DA5, 0x1DA7, 0x1DB3, 0x1DB7, 0x1DC5, + 0x1DD7, 0x1DDB, 0x1DE1, 0x1DF5, 0x1DF9, 0x1E01, 0x1E07, 0x1E0B, + 0x1E13, 0x1E17, 0x1E25, 0x1E2B, 0x1E2F, 0x1E3D, 0x1E49, 0x1E4D, + 0x1E4F, 0x1E6D, 0x1E71, 0x1E89, 0x1E8F, 0x1E95, 0x1EA1, 0x1EAD, + 0x1EBB, 0x1EC1, 0x1EC5, 0x1EC7, 0x1ECB, 0x1EDD, 0x1EE3, 0x1EEF, + 0x1EF7, 0x1EFD, 0x1F01, 0x1F0D, 0x1F0F, 0x1F1B, 0x1F39, 0x1F49, + 0x1F4B, 0x1F51, 0x1F67, 0x1F75, 0x1F7B, 0x1F85, 0x1F91, 0x1F97, + 0x1F99, 0x1F9D, 0x1FA5, 0x1FAF, 0x1FB5, 0x1FBB, 0x1FD3, 0x1FE1, + 0x1FE7, 0x1FEB, 0x1FF3, 0x1FFF, 0x2011, 0x201B, 0x201D, 0x2027, + 0x2029, 0x202D, 0x2033, 0x2047, 0x204D, 0x2051, 0x205F, 0x2063, + 0x2065, 0x2069, 0x2077, 0x207D, 0x2089, 0x20A1, 0x20AB, 0x20B1, + 0x20B9, 0x20C3, 0x20C5, 0x20E3, 0x20E7, 0x20ED, 0x20EF, 0x20FB, + 0x20FF, 0x210D, 0x2113, 0x2135, 0x2141, 0x2149, 0x214F, 0x2159, + 0x215B, 0x215F, 0x2173, 0x217D, 0x2185, 0x2195, 0x2197, 0x21A1, + 0x21AF, 0x21B3, 0x21B5, 0x21C1, 0x21C7, 0x21D7, 0x21DD, 0x21E5, + 0x21E9, 0x21F1, 0x21F5, 0x21FB, 0x2203, 0x2209, 0x220F, 0x221B, + 0x2221, 0x2225, 0x222B, 0x2231, 0x2239, 0x224B, 0x224F, 0x2263, + 0x2267, 0x2273, 0x2275, 0x227F, 0x2285, 0x2287, 0x2291, 0x229D, + 0x229F, 0x22A3, 0x22B7, 0x22BD, 0x22DB, 0x22E1, 0x22E5, 0x22ED, + 0x22F7, 0x2303, 0x2309, 0x230B, 0x2327, 0x2329, 0x232F, 0x2333, + 0x2335, 0x2345, 0x2351, 0x2353, 0x2359, 
0x2363, 0x236B, 0x2383, + 0x238F, 0x2395, 0x23A7, 0x23AD, 0x23B1, 0x23BF, 0x23C5, 0x23C9, + 0x23D5, 0x23DD, 0x23E3, 0x23EF, 0x23F3, 0x23F9, 0x2405, 0x240B, + 0x2417, 0x2419, 0x2429, 0x243D, 0x2441, 0x2443, 0x244D, 0x245F, + 0x2467, 0x246B, 0x2479, 0x247D, 0x247F, 0x2485, 0x249B, 0x24A1, + 0x24AF, 0x24B5, 0x24BB, 0x24C5, 0x24CB, 0x24CD, 0x24D7, 0x24D9, + 0x24DD, 0x24DF, 0x24F5, 0x24F7, 0x24FB, 0x2501, 0x2507, 0x2513, + 0x2519, 0x2527, 0x2531, 0x253D, 0x2543, 0x254B, 0x254F, 0x2573, + 0x2581, 0x258D, 0x2593, 0x2597, 0x259D, 0x259F, 0x25AB, 0x25B1, + 0x25BD, 0x25CD, 0x25CF, 0x25D9, 0x25E1, 0x25F7, 0x25F9, 0x2605, + 0x260B, 0x260F, 0x2615, 0x2627, 0x2629, 0x2635, 0x263B, 0x263F, + 0x264B, 0x2653, 0x2659, 0x2665, 0x2669, 0x266F, 0x267B, 0x2681, + 0x2683, 0x268F, 0x269B, 0x269F, 0x26AD, 0x26B3, 0x26C3, 0x26C9, + 0x26CB, 0x26D5, 0x26DD, 0x26EF, 0x26F5, 0x2717, 0x2719, 0x2735, + 0x2737, 0x274D, 0x2753, 0x2755, 0x275F, 0x276B, 0x276D, 0x2773, + 0x2777, 0x277F, 0x2795, 0x279B, 0x279D, 0x27A7, 0x27AF, 0x27B3, + 0x27B9, 0x27C1, 0x27C5, 0x27D1, 0x27E3, 0x27EF, 0x2803, 0x2807, + 0x280D, 0x2813, 0x281B, 0x281F, 0x2821, 0x2831, 0x283D, 0x283F, + 0x2849, 0x2851, 0x285B, 0x285D, 0x2861, 0x2867, 0x2875, 0x2881, + 0x2897, 0x289F, 0x28BB, 0x28BD, 0x28C1, 0x28D5, 0x28D9, 0x28DB, + 0x28DF, 0x28ED, 0x28F7, 0x2903, 0x2905, 0x2911, 0x2921, 0x2923, + 0x293F, 0x2947, 0x295D, 0x2965, 0x2969, 0x296F, 0x2975, 0x2983, + 0x2987, 0x298F, 0x299B, 0x29A1, 0x29A7, 0x29AB, 0x29BF, 0x29C3, + 0x29D5, 0x29D7, 0x29E3, 0x29E9, 0x29ED, 0x29F3, 0x2A01, 0x2A13, + 0x2A1D, 0x2A25, 0x2A2F, 0x2A4F, 0x2A55, 0x2A5F, 0x2A65, 0x2A6B, + 0x2A6D, 0x2A73, 0x2A83, 0x2A89, 0x2A8B, 0x2A97, 0x2A9D, 0x2AB9, + 0x2ABB, 0x2AC5, 0x2ACD, 0x2ADD, 0x2AE3, 0x2AEB, 0x2AF1, 0x2AFB, + 0x2B13, 0x2B27, 0x2B31, 0x2B33, 0x2B3D, 0x2B3F, 0x2B4B, 0x2B4F, + 0x2B55, 0x2B69, 0x2B6D, 0x2B6F, 0x2B7B, 0x2B8D, 0x2B97, 0x2B99, + 0x2BA3, 0x2BA5, 0x2BA9, 0x2BBD, 0x2BCD, 0x2BE7, 0x2BEB, 0x2BF3, + 0x2BF9, 0x2BFD, 0x2C09, 0x2C0F, 0x2C17, 0x2C23, 0x2C2F, 
0x2C35, + 0x2C39, 0x2C41, 0x2C57, 0x2C59, 0x2C69, 0x2C77, 0x2C81, 0x2C87, + 0x2C93, 0x2C9F, 0x2CAD, 0x2CB3, 0x2CB7, 0x2CCB, 0x2CCF, 0x2CDB, + 0x2CE1, 0x2CE3, 0x2CE9, 0x2CEF, 0x2CFF, 0x2D07, 0x2D1D, 0x2D1F, + 0x2D3B, 0x2D43, 0x2D49, 0x2D4D, 0x2D61, 0x2D65, 0x2D71, 0x2D89, + 0x2D9D, 0x2DA1, 0x2DA9, 0x2DB3, 0x2DB5, 0x2DC5, 0x2DC7, 0x2DD3, + 0x2DDF, 0x2E01, 0x2E03, 0x2E07, 0x2E0D, 0x2E19, 0x2E1F, 0x2E25, + 0x2E2D, 0x2E33, 0x2E37, 0x2E39, 0x2E3F, 0x2E57, 0x2E5B, 0x2E6F, + 0x2E79, 0x2E7F, 0x2E85, 0x2E93, 0x2E97, 0x2E9D, 0x2EA3, 0x2EA5, + 0x2EB1, 0x2EB7, 0x2EC1, 0x2EC3, 0x2ECD, 0x2ED3, 0x2EE7, 0x2EEB, + 0x2F05, 0x2F09, 0x2F0B, 0x2F11, 0x2F27, 0x2F29, 0x2F41, 0x2F45, + 0x2F4B, 0x2F4D, 0x2F51, 0x2F57, 0x2F6F, 0x2F75, 0x2F7D, 0x2F81, + 0x2F83, 0x2FA5, 0x2FAB, 0x2FB3, 0x2FC3, 0x2FCF, 0x2FD1, 0x2FDB, + 0x2FDD, 0x2FE7, 0x2FED, 0x2FF5, 0x2FF9, 0x3001, 0x300D, 0x3023, + 0x3029, 0x3037, 0x303B, 0x3055, 0x3059, 0x305B, 0x3067, 0x3071, + 0x3079, 0x307D, 0x3085, 0x3091, 0x3095, 0x30A3, 0x30A9, 0x30B9, + 0x30BF, 0x30C7, 0x30CB, 0x30D1, 0x30D7, 0x30DF, 0x30E5, 0x30EF, + 0x30FB, 0x30FD, 0x3103, 0x3109, 0x3119, 0x3121, 0x3127, 0x312D, + 0x3139, 0x3143, 0x3145, 0x314B, 0x315D, 0x3161, 0x3167, 0x316D, + 0x3173, 0x317F, 0x3191, 0x3199, 0x319F, 0x31A9, 0x31B1, 0x31C3, + 0x31C7, 0x31D5, 0x31DB, 0x31ED, 0x31F7, 0x31FF, 0x3209, 0x3215, + 0x3217, 0x321D, 0x3229, 0x3235, 0x3259, 0x325D, 0x3263, 0x326B, + 0x326F, 0x3275, 0x3277, 0x327B, 0x328D, 0x3299, 0x329F, 0x32A7, + 0x32AD, 0x32B3, 0x32B7, 0x32C9, 0x32CB, 0x32CF, 0x32D1, 0x32E9, + 0x32ED, 0x32F3, 0x32F9, 0x3307, 0x3325, 0x332B, 0x332F, 0x3335, + 0x3341, 0x3347, 0x335B, 0x335F, 0x3367, 0x336B, 0x3373, 0x3379, + 0x337F, 0x3383, 0x33A1, 0x33A3, 0x33AD, 0x33B9, 0x33C1, 0x33CB, + 0x33D3, 0x33EB, 0x33F1, 0x33FD, 0x3401, 0x340F, 0x3413, 0x3419, + 0x341B, 0x3437, 0x3445, 0x3455, 0x3457, 0x3463, 0x3469, 0x346D, + 0x3481, 0x348B, 0x3491, 0x3497, 0x349D, 0x34A5, 0x34AF, 0x34BB, + 0x34C9, 0x34D3, 0x34E1, 0x34F1, 0x34FF, 0x3509, 0x3517, 0x351D, + 0x352D, 
0x3533, 0x353B, 0x3541, 0x3551, 0x3565, 0x356F, 0x3571, + 0x3577, 0x357B, 0x357D, 0x3581, 0x358D, 0x358F, 0x3599, 0x359B, + 0x35A1, 0x35B7, 0x35BD, 0x35BF, 0x35C3, 0x35D5, 0x35DD, 0x35E7, + 0x35EF, 0x3605, 0x3607, 0x3611, 0x3623, 0x3631, 0x3635, 0x3637, + 0x363B, 0x364D, 0x364F, 0x3653, 0x3659, 0x3661, 0x366B, 0x366D, + 0x368B, 0x368F, 0x36AD, 0x36AF, 0x36B9, 0x36BB, 0x36CD, 0x36D1, + 0x36E3, 0x36E9, 0x36F7, 0x3701, 0x3703, 0x3707, 0x371B, 0x373F, + 0x3745, 0x3749, 0x374F, 0x375D, 0x3761, 0x3775, 0x377F, 0x378D, + 0x37A3, 0x37A9, 0x37AB, 0x37C9, 0x37D5, 0x37DF, 0x37F1, 0x37F3, + 0x37F7, 0x3805, 0x380B, 0x3821, 0x3833, 0x3835, 0x3841, 0x3847, + 0x384B, 0x3853, 0x3857, 0x385F, 0x3865, 0x386F, 0x3871, 0x387D, + 0x388F, 0x3899, 0x38A7, 0x38B7, 0x38C5, 0x38C9, 0x38CF, 0x38D5, + 0x38D7, 0x38DD, 0x38E1, 0x38E3, 0x38FF, 0x3901, 0x391D, 0x3923, + 0x3925, 0x3929, 0x392F, 0x393D, 0x3941, 0x394D, 0x395B, 0x396B, + 0x3979, 0x397D, 0x3983, 0x398B, 0x3991, 0x3995, 0x399B, 0x39A1, + 0x39A7, 0x39AF, 0x39B3, 0x39BB, 0x39BF, 0x39CD, 0x39DD, 0x39E5, + 0x39EB, 0x39EF, 0x39FB, 0x3A03, 0x3A13, 0x3A15, 0x3A1F, 0x3A27, + 0x3A2B, 0x3A31, 0x3A4B, 0x3A51, 0x3A5B, 0x3A63, 0x3A67, 0x3A6D, + 0x3A79, 0x3A87, 0x3AA5, 0x3AA9, 0x3AB7, 0x3ACD, 0x3AD5, 0x3AE1, + 0x3AE5, 0x3AEB, 0x3AF3, 0x3AFD, 0x3B03, 0x3B11, 0x3B1B, 0x3B21, + 0x3B23, 0x3B2D, 0x3B39, 0x3B45, 0x3B53, 0x3B59, 0x3B5F, 0x3B71, + 0x3B7B, 0x3B81, 0x3B89, 0x3B9B, 0x3B9F, 0x3BA5, 0x3BA7, 0x3BAD, + 0x3BB7, 0x3BB9, 0x3BC3, 0x3BCB, 0x3BD1, 0x3BD7, 0x3BE1, 0x3BE3, + 0x3BF5, 0x3BFF, 0x3C01, 0x3C0D, 0x3C11, 0x3C17, 0x3C1F, 0x3C29, + 0x3C35, 0x3C43, 0x3C4F, 0x3C53, 0x3C5B, 0x3C65, 0x3C6B, 0x3C71, + 0x3C85, 0x3C89, 0x3C97, 0x3CA7, 0x3CB5, 0x3CBF, 0x3CC7, 0x3CD1, + 0x3CDD, 0x3CDF, 0x3CF1, 0x3CF7, 0x3D03, 0x3D0D, 0x3D19, 0x3D1B, + 0x3D1F, 0x3D21, 0x3D2D, 0x3D33, 0x3D37, 0x3D3F, 0x3D43, 0x3D6F, + 0x3D73, 0x3D75, 0x3D79, 0x3D7B, 0x3D85, 0x3D91, 0x3D97, 0x3D9D, + 0x3DAB, 0x3DAF, 0x3DB5, 0x3DBB, 0x3DC1, 0x3DC9, 0x3DCF, 0x3DF3, + 0x3E05, 0x3E09, 0x3E0F, 
0x3E11, 0x3E1D, 0x3E23, 0x3E29, 0x3E2F, + 0x3E33, 0x3E41, 0x3E57, 0x3E63, 0x3E65, 0x3E77, 0x3E81, 0x3E87, + 0x3EA1, 0x3EB9, 0x3EBD, 0x3EBF, 0x3EC3, 0x3EC5, 0x3EC9, 0x3ED7, + 0x3EDB, 0x3EE1, 0x3EE7, 0x3EEF, 0x3EFF, 0x3F0B, 0x3F0D, 0x3F37, + 0x3F3B, 0x3F3D, 0x3F41, 0x3F59, 0x3F5F, 0x3F65, 0x3F67, 0x3F79, + 0x3F7D, 0x3F8B, 0x3F91, 0x3FAD, 0x3FBF, 0x3FCD, 0x3FD3, 0x3FDD, + 0x3FE9, 0x3FEB, 0x3FF1, 0x3FFD, 0x401B, 0x4021, 0x4025, 0x402B, + 0x4031, 0x403F, 0x4043, 0x4045, 0x405D, 0x4061, 0x4067, 0x406D, + 0x4087, 0x4091, 0x40A3, 0x40A9, 0x40B1, 0x40B7, 0x40BD, 0x40DB, + 0x40DF, 0x40EB, 0x40F7, 0x40F9, 0x4109, 0x410B, 0x4111, 0x4115, + 0x4121, 0x4133, 0x4135, 0x413B, 0x413F, 0x4159, 0x4165, 0x416B, + 0x4177, 0x417B, 0x4193, 0x41AB, 0x41B7, 0x41BD, 0x41BF, 0x41CB, + 0x41E7, 0x41EF, 0x41F3, 0x41F9, 0x4205, 0x4207, 0x4219, 0x421F, + 0x4223, 0x4229, 0x422F, 0x4243, 0x4253, 0x4255, 0x425B, 0x4261, + 0x4273, 0x427D, 0x4283, 0x4285, 0x4289, 0x4291, 0x4297, 0x429D, + 0x42B5, 0x42C5, 0x42CB, 0x42D3, 0x42DD, 0x42E3, 0x42F1, 0x4307, + 0x430F, 0x431F, 0x4325, 0x4327, 0x4333, 0x4337, 0x4339, 0x434F, + 0x4357, 0x4369, 0x438B, 0x438D, 0x4393, 0x43A5, 0x43A9, 0x43AF, + 0x43B5, 0x43BD, 0x43C7, 0x43CF, 0x43E1, 0x43E7, 0x43EB, 0x43ED, + 0x43F1, 0x43F9, 0x4409, 0x440B, 0x4417, 0x4423, 0x4429, 0x443B, + 0x443F, 0x4445, 0x444B, 0x4451, 0x4453, 0x4459, 0x4465, 0x446F, + 0x4483, 0x448F, 0x44A1, 0x44A5, 0x44AB, 0x44AD, 0x44BD, 0x44BF, + 0x44C9, 0x44D7, 0x44DB, 0x44F9, 0x44FB, 0x4505, 0x4511, 0x4513, + 0x452B, 0x4531, 0x4541, 0x4549, 0x4553, 0x4555, 0x4561, 0x4577, + 0x457D, 0x457F, 0x458F, 0x45A3, 0x45AD, 0x45AF, 0x45BB, 0x45C7, + 0x45D9, 0x45E3, 0x45EF, 0x45F5, 0x45F7, 0x4601, 0x4603, 0x4609, + 0x4613, 0x4625, 0x4627, 0x4633, 0x4639, 0x463D, 0x4643, 0x4645, + 0x465D, 0x4679, 0x467B, 0x467F, 0x4681, 0x468B, 0x468D, 0x469D, + 0x46A9, 0x46B1, 0x46C7, 0x46C9, 0x46CF, 0x46D3, 0x46D5, 0x46DF, + 0x46E5, 0x46F9, 0x4705, 0x470F, 0x4717, 0x4723, 0x4729, 0x472F, + 0x4735, 0x4739, 0x474B, 0x474D, 0x4751, 
0x475D, 0x476F, 0x4771, + 0x477D, 0x4783, 0x4787, 0x4789, 0x4799, 0x47A5, 0x47B1, 0x47BF, + 0x47C3, 0x47CB, 0x47DD, 0x47E1, 0x47ED, 0x47FB, 0x4801, 0x4807, + 0x480B, 0x4813, 0x4819, 0x481D, 0x4831, 0x483D, 0x4847, 0x4855, + 0x4859, 0x485B, 0x486B, 0x486D, 0x4879, 0x4897, 0x489B, 0x48A1, + 0x48B9, 0x48CD, 0x48E5, 0x48EF, 0x48F7, 0x4903, 0x490D, 0x4919, + 0x491F, 0x492B, 0x4937, 0x493D, 0x4945, 0x4955, 0x4963, 0x4969, + 0x496D, 0x4973, 0x4997, 0x49AB, 0x49B5, 0x49D3, 0x49DF, 0x49E1, + 0x49E5, 0x49E7, 0x4A03, 0x4A0F, 0x4A1D, 0x4A23, 0x4A39, 0x4A41, + 0x4A45, 0x4A57, 0x4A5D, 0x4A6B, 0x4A7D, 0x4A81, 0x4A87, 0x4A89, + 0x4A8F, 0x4AB1, 0x4AC3, 0x4AC5, 0x4AD5, 0x4ADB, 0x4AED, 0x4AEF, + 0x4B07, 0x4B0B, 0x4B0D, 0x4B13, 0x4B1F, 0x4B25, 0x4B31, 0x4B3B, + 0x4B43, 0x4B49, 0x4B59, 0x4B65, 0x4B6D, 0x4B77, 0x4B85, 0x4BAD, + 0x4BB3, 0x4BB5, 0x4BBB, 0x4BBF, 0x4BCB, 0x4BD9, 0x4BDD, 0x4BDF, + 0x4BE3, 0x4BE5, 0x4BE9, 0x4BF1, 0x4BF7, 0x4C01, 0x4C07, 0x4C0D, + 0x4C0F, 0x4C15, 0x4C1B, 0x4C21, 0x4C2D, 0x4C33, 0x4C4B, 0x4C55, + 0x4C57, 0x4C61, 0x4C67, 0x4C73, 0x4C79, 0x4C7F, 0x4C8D, 0x4C93, + 0x4C99, 0x4CCD, 0x4CE1, 0x4CE7, 0x4CF1, 0x4CF3, 0x4CFD, 0x4D05, + 0x4D0F, 0x4D1B, 0x4D27, 0x4D29, 0x4D2F, 0x4D33, 0x4D41, 0x4D51, + 0x4D59, 0x4D65, 0x4D6B, 0x4D81, 0x4D83, 0x4D8D, 0x4D95, 0x4D9B, + 0x4DB1, 0x4DB3, 0x4DC9, 0x4DCF, 0x4DD7, 0x4DE1, 0x4DED, 0x4DF9, + 0x4DFB, 0x4E05, 0x4E0B, 0x4E17, 0x4E19, 0x4E1D, 0x4E2B, 0x4E35, + 0x4E37, 0x4E3D, 0x4E4F, 0x4E53, 0x4E5F, 0x4E67, 0x4E79, 0x4E85, + 0x4E8B, 0x4E91, 0x4E95, 0x4E9B, 0x4EA1, 0x4EAF, 0x4EB3, 0x4EB5, + 0x4EC1, 0x4ECD, 0x4ED1, 0x4ED7, 0x4EE9, 0x4EFB, 0x4F07, 0x4F09, + 0x4F19, 0x4F25, 0x4F2D, 0x4F3F, 0x4F49, 0x4F63, 0x4F67, 0x4F6D, + 0x4F75, 0x4F7B, 0x4F81, 0x4F85, 0x4F87, 0x4F91, 0x4FA5, 0x4FA9, + 0x4FAF, 0x4FB7, 0x4FBB, 0x4FCF, 0x4FD9, 0x4FDB, 0x4FFD, 0x4FFF, + 0x5003, 0x501B, 0x501D, 0x5029, 0x5035, 0x503F, 0x5045, 0x5047, + 0x5053, 0x5071, 0x5077, 0x5083, 0x5093, 0x509F, 0x50A1, 0x50B7, + 0x50C9, 0x50D5, 0x50E3, 0x50ED, 0x50EF, 0x50FB, 0x5107, 
0x510B, + 0x510D, 0x5111, 0x5117, 0x5123, 0x5125, 0x5135, 0x5147, 0x5149, + 0x5171, 0x5179, 0x5189, 0x518F, 0x5197, 0x51A1, 0x51A3, 0x51A7, + 0x51B9, 0x51C1, 0x51CB, 0x51D3, 0x51DF, 0x51E3, 0x51F5, 0x51F7, + 0x5209, 0x5213, 0x5215, 0x5219, 0x521B, 0x521F, 0x5227, 0x5243, + 0x5245, 0x524B, 0x5261, 0x526D, 0x5273, 0x5281, 0x5293, 0x5297, + 0x529D, 0x52A5, 0x52AB, 0x52B1, 0x52BB, 0x52C3, 0x52C7, 0x52C9, + 0x52DB, 0x52E5, 0x52EB, 0x52FF, 0x5315, 0x531D, 0x5323, 0x5341, + 0x5345, 0x5347, 0x534B, 0x535D, 0x5363, 0x5381, 0x5383, 0x5387, + 0x538F, 0x5395, 0x5399, 0x539F, 0x53AB, 0x53B9, 0x53DB, 0x53E9, + 0x53EF, 0x53F3, 0x53F5, 0x53FB, 0x53FF, 0x540D, 0x5411, 0x5413, + 0x5419, 0x5435, 0x5437, 0x543B, 0x5441, 0x5449, 0x5453, 0x5455, + 0x545F, 0x5461, 0x546B, 0x546D, 0x5471, 0x548F, 0x5491, 0x549D, + 0x54A9, 0x54B3, 0x54C5, 0x54D1, 0x54DF, 0x54E9, 0x54EB, 0x54F7, + 0x54FD, 0x5507, 0x550D, 0x551B, 0x5527, 0x552B, 0x5539, 0x553D, + 0x554F, 0x5551, 0x555B, 0x5563, 0x5567, 0x556F, 0x5579, 0x5585, + 0x5597, 0x55A9, 0x55B1, 0x55B7, 0x55C9, 0x55D9, 0x55E7, 0x55ED, + 0x55F3, 0x55FD, 0x560B, 0x560F, 0x5615, 0x5617, 0x5623, 0x562F, + 0x5633, 0x5639, 0x563F, 0x564B, 0x564D, 0x565D, 0x565F, 0x566B, + 0x5671, 0x5675, 0x5683, 0x5689, 0x568D, 0x568F, 0x569B, 0x56AD, + 0x56B1, 0x56D5, 0x56E7, 0x56F3, 0x56FF, 0x5701, 0x5705, 0x5707, + 0x570B, 0x5713, 0x571F, 0x5723, 0x5747, 0x574D, 0x575F, 0x5761, + 0x576D, 0x5777, 0x577D, 0x5789, 0x57A1, 0x57A9, 0x57AF, 0x57B5, + 0x57C5, 0x57D1, 0x57D3, 0x57E5, 0x57EF, 0x5803, 0x580D, 0x580F, + 0x5815, 0x5827, 0x582B, 0x582D, 0x5855, 0x585B, 0x585D, 0x586D, + 0x586F, 0x5873, 0x587B, 0x588D, 0x5897, 0x58A3, 0x58A9, 0x58AB, + 0x58B5, 0x58BD, 0x58C1, 0x58C7, 0x58D3, 0x58D5, 0x58DF, 0x58F1, + 0x58F9, 0x58FF, 0x5903, 0x5917, 0x591B, 0x5921, 0x5945, 0x594B, + 0x594D, 0x5957, 0x595D, 0x5975, 0x597B, 0x5989, 0x5999, 0x599F, + 0x59B1, 0x59B3, 0x59BD, 0x59D1, 0x59DB, 0x59E3, 0x59E9, 0x59ED, + 0x59F3, 0x59F5, 0x59FF, 0x5A01, 0x5A0D, 0x5A11, 0x5A13, 0x5A17, + 0x5A1F, 
0x5A29, 0x5A2F, 0x5A3B, 0x5A4D, 0x5A5B, 0x5A67, 0x5A77, + 0x5A7F, 0x5A85, 0x5A95, 0x5A9D, 0x5AA1, 0x5AA3, 0x5AA9, 0x5ABB, + 0x5AD3, 0x5AE5, 0x5AEF, 0x5AFB, 0x5AFD, 0x5B01, 0x5B0F, 0x5B19, + 0x5B1F, 0x5B25, 0x5B2B, 0x5B3D, 0x5B49, 0x5B4B, 0x5B67, 0x5B79, + 0x5B87, 0x5B97, 0x5BA3, 0x5BB1, 0x5BC9, 0x5BD5, 0x5BEB, 0x5BF1, + 0x5BF3, 0x5BFD, 0x5C05, 0x5C09, 0x5C0B, 0x5C0F, 0x5C1D, 0x5C29, + 0x5C2F, 0x5C33, 0x5C39, 0x5C47, 0x5C4B, 0x5C4D, 0x5C51, 0x5C6F, + 0x5C75, 0x5C77, 0x5C7D, 0x5C87, 0x5C89, 0x5CA7, 0x5CBD, 0x5CBF, + 0x5CC3, 0x5CC9, 0x5CD1, 0x5CD7, 0x5CDD, 0x5CED, 0x5CF9, 0x5D05, + 0x5D0B, 0x5D13, 0x5D17, 0x5D19, 0x5D31, 0x5D3D, 0x5D41, 0x5D47, + 0x5D4F, 0x5D55, 0x5D5B, 0x5D65, 0x5D67, 0x5D6D, 0x5D79, 0x5D95, + 0x5DA3, 0x5DA9, 0x5DAD, 0x5DB9, 0x5DC1, 0x5DC7, 0x5DD3, 0x5DD7, + 0x5DDD, 0x5DEB, 0x5DF1, 0x5DFD, 0x5E07, 0x5E0D, 0x5E13, 0x5E1B, + 0x5E21, 0x5E27, 0x5E2B, 0x5E2D, 0x5E31, 0x5E39, 0x5E45, 0x5E49, + 0x5E57, 0x5E69, 0x5E73, 0x5E75, 0x5E85, 0x5E8B, 0x5E9F, 0x5EA5, + 0x5EAF, 0x5EB7, 0x5EBB, 0x5ED9, 0x5EFD, 0x5F09, 0x5F11, 0x5F27, + 0x5F33, 0x5F35, 0x5F3B, 0x5F47, 0x5F57, 0x5F5D, 0x5F63, 0x5F65, + 0x5F77, 0x5F7B, 0x5F95, 0x5F99, 0x5FA1, 0x5FB3, 0x5FBD, 0x5FC5, + 0x5FCF, 0x5FD5, 0x5FE3, 0x5FE7, 0x5FFB, 0x6011, 0x6023, 0x602F, + 0x6037, 0x6053, 0x605F, 0x6065, 0x606B, 0x6073, 0x6079, 0x6085, + 0x609D, 0x60AD, 0x60BB, 0x60BF, 0x60CD, 0x60D9, 0x60DF, 0x60E9, + 0x60F5, 0x6109, 0x610F, 0x6113, 0x611B, 0x612D, 0x6139, 0x614B, + 0x6155, 0x6157, 0x615B, 0x616F, 0x6179, 0x6187, 0x618B, 0x6191, + 0x6193, 0x619D, 0x61B5, 0x61C7, 0x61C9, 0x61CD, 0x61E1, 0x61F1, + 0x61FF, 0x6209, 0x6217, 0x621D, 0x6221, 0x6227, 0x623B, 0x6241, + 0x624B, 0x6251, 0x6253, 0x625F, 0x6265, 0x6283, 0x628D, 0x6295, + 0x629B, 0x629F, 0x62A5, 0x62AD, 0x62D5, 0x62D7, 0x62DB, 0x62DD, + 0x62E9, 0x62FB, 0x62FF, 0x6305, 0x630D, 0x6317, 0x631D, 0x632F, + 0x6341, 0x6343, 0x634F, 0x635F, 0x6367, 0x636D, 0x6371, 0x6377, + 0x637D, 0x637F, 0x63B3, 0x63C1, 0x63C5, 0x63D9, 0x63E9, 0x63EB, + 0x63EF, 0x63F5, 0x6401, 
0x6403, 0x6409, 0x6415, 0x6421, 0x6427, + 0x642B, 0x6439, 0x6443, 0x6449, 0x644F, 0x645D, 0x6467, 0x6475, + 0x6485, 0x648D, 0x6493, 0x649F, 0x64A3, 0x64AB, 0x64C1, 0x64C7, + 0x64C9, 0x64DB, 0x64F1, 0x64F7, 0x64F9, 0x650B, 0x6511, 0x6521, + 0x652F, 0x6539, 0x653F, 0x654B, 0x654D, 0x6553, 0x6557, 0x655F, + 0x6571, 0x657D, 0x658D, 0x658F, 0x6593, 0x65A1, 0x65A5, 0x65AD, + 0x65B9, 0x65C5, 0x65E3, 0x65F3, 0x65FB, 0x65FF, 0x6601, 0x6607, + 0x661D, 0x6629, 0x6631, 0x663B, 0x6641, 0x6647, 0x664D, 0x665B, + 0x6661, 0x6673, 0x667D, 0x6689, 0x668B, 0x6695, 0x6697, 0x669B, + 0x66B5, 0x66B9, 0x66C5, 0x66CD, 0x66D1, 0x66E3, 0x66EB, 0x66F5, + 0x6703, 0x6713, 0x6719, 0x671F, 0x6727, 0x6731, 0x6737, 0x673F, + 0x6745, 0x6751, 0x675B, 0x676F, 0x6779, 0x6781, 0x6785, 0x6791, + 0x67AB, 0x67BD, 0x67C1, 0x67CD, 0x67DF, 0x67E5, 0x6803, 0x6809, + 0x6811, 0x6817, 0x682D, 0x6839, 0x683B, 0x683F, 0x6845, 0x684B, + 0x684D, 0x6857, 0x6859, 0x685D, 0x6863, 0x6869, 0x686B, 0x6871, + 0x6887, 0x6899, 0x689F, 0x68B1, 0x68BD, 0x68C5, 0x68D1, 0x68D7, + 0x68E1, 0x68ED, 0x68EF, 0x68FF, 0x6901, 0x690B, 0x690D, 0x6917, + 0x6929, 0x692F, 0x6943, 0x6947, 0x6949, 0x694F, 0x6965, 0x696B, + 0x6971, 0x6983, 0x6989, 0x6997, 0x69A3, 0x69B3, 0x69B5, 0x69BB, + 0x69C1, 0x69C5, 0x69D3, 0x69DF, 0x69E3, 0x69E5, 0x69F7, 0x6A07, + 0x6A2B, 0x6A37, 0x6A3D, 0x6A4B, 0x6A67, 0x6A69, 0x6A75, 0x6A7B, + 0x6A87, 0x6A8D, 0x6A91, 0x6A93, 0x6AA3, 0x6AC1, 0x6AC9, 0x6AE1, + 0x6AE7, 0x6B05, 0x6B0F, 0x6B11, 0x6B23, 0x6B27, 0x6B2D, 0x6B39, + 0x6B41, 0x6B57, 0x6B59, 0x6B5F, 0x6B75, 0x6B87, 0x6B89, 0x6B93, + 0x6B95, 0x6B9F, 0x6BBD, 0x6BBF, 0x6BDB, 0x6BE1, 0x6BEF, 0x6BFF, + 0x6C05, 0x6C19, 0x6C29, 0x6C2B, 0x6C31, 0x6C35, 0x6C55, 0x6C59, + 0x6C5B, 0x6C5F, 0x6C65, 0x6C67, 0x6C73, 0x6C77, 0x6C7D, 0x6C83, + 0x6C8F, 0x6C91, 0x6C97, 0x6C9B, 0x6CA1, 0x6CA9, 0x6CAF, 0x6CB3, + 0x6CC7, 0x6CCB, 0x6CEB, 0x6CF5, 0x6CFD, 0x6D0D, 0x6D0F, 0x6D25, + 0x6D27, 0x6D2B, 0x6D31, 0x6D39, 0x6D3F, 0x6D4F, 0x6D5D, 0x6D61, + 0x6D73, 0x6D7B, 0x6D7F, 0x6D93, 0x6D99, 
0x6DA5, 0x6DB1, 0x6DB7, + 0x6DC1, 0x6DC3, 0x6DCD, 0x6DCF, 0x6DDB, 0x6DF7, 0x6E03, 0x6E15, + 0x6E17, 0x6E29, 0x6E33, 0x6E3B, 0x6E45, 0x6E75, 0x6E77, 0x6E7B, + 0x6E81, 0x6E89, 0x6E93, 0x6E95, 0x6E9F, 0x6EBD, 0x6EBF, 0x6EE3, + 0x6EE9, 0x6EF3, 0x6EF9, 0x6EFB, 0x6F0D, 0x6F11, 0x6F17, 0x6F1F, + 0x6F2F, 0x6F3D, 0x6F4D, 0x6F53, 0x6F61, 0x6F65, 0x6F79, 0x6F7D, + 0x6F83, 0x6F85, 0x6F8F, 0x6F9B, 0x6F9D, 0x6FA3, 0x6FAF, 0x6FB5, + 0x6FBB, 0x6FBF, 0x6FCB, 0x6FCD, 0x6FD3, 0x6FD7, 0x6FE3, 0x6FE9, + 0x6FF1, 0x6FF5, 0x6FF7, 0x6FFD, 0x700F, 0x7019, 0x701F, 0x7027, + 0x7033, 0x7039, 0x704F, 0x7051, 0x7057, 0x7063, 0x7075, 0x7079, + 0x7087, 0x708D, 0x7091, 0x70A5, 0x70AB, 0x70BB, 0x70C3, 0x70C7, + 0x70CF, 0x70E5, 0x70ED, 0x70F9, 0x70FF, 0x7105, 0x7115, 0x7121, + 0x7133, 0x7151, 0x7159, 0x715D, 0x715F, 0x7163, 0x7169, 0x7183, + 0x7187, 0x7195, 0x71AD, 0x71C3, 0x71C9, 0x71CB, 0x71D1, 0x71DB, + 0x71E1, 0x71EF, 0x71F5, 0x71FB, 0x7207, 0x7211, 0x7217, 0x7219, + 0x7225, 0x722F, 0x723B, 0x7243, 0x7255, 0x7267, 0x7271, 0x7277, + 0x727F, 0x728F, 0x7295, 0x729B, 0x72A3, 0x72B3, 0x72C7, 0x72CB, + 0x72CD, 0x72D7, 0x72D9, 0x72E3, 0x72EF, 0x72F5, 0x72FD, 0x7303, + 0x730D, 0x7321, 0x732B, 0x733D, 0x7357, 0x735B, 0x7361, 0x737F, + 0x7381, 0x7385, 0x738D, 0x7393, 0x739F, 0x73AB, 0x73BD, 0x73C1, + 0x73C9, 0x73DF, 0x73E5, 0x73E7, 0x73F3, 0x7415, 0x741B, 0x742D, + 0x7439, 0x743F, 0x7441, 0x745D, 0x746B, 0x747B, 0x7489, 0x748D, + 0x749B, 0x74A7, 0x74AB, 0x74B1, 0x74B7, 0x74B9, 0x74DD, 0x74E1, + 0x74E7, 0x74FB, 0x7507, 0x751F, 0x7525, 0x753B, 0x753D, 0x754D, + 0x755F, 0x756B, 0x7577, 0x7589, 0x758B, 0x7591, 0x7597, 0x759D, + 0x75A1, 0x75A7, 0x75B5, 0x75B9, 0x75BB, 0x75D1, 0x75D9, 0x75E5, + 0x75EB, 0x75F5, 0x75FB, 0x7603, 0x760F, 0x7621, 0x762D, 0x7633, + 0x763D, 0x763F, 0x7655, 0x7663, 0x7669, 0x766F, 0x7673, 0x7685, + 0x768B, 0x769F, 0x76B5, 0x76B7, 0x76C3, 0x76DB, 0x76DF, 0x76F1, + 0x7703, 0x7705, 0x771B, 0x771D, 0x7721, 0x772D, 0x7735, 0x7741, + 0x774B, 0x7759, 0x775D, 0x775F, 0x7771, 0x7781, 0x77A7, 
0x77AD, + 0x77B3, 0x77B9, 0x77C5, 0x77CF, 0x77D5, 0x77E1, 0x77E9, 0x77EF, + 0x77F3, 0x77F9, 0x7807, 0x7825, 0x782B, 0x7835, 0x783D, 0x7853, + 0x7859, 0x7861, 0x786D, 0x7877, 0x7879, 0x7883, 0x7885, 0x788B, + 0x7895, 0x7897, 0x78A1, 0x78AD, 0x78BF, 0x78D3, 0x78D9, 0x78DD, + 0x78E5, 0x78FB, 0x7901, 0x7907, 0x7925, 0x792B, 0x7939, 0x793F, + 0x794B, 0x7957, 0x795D, 0x7967, 0x7969, 0x7973, 0x7991, 0x7993, + 0x79A3, 0x79AB, 0x79AF, 0x79B1, 0x79B7, 0x79C9, 0x79CD, 0x79CF, + 0x79D5, 0x79D9, 0x79F3, 0x79F7, 0x79FF, 0x7A05, 0x7A0F, 0x7A11, + 0x7A15, 0x7A1B, 0x7A23, 0x7A27, 0x7A2D, 0x7A4B, 0x7A57, 0x7A59, + 0x7A5F, 0x7A65, 0x7A69, 0x7A7D, 0x7A93, 0x7A9B, 0x7A9F, 0x7AA1, + 0x7AA5, 0x7AED, 0x7AF5, 0x7AF9, 0x7B01, 0x7B17, 0x7B19, 0x7B1D, + 0x7B2B, 0x7B35, 0x7B37, 0x7B3B, 0x7B4F, 0x7B55, 0x7B5F, 0x7B71, + 0x7B77, 0x7B8B, 0x7B9B, 0x7BA1, 0x7BA9, 0x7BAF, 0x7BB3, 0x7BC7, + 0x7BD3, 0x7BE9, 0x7BEB, 0x7BEF, 0x7BF1, 0x7BFD, 0x7C07, 0x7C19, + 0x7C1B, 0x7C31, 0x7C37, 0x7C49, 0x7C67, 0x7C69, 0x7C73, 0x7C81, + 0x7C8B, 0x7C93, 0x7CA3, 0x7CD5, 0x7CDB, 0x7CE5, 0x7CED, 0x7CF7, + 0x7D03, 0x7D09, 0x7D1B, 0x7D1D, 0x7D33, 0x7D39, 0x7D3B, 0x7D3F, + 0x7D45, 0x7D4D, 0x7D53, 0x7D59, 0x7D63, 0x7D75, 0x7D77, 0x7D8D, + 0x7D8F, 0x7D9F, 0x7DAD, 0x7DB7, 0x7DBD, 0x7DBF, 0x7DCB, 0x7DD5, + 0x7DE9, 0x7DED, 0x7DFB, 0x7E01, 0x7E05, 0x7E29, 0x7E2B, 0x7E2F, + 0x7E35, 0x7E41, 0x7E43, 0x7E47, 0x7E55, 0x7E61, 0x7E67, 0x7E6B, + 0x7E71, 0x7E73, 0x7E79, 0x7E7D, 0x7E91, 0x7E9B, 0x7E9D, 0x7EA7, + 0x7EAD, 0x7EB9, 0x7EBB, 0x7ED3, 0x7EDF, 0x7EEB, 0x7EF1, 0x7EF7, + 0x7EFB, 0x7F13, 0x7F15, 0x7F19, 0x7F31, 0x7F33, 0x7F39, 0x7F3D, + 0x7F43, 0x7F4B, 0x7F5B, 0x7F61, 0x7F63, 0x7F6D, 0x7F79, 0x7F87, + 0x7F8D, 0x7FAF, 0x7FB5, 0x7FC3, 0x7FC9, 0x7FCD, 0x7FCF, 0x7FED, + 0x8003, 0x800B, 0x800F, 0x8015, 0x801D, 0x8021, 0x8023, 0x803F, + 0x8041, 0x8047, 0x804B, 0x8065, 0x8077, 0x808D, 0x808F, 0x8095, + 0x80A5, 0x80AB, 0x80AD, 0x80BD, 0x80C9, 0x80CB, 0x80D7, 0x80DB, + 0x80E1, 0x80E7, 0x80F5, 0x80FF, 0x8105, 0x810D, 0x8119, 0x811D, + 0x812F, 
0x8131, 0x813B, 0x8143, 0x8153, 0x8159, 0x815F, 0x817D, + 0x817F, 0x8189, 0x819B, 0x819D, 0x81A7, 0x81AF, 0x81B3, 0x81BB, + 0x81C7, 0x81DF, 0x8207, 0x8209, 0x8215, 0x821F, 0x8225, 0x8231, + 0x8233, 0x823F, 0x8243, 0x8245, 0x8249, 0x824F, 0x8261, 0x826F, + 0x827B, 0x8281, 0x8285, 0x8293, 0x82B1, 0x82B5, 0x82BD, 0x82C7, + 0x82CF, 0x82D5, 0x82DF, 0x82F1, 0x82F9, 0x82FD, 0x830B, 0x831B, + 0x8321, 0x8329, 0x832D, 0x8333, 0x8335, 0x833F, 0x8341, 0x834D, + 0x8351, 0x8353, 0x8357, 0x835D, 0x8365, 0x8369, 0x836F, 0x838F, + 0x83A7, 0x83B1, 0x83B9, 0x83CB, 0x83D5, 0x83D7, 0x83DD, 0x83E7, + 0x83E9, 0x83ED, 0x83FF, 0x8405, 0x8411, 0x8413, 0x8423, 0x8425, + 0x843B, 0x8441, 0x8447, 0x844F, 0x8461, 0x8465, 0x8477, 0x8483, + 0x848B, 0x8491, 0x8495, 0x84A9, 0x84AF, 0x84CD, 0x84E3, 0x84EF, + 0x84F1, 0x84F7, 0x8509, 0x850D, 0x854B, 0x854F, 0x8551, 0x855D, + 0x8563, 0x856D, 0x856F, 0x857B, 0x8587, 0x85A3, 0x85A5, 0x85A9, + 0x85B7, 0x85CD, 0x85D3, 0x85D5, 0x85DB, 0x85E1, 0x85EB, 0x85F9, + 0x85FD, 0x85FF, 0x8609, 0x860F, 0x8617, 0x8621, 0x862F, 0x8639, + 0x863F, 0x8641, 0x864D, 0x8663, 0x8675, 0x867D, 0x8687, 0x8699, + 0x86A5, 0x86A7, 0x86B3, 0x86B7, 0x86C3, 0x86C5, 0x86CF, 0x86D1, + 0x86D7, 0x86E9, 0x86EF, 0x86F5, 0x8717, 0x871D, 0x871F, 0x872B, + 0x872F, 0x8735, 0x8747, 0x8759, 0x875B, 0x876B, 0x8771, 0x8777, + 0x877F, 0x8785, 0x878F, 0x87A1, 0x87A9, 0x87B3, 0x87BB, 0x87C5, + 0x87C7, 0x87CB, 0x87DD, 0x87F7, 0x8803, 0x8819, 0x881B, 0x881F, + 0x8821, 0x8837, 0x883D, 0x8843, 0x8851, 0x8861, 0x8867, 0x887B, + 0x8885, 0x8891, 0x8893, 0x88A5, 0x88CF, 0x88D3, 0x88EB, 0x88ED, + 0x88F3, 0x88FD, 0x8909, 0x890B, 0x8911, 0x891B, 0x8923, 0x8927, + 0x892D, 0x8939, 0x8945, 0x894D, 0x8951, 0x8957, 0x8963, 0x8981, + 0x8995, 0x899B, 0x89B3, 0x89B9, 0x89C3, 0x89CF, 0x89D1, 0x89DB, + 0x89EF, 0x89F5, 0x89FB, 0x89FF, 0x8A0B, 0x8A19, 0x8A23, 0x8A35, + 0x8A41, 0x8A49, 0x8A4F, 0x8A5B, 0x8A5F, 0x8A6D, 0x8A77, 0x8A79, + 0x8A85, 0x8AA3, 0x8AB3, 0x8AB5, 0x8AC1, 0x8AC7, 0x8ACB, 0x8ACD, + 0x8AD1, 0x8AD7, 0x8AF1, 
0x8AF5, 0x8B07, 0x8B09, 0x8B0D, 0x8B13, + 0x8B21, 0x8B57, 0x8B5D, 0x8B91, 0x8B93, 0x8BA3, 0x8BA9, 0x8BAF, + 0x8BBB, 0x8BD5, 0x8BD9, 0x8BDB, 0x8BE1, 0x8BF7, 0x8BFD, 0x8BFF, + 0x8C0B, 0x8C17, 0x8C1D, 0x8C27, 0x8C39, 0x8C3B, 0x8C47, 0x8C53, + 0x8C5D, 0x8C6F, 0x8C7B, 0x8C81, 0x8C89, 0x8C8F, 0x8C99, 0x8C9F, + 0x8CA7, 0x8CAB, 0x8CAD, 0x8CB1, 0x8CC5, 0x8CDD, 0x8CE3, 0x8CE9, + 0x8CF3, 0x8D01, 0x8D0B, 0x8D0D, 0x8D23, 0x8D29, 0x8D37, 0x8D41, + 0x8D5B, 0x8D5F, 0x8D71, 0x8D79, 0x8D85, 0x8D91, 0x8D9B, 0x8DA7, + 0x8DAD, 0x8DB5, 0x8DC5, 0x8DCB, 0x8DD3, 0x8DD9, 0x8DDF, 0x8DF5, + 0x8DF7, 0x8E01, 0x8E15, 0x8E1F, 0x8E25, 0x8E51, 0x8E63, 0x8E69, + 0x8E73, 0x8E75, 0x8E79, 0x8E7F, 0x8E8D, 0x8E91, 0x8EAB, 0x8EAF, + 0x8EB1, 0x8EBD, 0x8EC7, 0x8ECF, 0x8ED3, 0x8EDB, 0x8EE7, 0x8EEB, + 0x8EF7, 0x8EFF, 0x8F15, 0x8F1D, 0x8F23, 0x8F2D, 0x8F3F, 0x8F45, + 0x8F4B, 0x8F53, 0x8F59, 0x8F65, 0x8F69, 0x8F71, 0x8F83, 0x8F8D, + 0x8F99, 0x8F9F, 0x8FAB, 0x8FAD, 0x8FB3, 0x8FB7, 0x8FB9, 0x8FC9, + 0x8FD5, 0x8FE1, 0x8FEF, 0x8FF9, 0x9007, 0x900D, 0x9017, 0x9023, + 0x9025, 0x9031, 0x9037, 0x903B, 0x9041, 0x9043, 0x904F, 0x9053, + 0x906D, 0x9073, 0x9085, 0x908B, 0x9095, 0x909B, 0x909D, 0x90AF, + 0x90B9, 0x90C1, 0x90C5, 0x90DF, 0x90E9, 0x90FD, 0x9103, 0x9113, + 0x9127, 0x9133, 0x913D, 0x9145, 0x914F, 0x9151, 0x9161, 0x9167, + 0x917B, 0x9185, 0x9199, 0x919D, 0x91BB, 0x91BD, 0x91C1, 0x91C9, + 0x91D9, 0x91DB, 0x91ED, 0x91F1, 0x91F3, 0x91F9, 0x9203, 0x9215, + 0x9221, 0x922F, 0x9241, 0x9247, 0x9257, 0x926B, 0x9271, 0x9275, + 0x927D, 0x9283, 0x9287, 0x928D, 0x9299, 0x92A1, 0x92AB, 0x92AD, + 0x92B9, 0x92BF, 0x92C3, 0x92C5, 0x92CB, 0x92D5, 0x92D7, 0x92E7, + 0x92F3, 0x9301, 0x930B, 0x9311, 0x9319, 0x931F, 0x933B, 0x933D, + 0x9343, 0x9355, 0x9373, 0x9395, 0x9397, 0x93A7, 0x93B3, 0x93B5, + 0x93C7, 0x93D7, 0x93DD, 0x93E5, 0x93EF, 0x93F7, 0x9401, 0x9409, + 0x9413, 0x943F, 0x9445, 0x944B, 0x944F, 0x9463, 0x9467, 0x9469, + 0x946D, 0x947B, 0x9497, 0x949F, 0x94A5, 0x94B5, 0x94C3, 0x94E1, + 0x94E7, 0x9505, 0x9509, 0x9517, 0x9521, 
0x9527, 0x952D, 0x9535, + 0x9539, 0x954B, 0x9557, 0x955D, 0x955F, 0x9575, 0x9581, 0x9589, + 0x958F, 0x959B, 0x959F, 0x95AD, 0x95B1, 0x95B7, 0x95B9, 0x95BD, + 0x95CF, 0x95E3, 0x95E9, 0x95F9, 0x961F, 0x962F, 0x9631, 0x9635, + 0x963B, 0x963D, 0x9665, 0x968F, 0x969D, 0x96A1, 0x96A7, 0x96A9, + 0x96C1, 0x96CB, 0x96D1, 0x96D3, 0x96E5, 0x96EF, 0x96FB, 0x96FD, + 0x970D, 0x970F, 0x9715, 0x9725, 0x972B, 0x9733, 0x9737, 0x9739, + 0x9743, 0x9749, 0x9751, 0x975B, 0x975D, 0x976F, 0x977F, 0x9787, + 0x9793, 0x97A5, 0x97B1, 0x97B7, 0x97C3, 0x97CD, 0x97D3, 0x97D9, + 0x97EB, 0x97F7, 0x9805, 0x9809, 0x980B, 0x9815, 0x9829, 0x982F, + 0x983B, 0x9841, 0x9851, 0x986B, 0x986F, 0x9881, 0x9883, 0x9887, + 0x98A7, 0x98B1, 0x98B9, 0x98BF, 0x98C3, 0x98C9, 0x98CF, 0x98DD, + 0x98E3, 0x98F5, 0x98F9, 0x98FB, 0x990D, 0x9917, 0x991F, 0x9929, + 0x9931, 0x993B, 0x993D, 0x9941, 0x9947, 0x9949, 0x9953, 0x997D, + 0x9985, 0x9991, 0x9995, 0x999B, 0x99AD, 0x99AF, 0x99BF, 0x99C7, + 0x99CB, 0x99CD, 0x99D7, 0x99E5, 0x99F1, 0x99FB, 0x9A0F, 0x9A13, + 0x9A1B, 0x9A25, 0x9A4B, 0x9A4F, 0x9A55, 0x9A57, 0x9A61, 0x9A75, + 0x9A7F, 0x9A8B, 0x9A91, 0x9A9D, 0x9AB7, 0x9AC3, 0x9AC7, 0x9ACF, + 0x9AEB, 0x9AF3, 0x9AF7, 0x9AFF, 0x9B17, 0x9B1D, 0x9B27, 0x9B2F, + 0x9B35, 0x9B45, 0x9B51, 0x9B59, 0x9B63, 0x9B6F, 0x9B77, 0x9B8D, + 0x9B93, 0x9B95, 0x9B9F, 0x9BA1, 0x9BA7, 0x9BB1, 0x9BB7, 0x9BBD, + 0x9BC5, 0x9BCB, 0x9BCF, 0x9BDD, 0x9BF9, 0x9C01, 0x9C11, 0x9C23, + 0x9C2B, 0x9C2F, 0x9C35, 0x9C49, 0x9C4D, 0x9C5F, 0x9C65, 0x9C67, + 0x9C7F, 0x9C97, 0x9C9D, 0x9CA3, 0x9CAF, 0x9CBB, 0x9CBF, 0x9CC1, + 0x9CD7, 0x9CD9, 0x9CE3, 0x9CE9, 0x9CF1, 0x9CFD, 0x9D01, 0x9D15, + 0x9D27, 0x9D2D, 0x9D31, 0x9D3D, 0x9D55, 0x9D5B, 0x9D61, 0x9D97, + 0x9D9F, 0x9DA5, 0x9DA9, 0x9DC3, 0x9DE7, 0x9DEB, 0x9DED, 0x9DF1, + 0x9E0B, 0x9E17, 0x9E23, 0x9E27, 0x9E2D, 0x9E33, 0x9E3B, 0x9E47, + 0x9E51, 0x9E53, 0x9E5F, 0x9E6F, 0x9E81, 0x9E87, 0x9E8F, 0x9E95, + 0x9EA1, 0x9EB3, 0x9EBD, 0x9EBF, 0x9EF5, 0x9EF9, 0x9EFB, 0x9F05, + 0x9F23, 0x9F2F, 0x9F37, 0x9F3B, 0x9F43, 0x9F53, 0x9F61, 
0x9F6D, + 0x9F73, 0x9F77, 0x9F7D, 0x9F89, 0x9F8F, 0x9F91, 0x9F95, 0x9FA3, + 0x9FAF, 0x9FB3, 0x9FC1, 0x9FC7, 0x9FDF, 0x9FE5, 0x9FEB, 0x9FF5, + 0xA001, 0xA00D, 0xA021, 0xA033, 0xA039, 0xA03F, 0xA04F, 0xA057, + 0xA05B, 0xA061, 0xA075, 0xA079, 0xA099, 0xA09D, 0xA0AB, 0xA0B5, + 0xA0B7, 0xA0BD, 0xA0C9, 0xA0D9, 0xA0DB, 0xA0DF, 0xA0E5, 0xA0F1, + 0xA0F3, 0xA0FD, 0xA105, 0xA10B, 0xA10F, 0xA111, 0xA11B, 0xA129, + 0xA12F, 0xA135, 0xA141, 0xA153, 0xA175, 0xA17D, 0xA187, 0xA18D, + 0xA1A5, 0xA1AB, 0xA1AD, 0xA1B7, 0xA1C3, 0xA1C5, 0xA1E3, 0xA1ED, + 0xA1FB, 0xA207, 0xA213, 0xA223, 0xA229, 0xA22F, 0xA231, 0xA243, + 0xA247, 0xA24D, 0xA26B, 0xA279, 0xA27D, 0xA283, 0xA289, 0xA28B, + 0xA291, 0xA295, 0xA29B, 0xA2A9, 0xA2AF, 0xA2B3, 0xA2BB, 0xA2C5, + 0xA2D1, 0xA2D7, 0xA2F7, 0xA301, 0xA309, 0xA31F, 0xA321, 0xA32B, + 0xA331, 0xA349, 0xA351, 0xA355, 0xA373, 0xA379, 0xA37B, 0xA387, + 0xA397, 0xA39F, 0xA3A5, 0xA3A9, 0xA3AF, 0xA3B7, 0xA3C7, 0xA3D5, + 0xA3DB, 0xA3E1, 0xA3E5, 0xA3E7, 0xA3F1, 0xA3FD, 0xA3FF, 0xA40F, + 0xA41D, 0xA421, 0xA423, 0xA427, 0xA43B, 0xA44D, 0xA457, 0xA459, + 0xA463, 0xA469, 0xA475, 0xA493, 0xA49B, 0xA4AD, 0xA4B9, 0xA4C3, + 0xA4C5, 0xA4CB, 0xA4D1, 0xA4D5, 0xA4E1, 0xA4ED, 0xA4EF, 0xA4F3, + 0xA4FF, 0xA511, 0xA529, 0xA52B, 0xA535, 0xA53B, 0xA543, 0xA553, + 0xA55B, 0xA561, 0xA56D, 0xA577, 0xA585, 0xA58B, 0xA597, 0xA59D, + 0xA5A3, 0xA5A7, 0xA5A9, 0xA5C1, 0xA5C5, 0xA5CB, 0xA5D3, 0xA5D9, + 0xA5DD, 0xA5DF, 0xA5E3, 0xA5E9, 0xA5F7, 0xA5FB, 0xA603, 0xA60D, + 0xA625, 0xA63D, 0xA649, 0xA64B, 0xA651, 0xA65D, 0xA673, 0xA691, + 0xA693, 0xA699, 0xA6AB, 0xA6B5, 0xA6BB, 0xA6C1, 0xA6C9, 0xA6CD, + 0xA6CF, 0xA6D5, 0xA6DF, 0xA6E7, 0xA6F1, 0xA6F7, 0xA6FF, 0xA70F, + 0xA715, 0xA723, 0xA729, 0xA72D, 0xA745, 0xA74D, 0xA757, 0xA759, + 0xA765, 0xA76B, 0xA76F, 0xA793, 0xA795, 0xA7AB, 0xA7B1, 0xA7B9, + 0xA7BF, 0xA7C9, 0xA7D1, 0xA7D7, 0xA7E3, 0xA7ED, 0xA7FB, 0xA805, + 0xA80B, 0xA81D, 0xA829, 0xA82B, 0xA837, 0xA83B, 0xA855, 0xA85F, + 0xA86D, 0xA87D, 0xA88F, 0xA897, 0xA8A9, 0xA8B5, 0xA8C1, 0xA8C7, + 0xA8D7, 
0xA8E5, 0xA8FD, 0xA907, 0xA913, 0xA91B, 0xA931, 0xA937, + 0xA939, 0xA943, 0xA97F, 0xA985, 0xA987, 0xA98B, 0xA993, 0xA9A3, + 0xA9B1, 0xA9BB, 0xA9C1, 0xA9D9, 0xA9DF, 0xA9EB, 0xA9FD, 0xAA15, + 0xAA17, 0xAA35, 0xAA39, 0xAA3B, 0xAA47, 0xAA4D, 0xAA57, 0xAA59, + 0xAA5D, 0xAA6B, 0xAA71, 0xAA81, 0xAA83, 0xAA8D, 0xAA95, 0xAAAB, + 0xAABF, 0xAAC5, 0xAAC9, 0xAAE9, 0xAAEF, 0xAB01, 0xAB05, 0xAB07, + 0xAB0B, 0xAB0D, 0xAB11, 0xAB19, 0xAB4D, 0xAB5B, 0xAB71, 0xAB73, + 0xAB89, 0xAB9D, 0xABA7, 0xABAF, 0xABB9, 0xABBB, 0xABC1, 0xABC5, + 0xABD3, 0xABD7, 0xABDD, 0xABF1, 0xABF5, 0xABFB, 0xABFD, 0xAC09, + 0xAC15, 0xAC1B, 0xAC27, 0xAC37, 0xAC39, 0xAC45, 0xAC4F, 0xAC57, + 0xAC5B, 0xAC61, 0xAC63, 0xAC7F, 0xAC8B, 0xAC93, 0xAC9D, 0xACA9, + 0xACAB, 0xACAF, 0xACBD, 0xACD9, 0xACE1, 0xACE7, 0xACEB, 0xACED, + 0xACF1, 0xACF7, 0xACF9, 0xAD05, 0xAD3F, 0xAD45, 0xAD53, 0xAD5D, + 0xAD5F, 0xAD65, 0xAD81, 0xADA1, 0xADA5, 0xADC3, 0xADCB, 0xADD1, + 0xADD5, 0xADDB, 0xADE7, 0xADF3, 0xADF5, 0xADF9, 0xADFF, 0xAE05, + 0xAE13, 0xAE23, 0xAE2B, 0xAE49, 0xAE4D, 0xAE4F, 0xAE59, 0xAE61, + 0xAE67, 0xAE6B, 0xAE71, 0xAE8B, 0xAE8F, 0xAE9B, 0xAE9D, 0xAEA7, + 0xAEB9, 0xAEC5, 0xAED1, 0xAEE3, 0xAEE5, 0xAEE9, 0xAEF5, 0xAEFD, + 0xAF09, 0xAF13, 0xAF27, 0xAF2B, 0xAF33, 0xAF43, 0xAF4F, 0xAF57, + 0xAF5D, 0xAF6D, 0xAF75, 0xAF7F, 0xAF8B, 0xAF99, 0xAF9F, 0xAFA3, + 0xAFAB, 0xAFB7, 0xAFBB, 0xAFCF, 0xAFD5, 0xAFFD, 0xB005, 0xB015, + 0xB01B, 0xB03F, 0xB041, 0xB047, 0xB04B, 0xB051, 0xB053, 0xB069, + 0xB07B, 0xB07D, 0xB087, 0xB08D, 0xB0B1, 0xB0BF, 0xB0CB, 0xB0CF, + 0xB0E1, 0xB0E9, 0xB0ED, 0xB0FB, 0xB105, 0xB107, 0xB111, 0xB119, + 0xB11D, 0xB11F, 0xB131, 0xB141, 0xB14D, 0xB15B, 0xB165, 0xB173, + 0xB179, 0xB17F, 0xB1A9, 0xB1B3, 0xB1B9, 0xB1BF, 0xB1D3, 0xB1DD, + 0xB1E5, 0xB1F1, 0xB1F5, 0xB201, 0xB213, 0xB215, 0xB21F, 0xB22D, + 0xB23F, 0xB249, 0xB25B, 0xB263, 0xB269, 0xB26D, 0xB27B, 0xB281, + 0xB28B, 0xB2A9, 0xB2B7, 0xB2BD, 0xB2C3, 0xB2C7, 0xB2D3, 0xB2F9, + 0xB2FD, 0xB2FF, 0xB303, 0xB309, 0xB311, 0xB31D, 0xB327, 0xB32D, + 0xB33F, 0xB345, 0xB377, 
0xB37D, 0xB381, 0xB387, 0xB393, 0xB39B, + 0xB3A5, 0xB3C5, 0xB3CB, 0xB3E1, 0xB3E3, 0xB3ED, 0xB3F9, 0xB40B, + 0xB40D, 0xB413, 0xB417, 0xB435, 0xB43D, 0xB443, 0xB449, 0xB45B, + 0xB465, 0xB467, 0xB46B, 0xB477, 0xB48B, 0xB495, 0xB49D, 0xB4B5, + 0xB4BF, 0xB4C1, 0xB4C7, 0xB4DD, 0xB4E3, 0xB4E5, 0xB4F7, 0xB501, + 0xB50D, 0xB50F, 0xB52D, 0xB53F, 0xB54B, 0xB567, 0xB569, 0xB56F, + 0xB573, 0xB579, 0xB587, 0xB58D, 0xB599, 0xB5A3, 0xB5AB, 0xB5AF, + 0xB5BB, 0xB5D5, 0xB5DF, 0xB5E7, 0xB5ED, 0xB5FD, 0xB5FF, 0xB609, + 0xB61B, 0xB629, 0xB62F, 0xB633, 0xB639, 0xB647, 0xB657, 0xB659, + 0xB65F, 0xB663, 0xB66F, 0xB683, 0xB687, 0xB69B, 0xB69F, 0xB6A5, + 0xB6B1, 0xB6B3, 0xB6D7, 0xB6DB, 0xB6E1, 0xB6E3, 0xB6ED, 0xB6EF, + 0xB705, 0xB70D, 0xB713, 0xB71D, 0xB729, 0xB735, 0xB747, 0xB755, + 0xB76D, 0xB791, 0xB795, 0xB7A9, 0xB7C1, 0xB7CB, 0xB7D1, 0xB7D3, + 0xB7EF, 0xB7F5, 0xB807, 0xB80F, 0xB813, 0xB819, 0xB821, 0xB827, + 0xB82B, 0xB82D, 0xB839, 0xB855, 0xB867, 0xB875, 0xB885, 0xB893, + 0xB8A5, 0xB8AF, 0xB8B7, 0xB8BD, 0xB8C1, 0xB8C7, 0xB8CD, 0xB8D5, + 0xB8EB, 0xB8F7, 0xB8F9, 0xB903, 0xB915, 0xB91B, 0xB91D, 0xB92F, + 0xB939, 0xB93B, 0xB947, 0xB951, 0xB963, 0xB983, 0xB989, 0xB98D, + 0xB993, 0xB999, 0xB9A1, 0xB9A7, 0xB9AD, 0xB9B7, 0xB9CB, 0xB9D1, + 0xB9DD, 0xB9E7, 0xB9EF, 0xB9F9, 0xBA07, 0xBA0D, 0xBA17, 0xBA25, + 0xBA29, 0xBA2B, 0xBA41, 0xBA53, 0xBA55, 0xBA5F, 0xBA61, 0xBA65, + 0xBA79, 0xBA7D, 0xBA7F, 0xBAA1, 0xBAA3, 0xBAAF, 0xBAB5, 0xBABF, + 0xBAC1, 0xBACB, 0xBADD, 0xBAE3, 0xBAF1, 0xBAFD, 0xBB09, 0xBB1F, + 0xBB27, 0xBB2D, 0xBB3D, 0xBB43, 0xBB4B, 0xBB4F, 0xBB5B, 0xBB61, + 0xBB69, 0xBB6D, 0xBB91, 0xBB97, 0xBB9D, 0xBBB1, 0xBBC9, 0xBBCF, + 0xBBDB, 0xBBED, 0xBBF7, 0xBBF9, 0xBC03, 0xBC1D, 0xBC23, 0xBC33, + 0xBC3B, 0xBC41, 0xBC45, 0xBC5D, 0xBC6F, 0xBC77, 0xBC83, 0xBC8F, + 0xBC99, 0xBCAB, 0xBCB7, 0xBCB9, 0xBCD1, 0xBCD5, 0xBCE1, 0xBCF3, + 0xBCFF, 0xBD0D, 0xBD17, 0xBD19, 0xBD1D, 0xBD35, 0xBD41, 0xBD4F, + 0xBD59, 0xBD5F, 0xBD61, 0xBD67, 0xBD6B, 0xBD71, 0xBD8B, 0xBD8F, + 0xBD95, 0xBD9B, 0xBD9D, 0xBDB3, 0xBDBB, 
0xBDCD, 0xBDD1, 0xBDE3, + 0xBDEB, 0xBDEF, 0xBE07, 0xBE09, 0xBE15, 0xBE21, 0xBE25, 0xBE27, + 0xBE5B, 0xBE5D, 0xBE6F, 0xBE75, 0xBE79, 0xBE7F, 0xBE8B, 0xBE8D, + 0xBE93, 0xBE9F, 0xBEA9, 0xBEB1, 0xBEB5, 0xBEB7, 0xBECF, 0xBED9, + 0xBEDB, 0xBEE5, 0xBEE7, 0xBEF3, 0xBEF9, 0xBF0B, 0xBF33, 0xBF39, + 0xBF4D, 0xBF5D, 0xBF5F, 0xBF6B, 0xBF71, 0xBF7B, 0xBF87, 0xBF89, + 0xBF8D, 0xBF93, 0xBFA1, 0xBFAD, 0xBFB9, 0xBFCF, 0xBFD5, 0xBFDD, + 0xBFE1, 0xBFE3, 0xBFF3, 0xC005, 0xC011, 0xC013, 0xC019, 0xC029, + 0xC02F, 0xC031, 0xC037, 0xC03B, 0xC047, 0xC065, 0xC06D, 0xC07D, + 0xC07F, 0xC091, 0xC09B, 0xC0B3, 0xC0B5, 0xC0BB, 0xC0D3, 0xC0D7, + 0xC0D9, 0xC0EF, 0xC0F1, 0xC101, 0xC103, 0xC109, 0xC115, 0xC119, + 0xC12B, 0xC133, 0xC137, 0xC145, 0xC149, 0xC15B, 0xC173, 0xC179, + 0xC17B, 0xC181, 0xC18B, 0xC18D, 0xC197, 0xC1BD, 0xC1C3, 0xC1CD, + 0xC1DB, 0xC1E1, 0xC1E7, 0xC1FF, 0xC203, 0xC205, 0xC211, 0xC221, + 0xC22F, 0xC23F, 0xC24B, 0xC24D, 0xC253, 0xC25D, 0xC277, 0xC27B, + 0xC27D, 0xC289, 0xC28F, 0xC293, 0xC29F, 0xC2A7, 0xC2B3, 0xC2BD, + 0xC2CF, 0xC2D5, 0xC2E3, 0xC2FF, 0xC301, 0xC307, 0xC311, 0xC313, + 0xC317, 0xC325, 0xC347, 0xC349, 0xC34F, 0xC365, 0xC367, 0xC371, + 0xC37F, 0xC383, 0xC385, 0xC395, 0xC39D, 0xC3A7, 0xC3AD, 0xC3B5, + 0xC3BF, 0xC3C7, 0xC3CB, 0xC3D1, 0xC3D3, 0xC3E3, 0xC3E9, 0xC3EF, + 0xC401, 0xC41F, 0xC42D, 0xC433, 0xC437, 0xC455, 0xC457, 0xC461, + 0xC46F, 0xC473, 0xC487, 0xC491, 0xC499, 0xC49D, 0xC4A5, 0xC4B7, + 0xC4BB, 0xC4C9, 0xC4CF, 0xC4D3, 0xC4EB, 0xC4F1, 0xC4F7, 0xC509, + 0xC51B, 0xC51D, 0xC541, 0xC547, 0xC551, 0xC55F, 0xC56B, 0xC56F, + 0xC575, 0xC577, 0xC595, 0xC59B, 0xC59F, 0xC5A1, 0xC5A7, 0xC5C3, + 0xC5D7, 0xC5DB, 0xC5EF, 0xC5FB, 0xC613, 0xC623, 0xC635, 0xC641, + 0xC64F, 0xC655, 0xC659, 0xC665, 0xC685, 0xC691, 0xC697, 0xC6A1, + 0xC6A9, 0xC6B3, 0xC6B9, 0xC6CB, 0xC6CD, 0xC6DD, 0xC6EB, 0xC6F1, + 0xC707, 0xC70D, 0xC719, 0xC71B, 0xC72D, 0xC731, 0xC739, 0xC757, + 0xC763, 0xC767, 0xC773, 0xC775, 0xC77F, 0xC7A5, 0xC7BB, 0xC7BD, + 0xC7C1, 0xC7CF, 0xC7D5, 0xC7E1, 0xC7F9, 0xC7FD, 0xC7FF, 
0xC803, + 0xC811, 0xC81D, 0xC827, 0xC829, 0xC839, 0xC83F, 0xC853, 0xC857, + 0xC86B, 0xC881, 0xC88D, 0xC88F, 0xC893, 0xC895, 0xC8A1, 0xC8B7, + 0xC8CF, 0xC8D5, 0xC8DB, 0xC8DD, 0xC8E3, 0xC8E7, 0xC8ED, 0xC8EF, + 0xC8F9, 0xC905, 0xC911, 0xC917, 0xC919, 0xC91F, 0xC92F, 0xC937, + 0xC93D, 0xC941, 0xC953, 0xC95F, 0xC96B, 0xC979, 0xC97D, 0xC989, + 0xC98F, 0xC997, 0xC99D, 0xC9AF, 0xC9B5, 0xC9BF, 0xC9CB, 0xC9D9, + 0xC9DF, 0xC9E3, 0xC9EB, 0xCA01, 0xCA07, 0xCA09, 0xCA25, 0xCA37, + 0xCA39, 0xCA4B, 0xCA55, 0xCA5B, 0xCA69, 0xCA73, 0xCA75, 0xCA7F, + 0xCA8D, 0xCA93, 0xCA9D, 0xCA9F, 0xCAB5, 0xCABB, 0xCAC3, 0xCAC9, + 0xCAD9, 0xCAE5, 0xCAED, 0xCB03, 0xCB05, 0xCB09, 0xCB17, 0xCB29, + 0xCB35, 0xCB3B, 0xCB53, 0xCB59, 0xCB63, 0xCB65, 0xCB71, 0xCB87, + 0xCB99, 0xCB9F, 0xCBB3, 0xCBB9, 0xCBC3, 0xCBD1, 0xCBD5, 0xCBD7, + 0xCBDD, 0xCBE9, 0xCBFF, 0xCC0D, 0xCC19, 0xCC1D, 0xCC23, 0xCC2B, + 0xCC41, 0xCC43, 0xCC4D, 0xCC59, 0xCC61, 0xCC89, 0xCC8B, 0xCC91, + 0xCC9B, 0xCCA3, 0xCCA7, 0xCCD1, 0xCCE5, 0xCCE9, 0xCD09, 0xCD15, + 0xCD1F, 0xCD25, 0xCD31, 0xCD3D, 0xCD3F, 0xCD49, 0xCD51, 0xCD57, + 0xCD5B, 0xCD63, 0xCD67, 0xCD81, 0xCD93, 0xCD97, 0xCD9F, 0xCDBB, + 0xCDC1, 0xCDD3, 0xCDD9, 0xCDE5, 0xCDE7, 0xCDF1, 0xCDF7, 0xCDFD, + 0xCE0B, 0xCE15, 0xCE21, 0xCE2F, 0xCE47, 0xCE4D, 0xCE51, 0xCE65, + 0xCE7B, 0xCE7D, 0xCE8F, 0xCE93, 0xCE99, 0xCEA5, 0xCEA7, 0xCEB7, + 0xCEC9, 0xCED7, 0xCEDD, 0xCEE3, 0xCEE7, 0xCEED, 0xCEF5, 0xCF07, + 0xCF0B, 0xCF19, 0xCF37, 0xCF3B, 0xCF4D, 0xCF55, 0xCF5F, 0xCF61, + 0xCF65, 0xCF6D, 0xCF79, 0xCF7D, 0xCF89, 0xCF9B, 0xCF9D, 0xCFA9, + 0xCFB3, 0xCFB5, 0xCFC5, 0xCFCD, 0xCFD1, 0xCFEF, 0xCFF1, 0xCFF7, + 0xD013, 0xD015, 0xD01F, 0xD021, 0xD033, 0xD03D, 0xD04B, 0xD04F, + 0xD069, 0xD06F, 0xD081, 0xD085, 0xD099, 0xD09F, 0xD0A3, 0xD0AB, + 0xD0BD, 0xD0C1, 0xD0CD, 0xD0E7, 0xD0FF, 0xD103, 0xD117, 0xD12D, + 0xD12F, 0xD141, 0xD157, 0xD159, 0xD15D, 0xD169, 0xD16B, 0xD171, + 0xD177, 0xD17D, 0xD181, 0xD187, 0xD195, 0xD199, 0xD1B1, 0xD1BD, + 0xD1C3, 0xD1D5, 0xD1D7, 0xD1E3, 0xD1FF, 0xD20D, 0xD211, 0xD217, + 0xD21F, 
0xD235, 0xD23B, 0xD247, 0xD259, 0xD261, 0xD265, 0xD279, + 0xD27F, 0xD283, 0xD289, 0xD28B, 0xD29D, 0xD2A3, 0xD2A7, 0xD2B3, + 0xD2BF, 0xD2C7, 0xD2E3, 0xD2E9, 0xD2F1, 0xD2FB, 0xD2FD, 0xD315, + 0xD321, 0xD32B, 0xD343, 0xD34B, 0xD355, 0xD369, 0xD375, 0xD37B, + 0xD387, 0xD393, 0xD397, 0xD3A5, 0xD3B1, 0xD3C9, 0xD3EB, 0xD3FD, + 0xD405, 0xD40F, 0xD415, 0xD427, 0xD42F, 0xD433, 0xD43B, 0xD44B, + 0xD459, 0xD45F, 0xD463, 0xD469, 0xD481, 0xD483, 0xD489, 0xD48D, + 0xD493, 0xD495, 0xD4A5, 0xD4AB, 0xD4B1, 0xD4C5, 0xD4DD, 0xD4E1, + 0xD4E3, 0xD4E7, 0xD4F5, 0xD4F9, 0xD50B, 0xD50D, 0xD513, 0xD51F, + 0xD523, 0xD531, 0xD535, 0xD537, 0xD549, 0xD559, 0xD55F, 0xD565, + 0xD567, 0xD577, 0xD58B, 0xD591, 0xD597, 0xD5B5, 0xD5B9, 0xD5C1, + 0xD5C7, 0xD5DF, 0xD5EF, 0xD5F5, 0xD5FB, 0xD603, 0xD60F, 0xD62D, + 0xD631, 0xD643, 0xD655, 0xD65D, 0xD661, 0xD67B, 0xD685, 0xD687, + 0xD69D, 0xD6A5, 0xD6AF, 0xD6BD, 0xD6C3, 0xD6C7, 0xD6D9, 0xD6E1, + 0xD6ED, 0xD709, 0xD70B, 0xD711, 0xD715, 0xD721, 0xD727, 0xD73F, + 0xD745, 0xD74D, 0xD757, 0xD76B, 0xD77B, 0xD783, 0xD7A1, 0xD7A7, + 0xD7AD, 0xD7B1, 0xD7B3, 0xD7BD, 0xD7CB, 0xD7D1, 0xD7DB, 0xD7FB, + 0xD811, 0xD823, 0xD825, 0xD829, 0xD82B, 0xD82F, 0xD837, 0xD84D, + 0xD855, 0xD867, 0xD873, 0xD88F, 0xD891, 0xD8A1, 0xD8AD, 0xD8BF, + 0xD8CD, 0xD8D7, 0xD8E9, 0xD8F5, 0xD8FB, 0xD91B, 0xD925, 0xD933, + 0xD939, 0xD943, 0xD945, 0xD94F, 0xD951, 0xD957, 0xD96D, 0xD96F, + 0xD973, 0xD979, 0xD981, 0xD98B, 0xD991, 0xD99F, 0xD9A5, 0xD9A9, + 0xD9B5, 0xD9D3, 0xD9EB, 0xD9F1, 0xD9F7, 0xD9FF, 0xDA05, 0xDA09, + 0xDA0B, 0xDA0F, 0xDA15, 0xDA1D, 0xDA23, 0xDA29, 0xDA3F, 0xDA51, + 0xDA59, 0xDA5D, 0xDA5F, 0xDA71, 0xDA77, 0xDA7B, 0xDA7D, 0xDA8D, + 0xDA9F, 0xDAB3, 0xDABD, 0xDAC3, 0xDAC9, 0xDAE7, 0xDAE9, 0xDAF5, + 0xDB11, 0xDB17, 0xDB1D, 0xDB23, 0xDB25, 0xDB31, 0xDB3B, 0xDB43, + 0xDB55, 0xDB67, 0xDB6B, 0xDB73, 0xDB85, 0xDB8F, 0xDB91, 0xDBAD, + 0xDBAF, 0xDBB9, 0xDBC7, 0xDBCB, 0xDBCD, 0xDBEB, 0xDBF7, 0xDC0D, + 0xDC27, 0xDC31, 0xDC39, 0xDC3F, 0xDC49, 0xDC51, 0xDC61, 0xDC6F, + 0xDC75, 0xDC7B, 0xDC85, 
0xDC93, 0xDC99, 0xDC9D, 0xDC9F, 0xDCA9, + 0xDCB5, 0xDCB7, 0xDCBD, 0xDCC7, 0xDCCF, 0xDCD3, 0xDCD5, 0xDCDF, + 0xDCF9, 0xDD0F, 0xDD15, 0xDD17, 0xDD23, 0xDD35, 0xDD39, 0xDD53, + 0xDD57, 0xDD5F, 0xDD69, 0xDD6F, 0xDD7D, 0xDD87, 0xDD89, 0xDD9B, + 0xDDA1, 0xDDAB, 0xDDBF, 0xDDC5, 0xDDCB, 0xDDCF, 0xDDE7, 0xDDE9, + 0xDDED, 0xDDF5, 0xDDFB, 0xDE0B, 0xDE19, 0xDE29, 0xDE3B, 0xDE3D, + 0xDE41, 0xDE4D, 0xDE4F, 0xDE59, 0xDE5B, 0xDE61, 0xDE6D, 0xDE77, + 0xDE7D, 0xDE83, 0xDE97, 0xDE9D, 0xDEA1, 0xDEA7, 0xDECD, 0xDED1, + 0xDED7, 0xDEE3, 0xDEF1, 0xDEF5, 0xDF01, 0xDF09, 0xDF13, 0xDF1F, + 0xDF2B, 0xDF33, 0xDF37, 0xDF3D, 0xDF4B, 0xDF55, 0xDF5B, 0xDF67, + 0xDF69, 0xDF73, 0xDF85, 0xDF87, 0xDF99, 0xDFA3, 0xDFAB, 0xDFB5, + 0xDFB7, 0xDFC3, 0xDFC7, 0xDFD5, 0xDFF1, 0xDFF3, 0xE003, 0xE005, + 0xE017, 0xE01D, 0xE027, 0xE02D, 0xE035, 0xE045, 0xE053, 0xE071, + 0xE07B, 0xE08F, 0xE095, 0xE09F, 0xE0B7, 0xE0B9, 0xE0D5, 0xE0D7, + 0xE0E3, 0xE0F3, 0xE0F9, 0xE101, 0xE125, 0xE129, 0xE131, 0xE135, + 0xE143, 0xE14F, 0xE159, 0xE161, 0xE16D, 0xE171, 0xE177, 0xE17F, + 0xE183, 0xE189, 0xE197, 0xE1AD, 0xE1B5, 0xE1BB, 0xE1BF, 0xE1C1, + 0xE1CB, 0xE1D1, 0xE1E5, 0xE1EF, 0xE1F7, 0xE1FD, 0xE203, 0xE219, + 0xE22B, 0xE22D, 0xE23D, 0xE243, 0xE257, 0xE25B, 0xE275, 0xE279, + 0xE287, 0xE29D, 0xE2AB, 0xE2AF, 0xE2BB, 0xE2C1, 0xE2C9, 0xE2CD, + 0xE2D3, 0xE2D9, 0xE2F3, 0xE2FD, 0xE2FF, 0xE311, 0xE323, 0xE327, + 0xE329, 0xE339, 0xE33B, 0xE34D, 0xE351, 0xE357, 0xE35F, 0xE363, + 0xE369, 0xE375, 0xE377, 0xE37D, 0xE383, 0xE39F, 0xE3C5, 0xE3C9, + 0xE3D1, 0xE3E1, 0xE3FB, 0xE3FF, 0xE401, 0xE40B, 0xE417, 0xE419, + 0xE423, 0xE42B, 0xE431, 0xE43B, 0xE447, 0xE449, 0xE453, 0xE455, + 0xE46D, 0xE471, 0xE48F, 0xE4A9, 0xE4AF, 0xE4B5, 0xE4C7, 0xE4CD, + 0xE4D3, 0xE4E9, 0xE4EB, 0xE4F5, 0xE507, 0xE521, 0xE525, 0xE537, + 0xE53F, 0xE545, 0xE54B, 0xE557, 0xE567, 0xE56D, 0xE575, 0xE585, + 0xE58B, 0xE593, 0xE5A3, 0xE5A5, 0xE5CF, 0xE609, 0xE611, 0xE615, + 0xE61B, 0xE61D, 0xE621, 0xE629, 0xE639, 0xE63F, 0xE653, 0xE657, + 0xE663, 0xE66F, 0xE675, 0xE681, 0xE683, 
0xE68D, 0xE68F, 0xE695, + 0xE6AB, 0xE6AD, 0xE6B7, 0xE6BD, 0xE6C5, 0xE6CB, 0xE6D5, 0xE6E3, + 0xE6E9, 0xE6EF, 0xE6F3, 0xE705, 0xE70D, 0xE717, 0xE71F, 0xE72F, + 0xE73D, 0xE747, 0xE749, 0xE753, 0xE755, 0xE761, 0xE767, 0xE76B, + 0xE77F, 0xE789, 0xE791, 0xE7C5, 0xE7CD, 0xE7D7, 0xE7DD, 0xE7DF, + 0xE7E9, 0xE7F1, 0xE7FB, 0xE801, 0xE807, 0xE80F, 0xE819, 0xE81B, + 0xE831, 0xE833, 0xE837, 0xE83D, 0xE84B, 0xE84F, 0xE851, 0xE869, + 0xE875, 0xE879, 0xE893, 0xE8A5, 0xE8A9, 0xE8AF, 0xE8BD, 0xE8DB, + 0xE8E1, 0xE8E5, 0xE8EB, 0xE8ED, 0xE903, 0xE90B, 0xE90F, 0xE915, + 0xE917, 0xE92D, 0xE933, 0xE93B, 0xE94B, 0xE951, 0xE95F, 0xE963, + 0xE969, 0xE97B, 0xE983, 0xE98F, 0xE995, 0xE9A1, 0xE9B9, 0xE9D7, + 0xE9E7, 0xE9EF, 0xEA11, 0xEA19, 0xEA2F, 0xEA35, 0xEA43, 0xEA4D, + 0xEA5F, 0xEA6D, 0xEA71, 0xEA7D, 0xEA85, 0xEA89, 0xEAAD, 0xEAB3, + 0xEAB9, 0xEABB, 0xEAC5, 0xEAC7, 0xEACB, 0xEADF, 0xEAE5, 0xEAEB, + 0xEAF5, 0xEB01, 0xEB07, 0xEB09, 0xEB31, 0xEB39, 0xEB3F, 0xEB5B, + 0xEB61, 0xEB63, 0xEB6F, 0xEB81, 0xEB85, 0xEB9D, 0xEBAB, 0xEBB1, + 0xEBB7, 0xEBC1, 0xEBD5, 0xEBDF, 0xEBED, 0xEBFD, 0xEC0B, 0xEC1B, + 0xEC21, 0xEC29, 0xEC4D, 0xEC51, 0xEC5D, 0xEC69, 0xEC6F, 0xEC7B, + 0xECAD, 0xECB9, 0xECBF, 0xECC3, 0xECC9, 0xECCF, 0xECD7, 0xECDD, + 0xECE7, 0xECE9, 0xECF3, 0xECF5, 0xED07, 0xED11, 0xED1F, 0xED2F, + 0xED37, 0xED3D, 0xED41, 0xED55, 0xED59, 0xED5B, 0xED65, 0xED6B, + 0xED79, 0xED8B, 0xED95, 0xEDBB, 0xEDC5, 0xEDD7, 0xEDD9, 0xEDE3, + 0xEDE5, 0xEDF1, 0xEDF5, 0xEDF7, 0xEDFB, 0xEE09, 0xEE0F, 0xEE19, + 0xEE21, 0xEE49, 0xEE4F, 0xEE63, 0xEE67, 0xEE73, 0xEE7B, 0xEE81, + 0xEEA3, 0xEEAB, 0xEEC1, 0xEEC9, 0xEED5, 0xEEDF, 0xEEE1, 0xEEF1, + 0xEF1B, 0xEF27, 0xEF2F, 0xEF45, 0xEF4D, 0xEF63, 0xEF6B, 0xEF71, + 0xEF93, 0xEF95, 0xEF9B, 0xEF9F, 0xEFAD, 0xEFB3, 0xEFC3, 0xEFC5, + 0xEFDB, 0xEFE1, 0xEFE9, 0xF001, 0xF017, 0xF01D, 0xF01F, 0xF02B, + 0xF02F, 0xF035, 0xF043, 0xF047, 0xF04F, 0xF067, 0xF06B, 0xF071, + 0xF077, 0xF079, 0xF08F, 0xF0A3, 0xF0A9, 0xF0AD, 0xF0BB, 0xF0BF, + 0xF0C5, 0xF0CB, 0xF0D3, 0xF0D9, 0xF0E3, 0xF0E9, 0xF0F1, 
0xF0F7, + 0xF107, 0xF115, 0xF11B, 0xF121, 0xF137, 0xF13D, 0xF155, 0xF175, + 0xF17B, 0xF18D, 0xF193, 0xF1A5, 0xF1AF, 0xF1B7, 0xF1D5, 0xF1E7, + 0xF1ED, 0xF1FD, 0xF209, 0xF20F, 0xF21B, 0xF21D, 0xF223, 0xF227, + 0xF233, 0xF23B, 0xF241, 0xF257, 0xF25F, 0xF265, 0xF269, 0xF277, + 0xF281, 0xF293, 0xF2A7, 0xF2B1, 0xF2B3, 0xF2B9, 0xF2BD, 0xF2BF, + 0xF2DB, 0xF2ED, 0xF2EF, 0xF2F9, 0xF2FF, 0xF305, 0xF30B, 0xF319, + 0xF341, 0xF359, 0xF35B, 0xF35F, 0xF367, 0xF373, 0xF377, 0xF38B, + 0xF38F, 0xF3AF, 0xF3C1, 0xF3D1, 0xF3D7, 0xF3FB, 0xF403, 0xF409, + 0xF40D, 0xF413, 0xF421, 0xF425, 0xF42B, 0xF445, 0xF44B, 0xF455, + 0xF463, 0xF475, 0xF47F, 0xF485, 0xF48B, 0xF499, 0xF4A3, 0xF4A9, + 0xF4AF, 0xF4BD, 0xF4C3, 0xF4DB, 0xF4DF, 0xF4ED, 0xF503, 0xF50B, + 0xF517, 0xF521, 0xF529, 0xF535, 0xF547, 0xF551, 0xF563, 0xF56B, + 0xF583, 0xF58D, 0xF595, 0xF599, 0xF5B1, 0xF5B7, 0xF5C9, 0xF5CF, + 0xF5D1, 0xF5DB, 0xF5F9, 0xF5FB, 0xF605, 0xF607, 0xF60B, 0xF60D, + 0xF635, 0xF637, 0xF653, 0xF65B, 0xF661, 0xF667, 0xF679, 0xF67F, + 0xF689, 0xF697, 0xF69B, 0xF6AD, 0xF6CB, 0xF6DD, 0xF6DF, 0xF6EB, + 0xF709, 0xF70F, 0xF72D, 0xF731, 0xF743, 0xF74F, 0xF751, 0xF755, + 0xF763, 0xF769, 0xF773, 0xF779, 0xF781, 0xF787, 0xF791, 0xF79D, + 0xF79F, 0xF7A5, 0xF7B1, 0xF7BB, 0xF7BD, 0xF7CF, 0xF7D3, 0xF7E7, + 0xF7EB, 0xF7F1, 0xF7FF, 0xF805, 0xF80B, 0xF821, 0xF827, 0xF82D, + 0xF835, 0xF847, 0xF859, 0xF863, 0xF865, 0xF86F, 0xF871, 0xF877, + 0xF87B, 0xF881, 0xF88D, 0xF89F, 0xF8A1, 0xF8AB, 0xF8B3, 0xF8B7, + 0xF8C9, 0xF8CB, 0xF8D1, 0xF8D7, 0xF8DD, 0xF8E7, 0xF8EF, 0xF8F9, + 0xF8FF, 0xF911, 0xF91D, 0xF925, 0xF931, 0xF937, 0xF93B, 0xF941, + 0xF94F, 0xF95F, 0xF961, 0xF96D, 0xF971, 0xF977, 0xF99D, 0xF9A3, + 0xF9A9, 0xF9B9, 0xF9CD, 0xF9E9, 0xF9FD, 0xFA07, 0xFA0D, 0xFA13, + 0xFA21, 0xFA25, 0xFA3F, 0xFA43, 0xFA51, 0xFA5B, 0xFA6D, 0xFA7B, + 0xFA97, 0xFA99, 0xFA9D, 0xFAAB, 0xFABB, 0xFABD, 0xFAD9, 0xFADF, + 0xFAE7, 0xFAED, 0xFB0F, 0xFB17, 0xFB1B, 0xFB2D, 0xFB2F, 0xFB3F, + 0xFB47, 0xFB4D, 0xFB75, 0xFB7D, 0xFB8F, 0xFB93, 0xFBB1, 0xFBB7, + 0xFBC3, 
0xFBC5, 0xFBE3, 0xFBE9, 0xFBF3, 0xFC01, 0xFC29, 0xFC37, + 0xFC41, 0xFC43, 0xFC4F, 0xFC59, 0xFC61, 0xFC65, 0xFC6D, 0xFC73, + 0xFC79, 0xFC95, 0xFC97, 0xFC9B, 0xFCA7, 0xFCB5, 0xFCC5, 0xFCCD, + 0xFCEB, 0xFCFB, 0xFD0D, 0xFD0F, 0xFD19, 0xFD2B, 0xFD31, 0xFD51, + 0xFD55, 0xFD67, 0xFD6D, 0xFD6F, 0xFD7B, 0xFD85, 0xFD97, 0xFD99, + 0xFD9F, 0xFDA9, 0xFDB7, 0xFDC9, 0xFDE5, 0xFDEB, 0xFDF3, 0xFE03, + 0xFE05, 0xFE09, 0xFE1D, 0xFE27, 0xFE2F, 0xFE41, 0xFE4B, 0xFE4D, + 0xFE57, 0xFE5F, 0xFE63, 0xFE69, 0xFE75, 0xFE7B, 0xFE8F, 0xFE93, + 0xFE95, 0xFE9B, 0xFE9F, 0xFEB3, 0xFEBD, 0xFED7, 0xFEE9, 0xFEF3, + 0xFEF5, 0xFF07, 0xFF0D, 0xFF1D, 0xFF2B, 0xFF2F, 0xFF49, 0xFF4D, + 0xFF5B, 0xFF65, 0xFF71, 0xFF7F, 0xFF85, 0xFF8B, 0xFF8F, 0xFF9D, + 0xFFA7, 0xFFA9, 0xFFC7, 0xFFD9, 0xFFEF, 0xFFF1 +#endif +}; diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il new file mode 100644 index 0000000000..d2e8024ac2 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_32.il @@ -0,0 +1,1291 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! The interface to the VIS instructions as declared below (and in the VIS +! User's Manual) will not change, but the macro implementation might change +! in the future. + +!-------------------------------------------------------------------- +! Pure edge handling instructions +! +! int vis_edge8(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8,8 + edge8 %o0,%o1,%o0 + .end +! +! int vis_edge8l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8l,8 + edge8l %o0,%o1,%o0 + .end +! +! int vis_edge16(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16,8 + edge16 %o0,%o1,%o0 + .end +! +! int vis_edge16l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16l,8 + edge16l %o0,%o1,%o0 + .end +! +! int vis_edge32(void */*frs1*/, void */*frs2*/); +! 
+ .inline vis_edge32,8 + edge32 %o0,%o1,%o0 + .end +! +! int vis_edge32l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32l,8 + edge32l %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Edge handling instructions with negative return values if cc set +! +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8cc,8 + edge8 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8lcc,8 + edge8l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16cc,8 + edge16 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16lcc,8 + edge16l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32cc,8 + edge32 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32lcc,8 + edge32l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end + +!-------------------------------------------------------------------- +! Alignment instructions +! +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddr,8 + alignaddr %o0,%o1,%o0 + .end +! +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddrl,8 + alignaddrl %o0,%o1,%o0 + .end +! +! double vis_faligndata(double /*frs1*/, double /*frs2*/); +! + .inline vis_faligndata,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + faligndata %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Partitioned comparison instructions +! +! 
int vis_fcmple16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple16 %f4,%f10,%o0 + .end +! +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne16 %f4,%f10,%o0 + .end +! +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple32 %f4,%f10,%o0 + .end +! +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne32 %f4,%f10,%o0 + .end +! +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt16 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq16 %f4,%f10,%o0 + .end +! +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt32 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq32 %f4,%f10,%o0 + .end + +!-------------------------------------------------------------------- +! Partitioned arithmetic +! +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8x16,12 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] + st %o2,[%sp+0x4c] + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! 
double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +! + .inline vis_fmul8x16_dummy,16 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16au,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16au %f4,%f10,%f0 + .end +! +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16al,8 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16al %f4,%f10,%f0 + .end +! +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8sux16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8sux16 %f4,%f10,%f0 + .end +! +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8ulx16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8sux16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8sux16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8ulx16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fpadd16(double /*frs1*/, double /*frs2*/); +! Both operands are staged through the [%sp+0x48] scratch slot, as in the other two-double templates of this file. + .inline vis_fpadd16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpadd16 %f4,%f10,%f0 + .end +! +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd16s %f4,%f10,%f0 + .end +! +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); +!
+ .inline vis_fpadd32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpadd32 %f4,%f10,%f0 + .end +! +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd32s %f4,%f10,%f0 + .end +! +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub16 %f4,%f10,%f0 + .end +! +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub16s %f4,%f10,%f0 + .end +! +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub32 %f4,%f10,%f0 + .end +! +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub32s %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel packing +! +! float vis_fpack16(double /*frs2*/); +! + .inline vis_fpack16,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 + .end + +! +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpack16_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + .end +! +! void vis_st2_fpack16(double, double, double *) +! + .inline vis_st2_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + st %f0,[%o4+0] + st %f1,[%o4+4] + .end +! +! void vis_std_fpack16(double, double, double *) +! 
+ .inline vis_std_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + std %f0,[%o4] + .end +! +! void vis_st2_fpackfix(double, double, double *) +! + .inline vis_st2_fpackfix,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpackfix %f4,%f0 + fpackfix %f10,%f1 + st %f0,[%o4+0] + st %f1,[%o4+4] + .end +! +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_hi,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 + .end + +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_lo,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f3 + fmovs %f3,%f1 /* without this, optimizer goes wrong */ + .end + +! +! double vis_fpack32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack32 %f4,%f10,%f0 + .end +! +! float vis_fpackfix(double /*frs2*/); +! + .inline vis_fpackfix,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpackfix %f4,%f0 + .end +! +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpackfix_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f6 + fpackfix %f4,%f0 + fpackfix %f6,%f1 + .end + +!-------------------------------------------------------------------- +! Motion estimation +! +! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/); +! + .inline vis_pdist,24 + std %o4,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + pdist %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Channel merging +! +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); +! 
+ .inline vis_fpmerge,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpmerge %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel expansion +! +! double vis_fexpand(float /*frs2*/); +! + .inline vis_fexpand,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fexpand %f4,%f0 + .end + +! double vis_fexpand_hi(double /*frs2*/); +! + .inline vis_fexpand_hi,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fexpand %f4,%f0 + .end + +! double vis_fexpand_lo(double /*frs2*/); +! + .inline vis_fexpand_lo,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fmovs %f5, %f2 + fexpand %f2,%f0 + .end + +!-------------------------------------------------------------------- +! Bitwise logical operations +! +! double vis_fnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnor %f4,%f10,%f0 + .end +! +! float vis_fnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fnors %f4,%f10,%f0 + .end +! +! double vis_fandnot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fandnot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fandnot1 %f4,%f10,%f0 + .end +! +! float vis_fandnots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fandnots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fandnot1s %f4,%f10,%f0 + .end +! +! double vis_fnot(double /*frs1*/); +! + .inline vis_fnot,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fnot1 %f4,%f0 + .end +! +! float vis_fnots(float /*frs1*/); +! + .inline vis_fnots,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fnot1s %f4,%f0 + .end +! +! double vis_fxor(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fxor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxor %f4,%f10,%f0 + .end +! +! float vis_fxors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxors %f4,%f10,%f0 + .end +! +! double vis_fnand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnand %f4,%f10,%f0 + .end +! +! float vis_fnands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnands,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fnands %f4,%f10,%f0 + .end +! +! double vis_fand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fand %f4,%f10,%f0 + .end +! +! float vis_fands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fands,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fands %f4,%f10,%f0 + .end +! +! double vis_fxnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxnor %f4,%f10,%f0 + .end +! +! float vis_fxnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxnors %f4,%f10,%f0 + .end +! +! double vis_fsrc(double /*frs1*/); +! + .inline vis_fsrc,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fsrc1 %f4,%f0 + .end +! +! float vis_fsrcs(float /*frs1*/); +! + .inline vis_fsrcs,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fsrc1s %f4,%f0 + .end +! +! double vis_fornot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fornot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fornot1 %f4,%f10,%f0 + .end +! +! 
float vis_fornots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fornots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fornot1s %f4,%f10,%f0 + .end +! +! double vis_for(double /*frs1*/, double /*frs2*/); +! + .inline vis_for,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + for %f4,%f10,%f0 + .end +! +! float vis_fors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fors %f4,%f10,%f0 + .end +! +! double vis_fzero(/* void */) +! + .inline vis_fzero,0 + fzero %f0 + .end +! +! float vis_fzeros(/* void */) +! + .inline vis_fzeros,0 + fzeros %f0 + .end +! +! double vis_fone(/* void */) +! + .inline vis_fone,0 + fone %f0 + .end +! +! float vis_fones(/* void */) +! + .inline vis_fones,0 + fones %f0 + .end + +!-------------------------------------------------------------------- +! Partial store instructions +! frd is reloaded into %f4 and stored by stda to the address in %o2, with %o3 (rmask) as the second address operand and the immediate ASI named in the trailing comment. +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8PL,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL + .end +! +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); +! + .inline vis_stdfa_ASI_PST8P_int_pair,16 + ld [%o0],%f4 + ld [%o1],%f5 ! %f4:%f5 is built from the words at rs1 and rs2; rs3 (%o2) is the store address + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S + .end +! +! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P + .end +! +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) +! 
+ .inline vis_stdfa_ASI_PST16S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S + .end +! +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P + .end +! +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S + .end + +!-------------------------------------------------------------------- +! Short store instructions +! frd arrives in %o0:%o1 and is reloaded into %f4; the destination pointer is in %o2 and, for the _index variants, the index is in %o3. +! vis_stdfa_ASI_FL8P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL8P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd1 ! ASI_FL8_S + .end +! +! vis_stdfa_ASI_FL16P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL16P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd3 ! ASI_FL16_S + .end +! +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd8 ! ASI_FL8_PL + .end +! +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd9 ! ASI_FL8_SL + .end +! +! 
vis_stdfa_ASI_FL16PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xda ! ASI_FL16_PL + .end +! +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xdb ! ASI_FL16_SL + .end + +!-------------------------------------------------------------------- +! Short load instructions +! Each template loads through the given immediate ASI into %f4 and then copies %f4 to the return register %f0; the pointer is in %o0 and any index in %o1. +! double vis_lddfa_ASI_FL8P(void *rs1) +! + .inline vis_lddfa_ASI_FL8P,4 + ldda [%o0]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8P_index,8 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_hi,8 + sra %o1,16,%o1 ! offset = index arithmetically shifted right by 16 (its upper half) + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_lo,8 + sll %o1,16,%o1 + sra %o1,16,%o1 ! shift left then arithmetic right by 16: offset = low half of index + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8S(void *rs1) +! + .inline vis_lddfa_ASI_FL8S,4 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P(void *rs1) +! + .inline vis_lddfa_ASI_FL16P,4 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16P_index,8 + ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16S(void *rs1) +! + .inline vis_lddfa_ASI_FL16S,4 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL(void *rs1) +! + .inline vis_lddfa_ASI_FL8PL,4 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8PL_index,8 + ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! 
double vis_lddfa_ASI_FL8SL(void *rs1) +! + .inline vis_lddfa_ASI_FL8SL,4 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL(void *rs1) +! + .inline vis_lddfa_ASI_FL16PL,4 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16PL_index,8 + ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16SL(void *rs1) +! + .inline vis_lddfa_ASI_FL16SL,4 + ldda [%o0]0xdb,%f4 ! ASI_FL16_SL + fmovd %f4,%f0 + .end + +!-------------------------------------------------------------------- +! Graphics status register +! +! unsigned int vis_read_gsr(void) +! + .inline vis_read_gsr,0 + rd %gsr,%o0 + .end +! +! void vis_write_gsr(unsigned int /* GSR */) +! + .inline vis_write_gsr,4 + wr %g0,%o0,%gsr + .end + +!-------------------------------------------------------------------- +! Voxel texture mapping +! The 64-bit rs1 argument arrives split across %o0 and %o1 and is rejoined in %o3; rs2 is taken from %o2 and the result is written to %o0. +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) +! + .inline vis_array8,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array8 %o3,%o2,%o0 + .end +! +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array16,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array16 %o3,%o2,%o0 + .end +! +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array32,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array32 %o3,%o2,%o0 + .end + +!-------------------------------------------------------------------- +! Register aliasing and type casts +! +! float vis_read_hi(double /* frs1 */); +! + .inline vis_read_hi,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! 
%f0:%f1 = double frs1; return %f0; + .end +! +! float vis_read_lo(double /* frs1 */); +! + .inline vis_read_lo,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; + .end +! +! double vis_write_hi(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_hi,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f0 ! %f0 = float frs2; return %f0:%f1; + .end +! +! double vis_write_lo(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_lo,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f1 ! %f1 = float frs2; return %f0:%f1; + .end +! +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); +! + .inline vis_freg_pair,8 + st %o0,[%sp+0x48] ! store float frs1 + ld [%sp+0x48],%f0 ! %f0 = float frs1 (first word of the returned %f0:%f1 pair) + st %o1,[%sp+0x48] ! store float frs2 + ld [%sp+0x48],%f1 ! %f1 = float frs2 (second word); return %f0:%f1; + .end +! +! float vis_to_float(unsigned int /*value*/); +! + .inline vis_to_float,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 + .end +! +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +! + .inline vis_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end +! +! double vis_to_double_dup(unsigned int /*value*/); +! + .inline vis_to_double_dup,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f1 + fmovs %f1,%f0 ! duplicate value + .end +! +! double vis_ll_to_double(unsigned long long /*value*/); +! + .inline vis_ll_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + +!-------------------------------------------------------------------- +! Address space identifier (ASI) register +! +! unsigned int vis_read_asi(void) +! + .inline vis_read_asi,0 + rd %asi,%o0 + .end +! +! void vis_write_asi(unsigned int /* ASI */) +! 
+ .inline vis_write_asi,4 + wr %g0,%o0,%asi + .end + +!-------------------------------------------------------------------- +! Load/store from/into alternate space +! The *_ASI_REG variants address through the %asi register; *_ASI_P and *_ASI_PL use the immediate ASIs 0x80 and 0x88 respectively. +! float vis_ldfa_ASI_REG(void *rs1) +! + .inline vis_ldfa_ASI_REG,4 + lda [%o0+0]%asi,%f4 + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_P(void *rs1) +! + .inline vis_ldfa_ASI_P,4 + lda [%o0]0x80,%f4 ! ASI_P + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_PL(void *rs1) +! + .inline vis_ldfa_ASI_PL,4 + lda [%o0]0x88,%f4 ! ASI_PL + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_REG(void *rs1) +! + .inline vis_lddfa_ASI_REG,4 + ldda [%o0+0]%asi,%f4 + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_P(void *rs1) +! + .inline vis_lddfa_ASI_P,4 + ldda [%o0]0x80,%f4 ! ASI_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_PL(void *rs1) +! + .inline vis_lddfa_ASI_PL,4 + ldda [%o0]0x88,%f4 ! ASI_PL + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! vis_stfa_ASI_REG(float frs, void *rs1) +! + .inline vis_stfa_ASI_REG,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1+0]%asi + .end +! +! vis_stfa_ASI_P(float frs, void *rs1) +! + .inline vis_stfa_ASI_P,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x80 ! ASI_P + .end +! +! vis_stfa_ASI_PL(float frs, void *rs1) +! + .inline vis_stfa_ASI_PL,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x88 ! ASI_PL + .end +! +! vis_stdfa_ASI_REG(double frd, void *rs1) +! + .inline vis_stdfa_ASI_REG,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+0]%asi + .end +! +! vis_stdfa_ASI_P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x80 ! ASI_P + .end +! +! vis_stdfa_ASI_PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x88 ! ASI_PL + .end +! +! 
unsigned short vis_lduha_ASI_REG(void *rs1) +! + .inline vis_lduha_ASI_REG,4 + lduha [%o0+0]%asi,%o0 + .end +! +! unsigned short vis_lduha_ASI_P(void *rs1) +! + .inline vis_lduha_ASI_P,4 + lduha [%o0]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL(void *rs1) +! + .inline vis_lduha_ASI_PL,4 + lduha [%o0]0x88,%o0 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) +! + .inline vis_lduha_ASI_P_index,8 + lduha [%o0+%o1]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) +! + .inline vis_lduha_ASI_PL_index,8 + lduha [%o0+%o1]0x88,%o0 ! ASI_PL + .end + +!-------------------------------------------------------------------- +! Prefetch +! The read variant issues prefetch with function code 0 and the write variant with function code 2, both at the address in %o0. +! void vis_prefetch_read(void * /*address*/); +! + .inline vis_prefetch_read,4 + prefetch [%o0+0],0 + .end +! +! void vis_prefetch_write(void * /*address*/); +! + .inline vis_prefetch_write,4 + prefetch [%o0+0],2 + .end diff --git a/security/nss/lib/freebl/mpi/vis_64.il b/security/nss/lib/freebl/mpi/vis_64.il new file mode 100644 index 0000000000..cbe2b5aa27 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_64.il @@ -0,0 +1,997 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! This file is to be used in place of vis.il in 64-bit builds. + +!-------------------------------------------------------------------- +! Pure edge handling instructions +! Both pointer arguments are read from %o0 and %o1; the result overwrites %o0. +! int vis_edge8(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8,16 + edge8 %o0,%o1,%o0 + .end +! +! int vis_edge8l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8l,16 + edge8l %o0,%o1,%o0 + .end +! +! int vis_edge16(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16,16 + edge16 %o0,%o1,%o0 + .end +! +! int vis_edge16l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16l,16 + edge16l %o0,%o1,%o0 + .end +! +! 
int vis_edge32(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32,16 + edge32 %o0,%o1,%o0 + .end +! +! int vis_edge32l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32l,16 + edge32l %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Edge handling instructions with negative return values if cc set +! The edge result in %o0 is OR-ed with %o1, which is 0 unless the movgu %xcc condition holds, in which case it is -1024. +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8cc,16 + edge8 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8lcc,16 + edge8l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16cc,16 + edge16 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16lcc,16 + edge16l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32cc,16 + edge32 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32lcc,16 + edge32l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end + +!-------------------------------------------------------------------- +! Alignment instructions +! +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddr,12 + alignaddr %o0,%o1,%o0 + .end +! +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddrl,12 + alignaddrl %o0,%o1,%o0 + .end +! +! double vis_faligndata(double /*frs1*/, double /*frs2*/); +! + .inline vis_faligndata,16 + faligndata %f0,%f2,%f0 + .end + +!-------------------------------------------------------------------- +! Partitioned comparison instructions +! +! int vis_fcmple16(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fcmple16,16 + fcmple16 %f0,%f2,%o0 + .end +! +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne16,16 + fcmpne16 %f0,%f2,%o0 + .end +! +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple32,16 + fcmple32 %f0,%f2,%o0 + .end +! +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne32,16 + fcmpne32 %f0,%f2,%o0 + .end +! +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt16,16 + fcmpgt16 %f0,%f2,%o0 + .end +! +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq16,16 + fcmpeq16 %f0,%f2,%o0 + .end +! +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt32,16 + fcmpgt32 %f0,%f2,%o0 + .end +! +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq32,16 + fcmpeq32 %f0,%f2,%o0 + .end + +!-------------------------------------------------------------------- +! Partitioned arithmetic +! Operands are read straight from FP registers by argument position: doubles from %f0, %f2, %f4 and floats from %f1, %f3; the result is written to %f0. +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8x16,12 + fmul8x16 %f1,%f2,%f0 + .end +! +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +! + .inline vis_fmul8x16_dummy,16 + fmul8x16 %f1,%f4,%f0 + .end +! +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16au,8 + fmul8x16au %f1,%f3,%f0 + .end +! +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16al,8 + fmul8x16al %f1,%f3,%f0 + .end +! +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8sux16,16 + fmul8sux16 %f0,%f2,%f0 + .end +! +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8ulx16,16 + fmul8ulx16 %f0,%f2,%f0 + .end +! +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8sux16,8 + fmuld8sux16 %f1,%f3,%f0 + .end +! +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8ulx16,8 + fmuld8ulx16 %f1,%f3,%f0 + .end +! +! 
double vis_fpadd16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd16,16 + fpadd16 %f0,%f2,%f0 + .end +! +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd16s,8 + fpadd16s %f1,%f3,%f0 + .end +! +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd32,16 + fpadd32 %f0,%f2,%f0 + .end +! +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd32s,8 + fpadd32s %f1,%f3,%f0 + .end +! +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub16,16 + fpsub16 %f0,%f2,%f0 + .end +! +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub16s,8 + fpsub16s %f1,%f3,%f0 + .end +! +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub32,16 + fpsub32 %f0,%f2,%f0 + .end +! +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub32s,8 + fpsub32s %f1,%f3,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel packing +! The two-operand templates pack the first double (%f0) into %f0 and the second (%f2) into %f1; the st2/std forms then store %f0 and %f1 at the pointer in %o2. +! float vis_fpack16(double /*frs2*/); +! + .inline vis_fpack16,8 + fpack16 %f0,%f0 + .end +! +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpack16_pair,16 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + .end +! +! void vis_st2_fpack16(double, double, double *) +! + .inline vis_st2_fpack16,24 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + st %f0,[%o2+0] + st %f1,[%o2+4] + .end +! +! void vis_std_fpack16(double, double, double *) +! + .inline vis_std_fpack16,24 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + std %f0,[%o2] + .end +! +! void vis_st2_fpackfix(double, double, double *) +! + .inline vis_st2_fpackfix,24 + fpackfix %f0,%f0 + fpackfix %f2,%f1 + st %f0,[%o2+0] + st %f1,[%o2+4] + .end +! +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_hi,16 + fpack16 %f2,%f0 + .end + +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fpack16_to_lo,16 + fpack16 %f2,%f3 + fmovs %f3,%f1 /* without this, optimizer goes wrong */ + .end + +! +! double vis_fpack32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack32,16 + fpack32 %f0,%f2,%f0 + .end +! +! float vis_fpackfix(double /*frs2*/); +! + .inline vis_fpackfix,8 + fpackfix %f0,%f0 + .end +! +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpackfix_pair,16 + fpackfix %f0,%f0 + fpackfix %f2,%f1 + .end + +!-------------------------------------------------------------------- +! Motion estimation +! pxls1 and pxls2 are read from %f2 and %f4; the destination %f0 is the register the first argument (accum) occupies. +! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/, +! double pxls2 /*frs2*/); +! + .inline vis_pxldist64,24 + pdist %f2,%f4,%f0 + .end + +!-------------------------------------------------------------------- +! Channel merging +! +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpmerge,8 + fpmerge %f1,%f3,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel expansion +! +! double vis_fexpand(float /*frs2*/); +! + .inline vis_fexpand,4 + fexpand %f1,%f0 + .end + +! double vis_fexpand_hi(double /*frs2*/); +! + .inline vis_fexpand_hi,8 + fexpand %f0,%f0 + .end + +! double vis_fexpand_lo(double /*frs2*/); +! + .inline vis_fexpand_lo,8 + fexpand %f1,%f0 + .end + +!-------------------------------------------------------------------- +! Bitwise logical operations +! +! double vis_fnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnor,16 + fnor %f0,%f2,%f0 + .end +! +! float vis_fnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnors,8 + fnors %f1,%f3,%f0 + .end +! +! double vis_fandnot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fandnot,16 + fandnot1 %f0,%f2,%f0 + .end +! +! float vis_fandnots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fandnots,8 + fandnot1s %f1,%f3,%f0 + .end +! +! double vis_fnot(double /*frs1*/); +! + .inline vis_fnot,8 + fnot1 %f0,%f0 + .end +! +! float vis_fnots(float /*frs1*/); +! 
+ .inline vis_fnots,4 + fnot1s %f1,%f0 + .end +! +! double vis_fxor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxor,16 + fxor %f0,%f2,%f0 + .end +! +! float vis_fxors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxors,8 + fxors %f1,%f3,%f0 + .end +! +! double vis_fnand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnand,16 + fnand %f0,%f2,%f0 + .end +! +! float vis_fnands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnands,8 + fnands %f1,%f3,%f0 + .end +! +! double vis_fand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fand,16 + fand %f0,%f2,%f0 + .end +! +! float vis_fands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fands,8 + fands %f1,%f3,%f0 + .end +! +! double vis_fxnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxnor,16 + fxnor %f0,%f2,%f0 + .end +! +! float vis_fxnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxnors,8 + fxnors %f1,%f3,%f0 + .end +! +! double vis_fsrc(double /*frs1*/); +! + .inline vis_fsrc,8 + fsrc1 %f0,%f0 + .end +! +! float vis_fsrcs(float /*frs1*/); +! + .inline vis_fsrcs,4 + fsrc1s %f1,%f0 + .end +! +! double vis_fornot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fornot,16 + fornot1 %f0,%f2,%f0 + .end +! +! float vis_fornots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fornots,8 + fornot1s %f1,%f3,%f0 + .end +! +! double vis_for(double /*frs1*/, double /*frs2*/); +! + .inline vis_for,16 + for %f0,%f2,%f0 + .end +! +! float vis_fors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fors,8 + fors %f1,%f3,%f0 + .end +! +! double vis_fzero(/* void */) +! (the four constant generators below take no arguments and produce their result directly in %f0) + .inline vis_fzero,0 + fzero %f0 + .end +! +! float vis_fzeros(/* void */) +! + .inline vis_fzeros,0 + fzeros %f0 + .end +! +! double vis_fone(/* void */) +! + .inline vis_fone,0 + fone %f0 + .end +! +! float vis_fones(/* void */) +! + .inline vis_fones,0 + fones %f0 + .end + +!-------------------------------------------------------------------- +! Partial store instructions +! +! 
vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8P,20 + stda %f0,[%o1]%o2,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8PL,20 + stda %f0,[%o1]%o2,0xc8 ! ASI_PST8_PL + .end +! +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); +! + .inline vis_stdfa_ASI_PST8P_int_pair,28 + ld [%o0],%f4 + ld [%o1],%f5 ! %f4:%f5 is built from the words at rs1 and rs2; rs3 (%o2) is the store address + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8S,20 + stda %f0,[%o1]%o2,0xc1 ! ASI_PST8_S + .end +! +! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16P,20 + stda %f0,[%o1]%o2,0xc2 ! ASI_PST16_P + .end +! +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16S,20 + stda %f0,[%o1]%o2,0xc3 ! ASI_PST16_S + .end +! +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32P,20 + stda %f0,[%o1]%o2,0xc4 ! ASI_PST32_P + .end +! +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32S,20 + stda %f0,[%o1]%o2,0xc5 ! ASI_PST32_S + .end + +!-------------------------------------------------------------------- +! Short store instructions +! frd is stored directly from %f0; the destination pointer is in %o1 and, for the _index variants, the index is in %o2. +! vis_stdfa_ASI_FL8P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8P,16 + stda %f0,[%o1]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL8P_index,24 + stda %f0,[%o1+%o2]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8S,16 + stda %f0,[%o1]0xd1 ! ASI_FL8_S + .end +! +! vis_stdfa_ASI_FL16P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16P,16 + stda %f0,[%o1]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL16P_index,24 + stda %f0,[%o1+%o2]0xd2 ! ASI_FL16_P + .end +! +! 
vis_stdfa_ASI_FL16S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16S,16 + stda %f0,[%o1]0xd3 ! ASI_FL16_S + .end +! +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8PL,16 + stda %f0,[%o1]0xd8 ! ASI_FL8_PL + .end +! +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8SL,16 + stda %f0,[%o1]0xd9 ! ASI_FL8_SL + .end +! +! vis_stdfa_ASI_FL16PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16PL,16 + stda %f0,[%o1]0xda ! ASI_FL16_PL + .end +! +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16SL,16 + stda %f0,[%o1]0xdb ! ASI_FL16_SL + .end + +!-------------------------------------------------------------------- +! Short load instructions +! Each template loads through the given immediate ASI into %f4 and then copies %f4 to the return register %f0; the pointer is in %o0 and any index in %o1. +! double vis_lddfa_ASI_FL8P(void *rs1) +! + .inline vis_lddfa_ASI_FL8P,8 + ldda [%o0]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8P_index,16 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_hi,12 + sra %o1,16,%o1 ! offset = index arithmetically shifted right by 16 (its upper half) + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_lo,12 + sll %o1,16,%o1 + sra %o1,16,%o1 ! shift left then arithmetic right by 16: offset = low half of index + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8S(void *rs1) +! + .inline vis_lddfa_ASI_FL8S,8 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P(void *rs1) +! + .inline vis_lddfa_ASI_FL16P,8 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16P_index,16 + ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16S(void *rs1) +! + .inline vis_lddfa_ASI_FL16S,8 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S + fmovd %f4,%f0 + .end +! +! 
double vis_lddfa_ASI_FL8PL(void *rs1) +! + .inline vis_lddfa_ASI_FL8PL,8 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8PL_index,16 + ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8SL(void *rs1) +! + .inline vis_lddfa_ASI_FL8SL,8 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL(void *rs1) +! + .inline vis_lddfa_ASI_FL16PL,8 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16PL_index,16 + ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16SL(void *rs1) +! + .inline vis_lddfa_ASI_FL16SL,8 + ldda [%o0]0xdb,%f4 ! ASI_FL16_SL + fmovd %f4,%f0 + .end + +!-------------------------------------------------------------------- +! Graphics status register +! +! unsigned int vis_read_gsr(void) +! + .inline vis_read_gsr,0 + rd %gsr,%o0 + .end +! +! void vis_write_gsr(unsigned int /* GSR */) +! + .inline vis_write_gsr,4 + wr %g0,%o0,%gsr + .end + +!-------------------------------------------------------------------- +! Voxel texture mapping +! rs1 is read whole from %o0 and rs2 from %o1; the result is written to %o0. +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) +! + .inline vis_array8,12 + array8 %o0,%o1,%o0 + .end +! +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array16,12 + array16 %o0,%o1,%o0 + .end +! +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array32,12 + array32 %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Register aliasing and type casts +! +! float vis_read_hi(double /* frs1 */); +! + .inline vis_read_hi,8 + fmovs %f0,%f0 + .end +! +! float vis_read_lo(double /* frs1 */); +! + .inline vis_read_lo,8 + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; + .end +! +! 
double vis_write_hi(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_hi,12 + fmovs %f3,%f0 ! %f3 = float frs2; return %f0:f1; + .end +! +! double vis_write_lo(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_lo,12 + fmovs %f3,%f1 ! %f3 = float frs2; return %f0:f1; + .end +! +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); +! + .inline vis_freg_pair,8 + fmovs %f1,%f0 ! %f1 = float frs1; put in hi; + fmovs %f3,%f1 ! %f3 = float frs2; put in lo; return %f0:f1; + .end +! +! float vis_to_float(unsigned int /*value*/); +! + .inline vis_to_float,4 + st %o0,[%sp+2183] + ld [%sp+2183],%f0 + .end +! +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +! + .inline vis_to_double,8 + st %o0,[%sp+2183] + ld [%sp+2183],%f0 + st %o1,[%sp+2183] + ld [%sp+2183],%f1 + .end +! +! double vis_to_double_dup(unsigned int /*value*/); +! + .inline vis_to_double_dup,4 + st %o0,[%sp+2183] + ld [%sp+2183],%f1 + fmovs %f1,%f0 ! duplicate value + .end +! +! double vis_ll_to_double(unsigned long long /*value*/); +! + .inline vis_ll_to_double,8 + stx %o0,[%sp+2183] + ldd [%sp+2183],%f0 + .end + +!-------------------------------------------------------------------- +! Address space identifier (ASI) register +! +! unsigned int vis_read_asi(void) +! + .inline vis_read_asi,0 + rd %asi,%o0 + .end +! +! void vis_write_asi(unsigned int /* ASI */) +! + .inline vis_write_asi,4 + wr %g0,%o0,%asi + .end + +!-------------------------------------------------------------------- +! Load/store from/into alternate space +! +! float vis_ldfa_ASI_REG(void *rs1) +! + .inline vis_ldfa_ASI_REG,8 + lda [%o0+0]%asi,%f4 + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_P(void *rs1) +! + .inline vis_ldfa_ASI_P,8 + lda [%o0]0x80,%f4 ! ASI_P + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_PL(void *rs1) +! + .inline vis_ldfa_ASI_PL,8 + lda [%o0]0x88,%f4 ! ASI_PL + fmovs %f4,%f0 ! 
Compiler can clean this up + .end +! +! double vis_lddfa_ASI_REG(void *rs1) +! + .inline vis_lddfa_ASI_REG,8 + ldda [%o0+0]%asi,%f4 + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_P(void *rs1) +! + .inline vis_lddfa_ASI_P,8 + ldda [%o0]0x80,%f4 ! ASI_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_PL(void *rs1) +! + .inline vis_lddfa_ASI_PL,8 + ldda [%o0]0x88,%f4 ! ASI_PL + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! vis_stfa_ASI_REG(float frs, void *rs1) +! + .inline vis_stfa_ASI_REG,12 + sta %f1,[%o1+0]%asi + .end +! +! vis_stfa_ASI_P(float frs, void *rs1) +! + .inline vis_stfa_ASI_P,12 + sta %f1,[%o1]0x80 ! ASI_P + .end +! +! vis_stfa_ASI_PL(float frs, void *rs1) +! + .inline vis_stfa_ASI_PL,12 + sta %f1,[%o1]0x88 ! ASI_PL + .end +! +! vis_stdfa_ASI_REG(double frd, void *rs1) +! + .inline vis_stdfa_ASI_REG,16 + stda %f0,[%o1+0]%asi + .end +! +! vis_stdfa_ASI_P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_P,16 + stda %f0,[%o1]0x80 ! ASI_P + .end +! +! vis_stdfa_ASI_PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_PL,16 + stda %f0,[%o1]0x88 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_REG(void *rs1) +! + .inline vis_lduha_ASI_REG,8 + lduha [%o0+0]%asi,%o0 + .end +! +! unsigned short vis_lduha_ASI_P(void *rs1) +! + .inline vis_lduha_ASI_P,8 + lduha [%o0]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL(void *rs1) +! + .inline vis_lduha_ASI_PL,8 + lduha [%o0]0x88,%o0 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) +! + .inline vis_lduha_ASI_P_index,16 + lduha [%o0+%o1]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) +! + .inline vis_lduha_ASI_PL_index,16 + lduha [%o0+%o1]0x88,%o0 ! ASI_PL + .end + +!-------------------------------------------------------------------- +! Prefetch +! +! void vis_prefetch_read(void * /*address*/); +! + .inline vis_prefetch_read,8 + prefetch [%o0+0],0 + .end +! 
+! void vis_prefetch_write(void * /*address*/); +! + .inline vis_prefetch_write,8 + prefetch [%o0+0],2 + .end diff --git a/security/nss/lib/freebl/mpi/vis_proto.h b/security/nss/lib/freebl/mpi/vis_proto.h new file mode 100644 index 0000000000..275de59df8 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_proto.h @@ -0,0 +1,234 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Prototypes for the inline templates in vis.il + */ + +#ifndef VIS_PROTO_H +#define VIS_PROTO_H + +#pragma ident "@(#)vis_proto.h 1.3 97/03/30 SMI" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Pure edge handling instructions */ +int vis_edge8(void * /*frs1*/, void * /*frs2*/); +int vis_edge8l(void * /*frs1*/, void * /*frs2*/); +int vis_edge16(void * /*frs1*/, void * /*frs2*/); +int vis_edge16l(void * /*frs1*/, void * /*frs2*/); +int vis_edge32(void * /*frs1*/, void * /*frs2*/); +int vis_edge32l(void * /*frs1*/, void * /*frs2*/); + +/* Edge handling instructions with negative return values if cc set. */ +int vis_edge8cc(void * /*frs1*/, void * /*frs2*/); +int vis_edge8lcc(void * /*frs1*/, void * /*frs2*/); +int vis_edge16cc(void * /*frs1*/, void * /*frs2*/); +int vis_edge16lcc(void * /*frs1*/, void * /*frs2*/); +int vis_edge32cc(void * /*frs1*/, void * /*frs2*/); +int vis_edge32lcc(void * /*frs1*/, void * /*frs2*/); + +/* Alignment instructions. */ +void *vis_alignaddr(void * /*rs1*/, int /*rs2*/); +void *vis_alignaddrl(void * /*rs1*/, int /*rs2*/); +double vis_faligndata(double /*frs1*/, double /*frs2*/); + +/* Partitioned comparison instructions. 
*/ +int vis_fcmple16(double /*frs1*/, double /*frs2*/); +int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +int vis_fcmple32(double /*frs1*/, double /*frs2*/); +int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); + +/* Partitioned multiplication. */ +#if 0 +double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +#endif +double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); + +/* Partitioned addition & subtraction. */ +double vis_fpadd16(double /*frs1*/, double /*frs2*/); +float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +double vis_fpadd32(double /*frs1*/, double /*frs2*/); +float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +double vis_fpsub16(double /*frs1*/, double /*frs2*/); +float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +double vis_fpsub32(double /*frs1*/, double /*frs2*/); +float vis_fpsub32s(float /*frs1*/, float /*frs2*/); + +/* Pixel packing & clamping. */ +float vis_fpack16(double /*frs2*/); +double vis_fpack32(double /*frs1*/, double /*frs2*/); +float vis_fpackfix(double /*frs2*/); + +/* Combined pack ops. 
*/ +double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +void vis_st2_fpack16(double, double, double *); +void vis_std_fpack16(double, double, double *); +void vis_st2_fpackfix(double, double, double *); + +double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); + +/* Motion estimation. */ +double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/); + +/* Channel merging. */ +double vis_fpmerge(float /*frs1*/, float /*frs2*/); + +/* Pixel expansion. */ +double vis_fexpand(float /*frs2*/); +double vis_fexpand_hi(double /*frs2*/); +double vis_fexpand_lo(double /*frs2*/); + +/* Bitwise logical operators. */ +double vis_fnor(double /*frs1*/, double /*frs2*/); +float vis_fnors(float /*frs1*/, float /*frs2*/); +double vis_fandnot(double /*frs1*/, double /*frs2*/); +float vis_fandnots(float /*frs1*/, float /*frs2*/); +double vis_fnot(double /*frs1*/); +float vis_fnots(float /*frs1*/); +double vis_fxor(double /*frs1*/, double /*frs2*/); +float vis_fxors(float /*frs1*/, float /*frs2*/); +double vis_fnand(double /*frs1*/, double /*frs2*/); +float vis_fnands(float /*frs1*/, float /*frs2*/); +double vis_fand(double /*frs1*/, double /*frs2*/); +float vis_fands(float /*frs1*/, float /*frs2*/); +double vis_fxnor(double /*frs1*/, double /*frs2*/); +float vis_fxnors(float /*frs1*/, float /*frs2*/); +double vis_fsrc(double /*frs1*/); +float vis_fsrcs(float /*frs1*/); +double vis_fornot(double /*frs1*/, double /*frs2*/); +float vis_fornots(float /*frs1*/, float /*frs2*/); +double vis_for(double /*frs1*/, double /*frs2*/); +float vis_fors(float /*frs1*/, float /*frs2*/); +double vis_fzero(void); +float vis_fzeros(void); +double vis_fone(void); +float vis_fones(void); + +/* Partial stores. 
*/ +void vis_stdfa_ASI_PST8P(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST8PL(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST8P_int_pair(void * /*rs1*/, void * /*rs2*/, + void * /*rs3*/, int /*rmask*/); +void vis_stdfa_ASI_PST8S(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST16P(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST16S(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST32P(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST32S(double /*frd*/, void * /*rs1*/, int /*rmask*/); + +/* Byte & short stores. */ +void vis_stdfa_ASI_FL8P(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL8P_index(double /*frd*/, void * /*rs1*/, long /*index*/); +void vis_stdfa_ASI_FL8S(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16P(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16P_index(double /*frd*/, void * /*rs1*/, long /*index*/); +void vis_stdfa_ASI_FL16S(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL8PL(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL8SL(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16PL(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16SL(double /*frd*/, void * /*rs1*/); + +/* Byte & short loads. 
*/ +double vis_lddfa_ASI_FL8P(void * /*rs1*/); +double vis_lddfa_ASI_FL8P_index(void * /*rs1*/, long /*index*/); +double vis_lddfa_ASI_FL8P_hi(void * /*rs1*/, unsigned int /*index*/); +double vis_lddfa_ASI_FL8P_lo(void * /*rs1*/, unsigned int /*index*/); +double vis_lddfa_ASI_FL8S(void * /*rs1*/); +double vis_lddfa_ASI_FL16P(void * /*rs1*/); +double vis_lddfa_ASI_FL16P_index(void * /*rs1*/, long /*index*/); +double vis_lddfa_ASI_FL16S(void * /*rs1*/); +double vis_lddfa_ASI_FL8PL(void * /*rs1*/); +double vis_lddfa_ASI_FL8SL(void * /*rs1*/); +double vis_lddfa_ASI_FL16PL(void * /*rs1*/); +double vis_lddfa_ASI_FL16SL(void * /*rs1*/); + +/* Direct write to GSR, read from GSR */ +void vis_write_gsr(unsigned int /*GSR*/); +unsigned int vis_read_gsr(void); + +/* Voxel texture mapping. */ +#if !defined(_NO_LONGLONG) +unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/); +unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/); +unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/); +#endif /* !defined(_NO_LONGLONG) */ + +/* Register aliasing and type casts. */ +float vis_read_hi(double /*frs1*/); +float vis_read_lo(double /*frs1*/); +double vis_write_hi(double /*frs1*/, float /*frs2*/); +double vis_write_lo(double /*frs1*/, float /*frs2*/); +double vis_freg_pair(float /*frs1*/, float /*frs2*/); +float vis_to_float(unsigned int /*value*/); +double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +double vis_to_double_dup(unsigned int /*value*/); +#if !defined(_NO_LONGLONG) +double vis_ll_to_double(unsigned long long /*value*/); +#endif /* !defined(_NO_LONGLONG) */ + +/* Miscellany (no inlines) */ +void vis_error(char * /*fmt*/, int /*a0*/); +void vis_sim_init(void); + +/* For better performance */ +#define vis_fmul8x16(farg, darg) vis_fmul8x16_dummy((farg), 0, (darg)) + +/* Nicknames for explicit ASI loads and stores. 
*/ +#define vis_st_u8 vis_stdfa_ASI_FL8P +#define vis_st_u8_i vis_stdfa_ASI_FL8P_index +#define vis_st_u8_le vis_stdfa_ASI_FL8PL +#define vis_st_u16 vis_stdfa_ASI_FL16P +#define vis_st_u16_i vis_stdfa_ASI_FL16P_index +#define vis_st_u16_le vis_stdfa_ASI_FL16PL + +#define vis_ld_u8 vis_lddfa_ASI_FL8P +#define vis_ld_u8_i vis_lddfa_ASI_FL8P_index +#define vis_ld_u8_le vis_lddfa_ASI_FL8PL +#define vis_ld_u16 vis_lddfa_ASI_FL16P +#define vis_ld_u16_i vis_lddfa_ASI_FL16P_index +#define vis_ld_u16_le vis_lddfa_ASI_FL16PL + +#define vis_pst_8 vis_stdfa_ASI_PST8P +#define vis_pst_16 vis_stdfa_ASI_PST16P +#define vis_pst_32 vis_stdfa_ASI_PST32P + +#define vis_st_u8s vis_stdfa_ASI_FL8S +#define vis_st_u8s_le vis_stdfa_ASI_FL8SL +#define vis_st_u16s vis_stdfa_ASI_FL16S +#define vis_st_u16s_le vis_stdfa_ASI_FL16SL + +#define vis_ld_u8s vis_lddfa_ASI_FL8S +#define vis_ld_u8s_le vis_lddfa_ASI_FL8SL +#define vis_ld_u16s vis_lddfa_ASI_FL16S +#define vis_ld_u16s_le vis_lddfa_ASI_FL16SL + +#define vis_pst_8s vis_stdfa_ASI_PST8S +#define vis_pst_16s vis_stdfa_ASI_PST16S +#define vis_pst_32s vis_stdfa_ASI_PST32S + +/* "<" and ">=" may be implemented in terms of ">" and "<=". */ +#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a)) +#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a)) +#define vis_fcmpge16(a, b) vis_fcmple16((b), (a)) +#define vis_fcmpge32(a, b) vis_fcmple32((b), (a)) + +#ifdef __cplusplus +} // End of extern "C" +#endif /* __cplusplus */ + +#endif /* VIS_PROTO_H */ diff --git a/security/nss/lib/freebl/nsslowhash.c b/security/nss/lib/freebl/nsslowhash.c new file mode 100644 index 0000000000..7a22a357e1 --- /dev/null +++ b/security/nss/lib/freebl/nsslowhash.c @@ -0,0 +1,161 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "prtypes.h" +#include "prenv.h" +#include "secerr.h" +#include "blapi.h" +#include "hasht.h" +#include "plhash.h" +#include "nsslowhash.h" +#include "blapii.h" + +struct NSSLOWInitContextStr { + int count; +}; + +struct NSSLOWHASHContextStr { + const SECHashObject *hashObj; + void *hashCtxt; +}; + +#ifndef NSS_FIPS_DISABLED +static int +nsslow_GetFIPSEnabled(void) +{ +#ifdef LINUX + FILE *f; + char d; + size_t size; + const char *env; + + env = PR_GetEnvSecure("NSS_FIPS"); + if (env && (*env == 'y' || *env == 'f' || *env == '1' || *env == 't')) { + return 1; + } + + f = fopen("/proc/sys/crypto/fips_enabled", "r"); + if (!f) + return 0; + + size = fread(&d, 1, 1, f); + fclose(f); + if (size != 1) + return 0; + if (d != '1') + return 0; +#endif /* LINUX */ + return 1; +} +#endif /* NSS_FIPS_DISABLED */ + +static NSSLOWInitContext dummyContext = { 0 }; +static PRBool post_failed = PR_TRUE; + +NSSLOWInitContext * +NSSLOW_Init(void) +{ +#ifdef FREEBL_NO_DEPEND + (void)FREEBL_InitStubs(); +#endif + +#ifndef NSS_FIPS_DISABLED + /* make sure the FIPS product is installed if we are trying to + * go into FIPS mode */ + if (nsslow_GetFIPSEnabled()) { + if (BL_FIPSEntryOK(PR_TRUE, PR_FALSE) != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + post_failed = PR_TRUE; + return NULL; + } + } +#endif + post_failed = PR_FALSE; + + return &dummyContext; +} + +void +NSSLOW_Shutdown(NSSLOWInitContext *context) +{ + PORT_Assert(context == &dummyContext); + return; +} + +void +NSSLOW_Reset(NSSLOWInitContext *context) +{ + PORT_Assert(context == &dummyContext); + return; +} + +NSSLOWHASHContext * +NSSLOWHASH_NewContext(NSSLOWInitContext *initContext, + HASH_HashType hashType) +{ + NSSLOWHASHContext *context; + + if (post_failed) { + PORT_SetError(SEC_ERROR_PKCS11_DEVICE_ERROR); + return NULL; + } + + if (initContext != &dummyContext) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return (NULL); + } + + context = 
PORT_ZNew(NSSLOWHASHContext); + if (!context) { + return NULL; + } + context->hashObj = HASH_GetRawHashObject(hashType); + if (!context->hashObj) { + PORT_Free(context); + return NULL; + } + context->hashCtxt = context->hashObj->create(); + if (!context->hashCtxt) { + PORT_Free(context); + return NULL; + } + + return context; +} + +void +NSSLOWHASH_Begin(NSSLOWHASHContext *context) +{ + return context->hashObj->begin(context->hashCtxt); +} + +void +NSSLOWHASH_Update(NSSLOWHASHContext *context, const unsigned char *buf, + unsigned int len) +{ + return context->hashObj->update(context->hashCtxt, buf, len); +} + +void +NSSLOWHASH_End(NSSLOWHASHContext *context, unsigned char *buf, + unsigned int *ret, unsigned int len) +{ + return context->hashObj->end(context->hashCtxt, buf, ret, len); +} + +void +NSSLOWHASH_Destroy(NSSLOWHASHContext *context) +{ + context->hashObj->destroy(context->hashCtxt, PR_TRUE); + PORT_Free(context); +} + +unsigned int +NSSLOWHASH_Length(NSSLOWHASHContext *context) +{ + return context->hashObj->length; +} diff --git a/security/nss/lib/freebl/nsslowhash.h b/security/nss/lib/freebl/nsslowhash.h new file mode 100644 index 0000000000..d8f058715b --- /dev/null +++ b/security/nss/lib/freebl/nsslowhash.h @@ -0,0 +1,33 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Provide FIPS validated hashing for applications that only need hashing. + * NOTE: mac'ing requires keys and will not work in this interface. + * Also NOTE: this only works with Hashing. Only the FIPS interface is enabled. 
+ */ + +#ifndef _NSSLOWHASH_H_ +#define _NSSLOWHASH_H_ + +typedef struct NSSLOWInitContextStr NSSLOWInitContext; +typedef struct NSSLOWHASHContextStr NSSLOWHASHContext; + +NSSLOWInitContext *NSSLOW_Init(void); +void NSSLOW_Shutdown(NSSLOWInitContext *context); +void NSSLOW_Reset(NSSLOWInitContext *context); +NSSLOWHASHContext *NSSLOWHASH_NewContext( + NSSLOWInitContext *initContext, + HASH_HashType hashType); +void NSSLOWHASH_Begin(NSSLOWHASHContext *context); +void NSSLOWHASH_Update(NSSLOWHASHContext *context, + const unsigned char *buf, + unsigned int len); +void NSSLOWHASH_End(NSSLOWHASHContext *context, + unsigned char *buf, + unsigned int *ret, unsigned int len); +void NSSLOWHASH_Destroy(NSSLOWHASHContext *context); +unsigned int NSSLOWHASH_Length(NSSLOWHASHContext *context); + +#endif diff --git a/security/nss/lib/freebl/ppc-crypto.h b/security/nss/lib/freebl/ppc-crypto.h new file mode 100644 index 0000000000..4d283895f2 --- /dev/null +++ b/security/nss/lib/freebl/ppc-crypto.h @@ -0,0 +1,31 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PPC_CRYPTO_H +#define PPC_CRYPTO_H 1 + +#if defined(__powerpc64__) && defined(__ALTIVEC__) && \ + !defined(NSS_DISABLE_ALTIVEC) +#include "altivec-types.h" + +/* The ghash freebl test tries to use this in C++, and gcc defines conflict. */ +#ifdef __cplusplus +#undef pixel +#undef vector +#undef bool +#endif + +/* + * PPC CRYPTO requires at least gcc 8 or clang. The LE check is purely + * because it's only been tested on LE. If you're interested in BE, + * please send a patch. 
+ */ +#if (defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 8)) && \ + defined(IS_LITTLE_ENDIAN) && defined(__VSX__) +#define USE_PPC_CRYPTO +#endif + +#endif /* defined(__powerpc64__) && !defined(NSS_DISABLE_ALTIVEC) && defined(__ALTIVEC__) */ + +#endif diff --git a/security/nss/lib/freebl/ppc-gcm-wrap.c b/security/nss/lib/freebl/ppc-gcm-wrap.c new file mode 100644 index 0000000000..ac58744cbd --- /dev/null +++ b/security/nss/lib/freebl/ppc-gcm-wrap.c @@ -0,0 +1,458 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* Copyright(c) 2013, Intel Corp. */ + +/* Wrapper functions for PowerPC optimized implementation of AES-GCM */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapii.h" +#include "blapit.h" +#include "gcm.h" +#include "ctr.h" +#include "secerr.h" +#include "prtypes.h" +#include "pkcs11t.h" + +#include <limits.h> +#include <string.h> + +#include "ppc-gcm.h" +#include "rijndael.h" + +struct ppc_AES_GCMContextStr { + unsigned char Htbl[8 * AES_BLOCK_SIZE]; /* GHASH table filled by ppc_aes_gcmINIT() */ + unsigned char X0[AES_BLOCK_SIZE]; /* encrypted initial counter block */ + unsigned char T[AES_BLOCK_SIZE]; /* running GHASH accumulator */ + unsigned char CTR[AES_BLOCK_SIZE]; /* current counter block */ + AESContext *aes_context; /* caller-supplied AES context; not freed by this file */ + unsigned long tagBits; + unsigned long Alen; /* AAD bytes hashed so far */ + unsigned long Mlen; /* message bytes processed so far */ + freeblCipherFunc cipher; /* used to encrypt the initial counter block */ + PRBool ctr_context_init; /* PR_TRUE when the counter was set up at creation time */ + gcmIVContext gcm_iv; /* IV state passed to gcm_GenerateIV() */ +}; + +SECStatus ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm, + const unsigned char *iv, + unsigned long ivLen, unsigned long tagBits, + const unsigned char *aad, unsigned long aadLen); + +ppc_AES_GCMContext * +ppc_AES_GCM_CreateContext(void *context, + freeblCipherFunc cipher, + const unsigned char *params) /* params: CK_NSS_GCM_PARAMS, or NULL for the message API */ +{ + ppc_AES_GCMContext *gcm = NULL; + AESContext *aes = (AESContext *)context; + const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params; + SECStatus rv; + + gcm = PORT_ZNew(ppc_AES_GCMContext); + if (gcm == NULL) { + return NULL; + } + + /* 
initialize context fields */ + gcm->aes_context = aes; + gcm->cipher = cipher; + gcm->Alen = 0; + gcm->Mlen = 0; + gcm->ctr_context_init = PR_FALSE; + + /* first prepare H and its derivatives for ghash */ + ppc_aes_gcmINIT(gcm->Htbl, aes->k.expandedKey, aes->Nr); + + gcm_InitIVContext(&gcm->gcm_iv); + + /* if gcmParams is NULL, then we are creating a PKCS #11 MESSAGE + * style context, in which we initialize the key once, then do separate + * iv/aad's for each message. If we are doing that kind of operation, + * we've finished with init here. We'll init the Counter in each AEAD + * call */ + if (gcmParams == NULL) { + return gcm; + } + + rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, + gcmParams->ulIvLen, gcmParams->ulTagBits, + gcmParams->pAAD, gcmParams->ulAADLen); + if (rv != SECSuccess) { + ppc_AES_GCM_DestroyContext(gcm, PR_TRUE); /* wipe key-derived Htbl/X0 before freeing */ + return NULL; + } + gcm->ctr_context_init = PR_TRUE; + + return gcm; +} + +SECStatus +ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm, + const unsigned char *iv, unsigned long ivLen, + unsigned long tagBits, + const unsigned char *aad, unsigned long aadLen) +{ + unsigned int j; + SECStatus rv; + + if (ivLen == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (tagBits != 128 && tagBits != 120 && tagBits != 112 && + tagBits != 104 && tagBits != 96 && tagBits != 64 && + tagBits != 32) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + gcm->tagBits = tagBits; + + /* reset the aad and message length counters */ + gcm->Alen = 0; + gcm->Mlen = 0; + + /* Initial TAG value is zero */ + PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE); + PORT_Memset(gcm->X0, 0, AES_BLOCK_SIZE); + + /* Init the counter */ + if (ivLen == 12) { + PORT_Memcpy(gcm->CTR, iv, AES_BLOCK_SIZE - 4); + gcm->CTR[12] = 0; + gcm->CTR[13] = 0; + gcm->CTR[14] = 0; + gcm->CTR[15] = 1; + } else { + /* If IV size is not 96 bits, then the initial counter value is GHASH + * of the IV */ + ppc_aes_gcmHASH(gcm->Htbl, iv, ivLen, gcm->T); + + ppc_aes_gcmTAG( + 
gcm->Htbl, + gcm->T, + ivLen, + 0, + gcm->X0, + gcm->CTR); + + /* TAG should be zero again */ + PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE); + } + + /* Encrypt the initial counter, will be used to encrypt the GHASH value, + * in the end */ + rv = (*gcm->cipher)(gcm->aes_context, gcm->X0, &j, AES_BLOCK_SIZE, gcm->CTR, + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + return SECFailure; + } + + /* Increment the counter by 1: bump CTR[15] and ripple the carry up to CTR[12] */ + gcm->CTR[14] += !(++gcm->CTR[15]); + gcm->CTR[13] += !(gcm->CTR[15]) && !(gcm->CTR[14]); + gcm->CTR[12] += !(gcm->CTR[15]) && !(gcm->CTR[14]) && !(gcm->CTR[13]); + + /* Now hash AAD - it would actually make sense to separate the context + * creation from the AAD, because that would allow reuse of H, which + * only changes when the AES key changes, and not every packet, like the + * IV and AAD */ + ppc_aes_gcmHASH(gcm->Htbl, aad, aadLen, gcm->T); + gcm->Alen += aadLen; + return SECSuccess; +} + +void +ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit) +{ + PORT_Memset(gcm, 0, sizeof(ppc_AES_GCMContext)); /* wipes this struct only; the AES context it points to is left alone */ + if (freeit) { + PORT_Free(gcm); + } +} + +SECStatus +ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + unsigned int j; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + if (UINT_MAX - inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen + tagBytes) { + *outlen = inlen + tagBytes; + 
PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + ppc_aes_gcmCRYPT( + inbuf, + outbuf, + inlen, + gcm->CTR, + gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + ppc_aes_gcmHASH( + gcm->Htbl, + outbuf, + inlen, + gcm->T); + + gcm->Mlen += inlen; + + ppc_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + gcm->Alen, + gcm->X0, + T); + + *outlen = inlen + tagBytes; + + for (j = 0; j < tagBytes; j++) { + outbuf[inlen + j] = T[j]; + } + return SECSuccess; +} + +SECStatus +ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const unsigned char *intag; + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + /* the last tagBytes of the input are the authentication tag */ + if (inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + inlen -= tagBytes; + intag = inbuf + inlen; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + ppc_aes_gcmHASH( + gcm->Htbl, + inbuf, + inlen, + gcm->T); + ppc_aes_gcmCRYPT( + inbuf, + outbuf, + inlen, + gcm->CTR, + gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + + gcm->Mlen += inlen; + ppc_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + gcm->Alen, + gcm->X0, + T); + + if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) { + memset(outbuf, 0, inlen); /* tag mismatch: clear the already-written output */ + *outlen = 0; + /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */ + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + 
} + *outlen = inlen; + + return SECSuccess; +} + +SECStatus +ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + SECStatus rv; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* if we were initialized with the C_EncryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = gcm_GenerateIV(&gcm->gcm_iv, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulIvFixedBits, gcmParams->ivGenerator); + if (rv != SECSuccess) { + return SECFailure; + } + + rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR, gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + ppc_aes_gcmHASH(gcm->Htbl, outbuf, inlen, gcm->T); + + gcm->Mlen += inlen; + + ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T); + + *outlen = inlen; + PORT_Memcpy(gcmParams->pTag, T, tagBytes); 
+ return SECSuccess; +} + +SECStatus +ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const unsigned char *intag; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + SECStatus rv; + + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* if we were initialized with the C_DecryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + intag = gcmParams->pTag; + PORT_Assert(tagBytes != 0); + + ppc_aes_gcmHASH(gcm->Htbl, inbuf, inlen, gcm->T); + ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR, gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + + gcm->Mlen += inlen; + ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T); + + if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) { + memset(outbuf, 0, inlen); /* tag mismatch: clear the already-written output */ + *outlen = 0; + /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */ + 
PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + *outlen = inlen; + + return SECSuccess; +} diff --git a/security/nss/lib/freebl/ppc-gcm.h b/security/nss/lib/freebl/ppc-gcm.h new file mode 100644 index 0000000000..1d94c0c338 --- /dev/null +++ b/security/nss/lib/freebl/ppc-gcm.h @@ -0,0 +1,76 @@ +/******************************************************************************/ +/* LICENSE: */ +/* This submission to NSS is to be made available under the terms of the */ +/* Mozilla Public License, v. 2.0. You can obtain one at http: */ +/* //mozilla.org/MPL/2.0/. */ +/******************************************************************************/ + +#ifndef PPC_GCM_H +#define PPC_GCM_H 1 + +#include "blapii.h" + +typedef struct ppc_AES_GCMContextStr ppc_AES_GCMContext; + +ppc_AES_GCMContext *ppc_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *params); + +void ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit); + +SECStatus ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +SECStatus ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +SECStatus ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); +SECStatus ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); + +/* 
Prototypes of the functions defined in the assembler file. */ + +/* Prepares the constants used in the aggregated reduction method */ +void ppc_aes_gcmINIT(unsigned char Htbl[8 * 16], + PRUint32 *KS, + int NR); + +/* Produces the final GHASH value */ +void ppc_aes_gcmTAG(unsigned char Htbl[8 * 16], + unsigned char *Tp, + unsigned long Mlen, + unsigned long Alen, + unsigned char *X0, + unsigned char *TAG); + +/* Hashes the Additional Authenticated Data, should be used before enc/dec. + Operates on any length of data. Partial block is padded internally. */ +void ppc_aes_gcmHASH(unsigned char Htbl[8 * 16], + const unsigned char *AAD, + unsigned long Alen, + unsigned char *Tp); + +/* Crypt only, used in combination with ppc_aes_gcmHASH(). + Operates on any length of data, however partial block should only be encrypted + at the last call, otherwise the result will be incorrect. */ +void ppc_aes_gcmCRYPT(const unsigned char *PT, + unsigned char *CT, + unsigned long len, + unsigned char *CTRP, + PRUint32 *KS, + int NR); + +#endif diff --git a/security/nss/lib/freebl/ppc-gcm.s b/security/nss/lib/freebl/ppc-gcm.s new file mode 100644 index 0000000000..06ad5862c1 --- /dev/null +++ b/security/nss/lib/freebl/ppc-gcm.s @@ -0,0 +1,1051 @@ +# This submission to NSS is to be made available under the terms of the +# Mozilla Public License, v. 2.0. 
You can obtain one at //mozilla.org/MPL/2.0/ +# Copyright(c) 2021, Niels Möller and Mamone Tarsha + +# Registers: + +.set SP, 1 +.set TOCP, 2 + +.macro VEC_LOAD_DATA VR, DATA, GPR + addis \GPR, 2, \DATA@got@ha + ld \GPR, \DATA@got@l(\GPR) + lvx \VR, 0, \GPR +.endm + +.macro VEC_LOAD VR, GPR, IDX + lxvd2x \VR+32, \IDX, \GPR + vperm \VR, \VR, \VR, SWAP_MASK +.endm + +.macro VEC_LOAD_INC VR, GPR, IDX + lxvd2x \VR+32, \IDX, \GPR + addi \IDX,\IDX,16 + vperm \VR, \VR, \VR, SWAP_MASK +.endm + +.macro VEC_STORE VR, GPR, IDX + vperm \VR, \VR, \VR, SWAP_MASK + stxvd2x \VR+32, \IDX, \GPR +.endm + +# 0 < LEN < 16, pad the remaining bytes with zeros +.macro LOAD_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2 + li \TMP0, 0 + li \VAL1, 0 + li \VAL0, 0 + andi. \TMP1, \LEN, 8 + beq 1f + ldbrx \VAL1, 0, \DATA + li \TMP0, 8 +1: + andi. \TMP1, \LEN, 7 + beq 3f + li \TMP1, 56 +2: + lbzx \TMP2, \TMP0, \DATA + sld \TMP2, \TMP2, \TMP1 + subi \TMP1, \TMP1, 8 + or \VAL0, \VAL0, \TMP2 + addi \TMP0, \TMP0, 1 + cmpld \TMP0, \LEN + bne 2b + andi. \TMP1, \LEN, 8 + bne 3f + mr \VAL1, \VAL0 + li \VAL0, 0 +3: +.endm + +# 0 < LEN < 16 +.macro STORE_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2 + andi. \TMP1, \LEN, 8 + beq 1f + stdbrx \VAL1, 0, \DATA + li \TMP0, 8 + b 2f +1: + li \TMP0, 0 + mr \VAL0, \VAL1 +2: + andi. 
\TMP1, \LEN, 7 + beq 4f + li \TMP1, 56 +3: + srd \TMP2, \VAL0, \TMP1 + subi \TMP1, \TMP1, 8 + stbx \TMP2, \TMP0, \DATA + addi \TMP0, \TMP0, 1 + cmpld \TMP0, \LEN + bne 3b +4: +.endm + +.text + +################################################################################ +# Generates the H table +# void ppc_aes_gcmINIT(uint8_t Htbl[16*8], uint32_t *KS, int NR); +.globl ppc_aes_gcmINIT +.type ppc_aes_gcmINIT,@function +.align 5 +ppc_aes_gcmINIT: +addis TOCP,12,(.TOC.-ppc_aes_gcmINIT)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmINIT)@l +.localentry ppc_aes_gcmINIT, .-ppc_aes_gcmINIT + +.set Htbl, 3 +.set KS, 4 +.set NR, 5 + +.set ZERO, 19 +.set MSB, 18 +.set ONE, 17 +.set SWAP_MASK, 0 +.set POLY, 1 +.set K, 2 +.set H, 3 +.set H2, 4 +.set H3, 5 +.set H4, 6 +.set HP, 7 +.set HS, 8 +.set R, 9 +.set F, 10 +.set T, 11 +.set H1M, 12 +.set H1L, 13 +.set H2M, 14 +.set H2L, 15 +.set H3M, 16 +.set H3L, 17 +.set H4M, 18 +.set H4L, 19 + + VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 6 + VEC_LOAD_DATA POLY, .Lpoly, 6 + + li 6, 0 + VEC_LOAD_INC H, KS, 6 + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + cmpwi NR, 10 + beq .LH_done + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + cmpwi NR, 12 + beq .LH_done + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + +.LH_done: + VEC_LOAD K, KS, 6 + vcipherlast H, H, K + + vupkhsb MSB, H + vspltisb ONE, 1 + vspltb MSB, MSB, 0 + vsl H, H, ONE + vand MSB, MSB, POLY + vxor ZERO, ZERO, ZERO + vxor H, H, MSB + vsldoi POLY, ZERO, POLY, 8 + + vpmsumd HP, H, POLY + vsldoi HS, H, H, 8 + vxor HP, HP, HS + vsldoi H1L, HP, HS, 8 + vsldoi H1M, HS, HP, 8 + vsldoi H1L, 
H1L, H1L, 8 + + # calculate H^2 + + vpmsumd F, H, H1L + vpmsumd R, H, H1M + + vpmsumd T, F, POLY + vsldoi H2, F, F, 8 + vxor R, R, T + vxor H2, H2, R + + vpmsumd HP, H2, POLY + vsldoi HS, H2, H2, 8 + vxor HP, HP, HS + vsldoi H2L, HP, HS, 8 + vsldoi H2M, HS, HP, 8 + vsldoi H2L, H2L, H2L, 8 + + # calculate H^3 + + vpmsumd F, H2, H1L + vpmsumd R, H2, H1M + + vpmsumd T, F, POLY + vsldoi H3, F, F, 8 + vxor R, R, T + vxor H3, H3, R + + vpmsumd HP, H3, POLY + vsldoi HS, H3, H3, 8 + vxor HP, HP, HS + vsldoi H3L, HP, HS, 8 + vsldoi H3M, HS, HP, 8 + vsldoi H3L, H3L, H3L, 8 + + # calculate H^4 + + vpmsumd F, H2, H2L + vpmsumd R, H2, H2M + + vpmsumd T, F, POLY + vsldoi H4, F, F, 8 + vxor R, R, T + vxor H4, H4, R + + vpmsumd HP, H4, POLY + vsldoi HS, H4, H4, 8 + vxor HP, HP, HS + vsldoi H4L, HP, HS, 8 + vsldoi H4M, HS, HP, 8 + vsldoi H4L, H4L, H4L, 8 + + li 8, 16*1 + li 9, 16*2 + li 10, 16*3 + stxvd2x H1L+32, 0, Htbl + stxvd2x H1M+32, 8, Htbl + stxvd2x H2L+32, 9, Htbl + stxvd2x H2M+32, 10, Htbl + li 7, 16*4 + li 8, 16*5 + li 9, 16*6 + li 10, 16*7 + stxvd2x H3L+32, 7, Htbl + stxvd2x H3M+32, 8, Htbl + stxvd2x H4L+32, 9, Htbl + stxvd2x H4M+32, 10, Htbl + + blr +.size ppc_aes_gcmINIT, . 
- ppc_aes_gcmINIT + +################################################################################ +# Authenticate only +# void ppc_aes_gcmHASH(uint8_t Htbl[16*8], uint8_t *AAD, uint64_t Alen, uint8_t *Tp); +.globl ppc_aes_gcmHASH +.type ppc_aes_gcmHASH,@function +.align 5 +ppc_aes_gcmHASH: +addis TOCP,12,(.TOC.-ppc_aes_gcmHASH)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmHASH)@l +.localentry ppc_aes_gcmHASH, .-ppc_aes_gcmHASH + +.set Htbl, 3 +.set AAD, 4 +.set Alen, 5 +.set Tp, 6 + +.set SWAP_MASK, 0 +.set POLY, 1 +.set D, 2 +.set C0, 3 +.set C1, 4 +.set C2, 5 +.set C3, 6 +.set T, 7 +.set R, 8 +.set F, 9 +.set R2, 10 +.set F2, 11 +.set R3, 12 +.set F3, 13 +.set R4, 14 +.set F4, 15 +.set H1M, 16 +.set H1L, 17 +.set H2M, 18 +.set H2L, 19 +.set H3M, 28 +.set H3L, 29 +.set H4M, 30 +.set H4L, 31 + + # store non-volatile vector registers + addi 7, SP, -16 + stvx 31, 0, 7 + addi 7, SP, -32 + stvx 30, 0, 7 + addi 7, SP, -48 + stvx 29, 0, 7 + addi 7, SP, -64 + stvx 28, 0, 7 + + VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 7 + VEC_LOAD_DATA POLY, .Lpoly_r, 7 + + VEC_LOAD D, Tp, 0 + + # --- process 4 blocks --- + + srdi. 
7, Alen, 6 # 4-blocks loop count + beq .L2x + + mtctr 7 # set counter register + + # load table elements + li 8, 1*16 + li 9, 2*16 + li 10, 3*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + lxvd2x H2L+32, 9, Htbl + lxvd2x H2M+32, 10, Htbl + li 7, 4*16 + li 8, 5*16 + li 9, 6*16 + li 10, 7*16 + lxvd2x H3L+32, 7, Htbl + lxvd2x H3M+32, 8, Htbl + lxvd2x H4L+32, 9, Htbl + lxvd2x H4M+32, 10, Htbl + + li 8, 0x10 + li 9, 0x20 + li 10, 0x30 +.align 5 +.L4x_loop: + # load input + lxvd2x C0+32, 0, AAD + lxvd2x C1+32, 8, AAD + lxvd2x C2+32, 9, AAD + lxvd2x C3+32, 10, AAD + + vperm C0, C0, C0, SWAP_MASK + vperm C1, C1, C1, SWAP_MASK + vperm C2, C2, C2, SWAP_MASK + vperm C3, C3, C3, SWAP_MASK + + # digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F2, H3L, C1 + vpmsumd R2, H3M, C1 + vpmsumd F3, H2L, C2 + vpmsumd R3, H2M, C2 + vpmsumd F4, H1L, C3 + vpmsumd R4, H1M, C3 + vpmsumd F, H4L, C0 + vpmsumd R, H4M, C0 + + # deferred recombination of partial products + vxor F3, F3, F4 + vxor R3, R3, R4 + vxor F, F, F2 + vxor R, R, R2 + vxor F, F, F3 + vxor R, R, R3 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + addi AAD, AAD, 0x40 + bdnz .L4x_loop + + clrldi Alen, Alen, 58 +.L2x: + # --- process 2 blocks --- + + srdi. 
7, Alen, 5 + beq .L1x + + # load table elements + li 8, 1*16 + li 9, 2*16 + li 10, 3*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + lxvd2x H2L+32, 9, Htbl + lxvd2x H2M+32, 10, Htbl + + # load input + li 10, 0x10 + lxvd2x C0+32, 0, AAD + lxvd2x C1+32, 10, AAD + + vperm C0, C0, C0, SWAP_MASK + vperm C1, C1, C1, SWAP_MASK + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F2, H1L, C1 + vpmsumd R2, H1M, C1 + vpmsumd F, H2L, C0 + vpmsumd R, H2M, C0 + + # deferred recombination of partial products + vxor F, F, F2 + vxor R, R, R2 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + addi AAD, AAD, 0x20 + clrldi Alen, Alen, 59 +.L1x: + # --- process 1 block --- + + srdi. 7, Alen, 4 + beq .Ltail + + # load table elements + li 8, 1*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + + # load input + lxvd2x C0+32, 0, AAD + + vperm C0, C0, C0, SWAP_MASK + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F, H1L, C0 + vpmsumd R, H1M, C0 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + addi AAD, AAD, 0x10 + clrldi Alen, Alen, 60 + +.Ltail: + cmpldi Alen, 0 + beq .Lh_done + # --- process the final partial block --- + + # load table elements + li 8, 1*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + + LOAD_LEN AAD, Alen, 10, 9, 3, 7, 8 + mtvrd C0, 10 + mtvrd C1, 9 + xxmrghd C0+32, C0+32, C1+32 + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F, H1L, C0 + vpmsumd R, H1M, C0 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D +.Lh_done: + VEC_STORE D, Tp, 0 + + # restore non-volatile vector registers + addi 7, SP, -16 + lvx 31, 0, 7 + addi 7, SP, -32 + lvx 30, 0, 7 + addi 7, SP, -48 + lvx 29, 0, 7 + addi 7, SP, -64 + lvx 28, 0, 7 + blr +.size ppc_aes_gcmHASH, . 
- ppc_aes_gcmHASH + +################################################################################ +# Generates the final GCM tag +# void ppc_aes_gcmTAG(uint8_t Htbl[16*8], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG); +.globl ppc_aes_gcmTAG +.type ppc_aes_gcmTAG,@function +.align 5 +ppc_aes_gcmTAG: +addis TOCP,12,(.TOC.-ppc_aes_gcmTAG)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmTAG)@l +.localentry ppc_aes_gcmTAG, .-ppc_aes_gcmTAG + +.set Htbl, 3 +.set Tp, 4 +.set Mlen, 5 +.set Alen, 6 +.set X0, 7 +.set TAG, 8 + +.set SWAP_MASK, 0 +.set POLY, 1 +.set D, 2 +.set C0, 3 +.set C1, 4 +.set T, 5 +.set R, 6 +.set F, 7 +.set H1M, 8 +.set H1L, 9 +.set X, 10 + + VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9 + VEC_LOAD_DATA POLY, .Lpoly_r, 9 + + VEC_LOAD D, Tp, 0 + + # load table elements + li 9, 1*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 9, Htbl + + sldi Alen, Alen, 3 + sldi Mlen, Mlen, 3 + mtvrd C0, Alen + mtvrd C1, Mlen + xxmrghd C0+32, C0+32, C1+32 + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F, H1L, C0 + vpmsumd R, H1M, C0 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + lxvd2x X+32, 0, X0 + vperm D, D, D, SWAP_MASK + vxor X, X, D + stxvd2x X+32, 0, TAG + + blr +.size ppc_aes_gcmTAG, . 
- ppc_aes_gcmTAG + +################################################################################ +# Crypt only +# void ppc_aes_gcmCRYPT(const uint8_t* PT, uint8_t* CT, uint64_t LEN, uint8_t *CTRP, uint32_t *KS, int NR); +.globl ppc_aes_gcmCRYPT +.type ppc_aes_gcmCRYPT,@function +.align 5 +ppc_aes_gcmCRYPT: +addis TOCP,12,(.TOC.-ppc_aes_gcmCRYPT)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmCRYPT)@l +.localentry ppc_aes_gcmCRYPT, .-ppc_aes_gcmCRYPT + +.set PT, 3 +.set CT, 4 +.set LEN, 5 +.set CTRP, 6 +.set KS, 7 +.set NR, 8 + +.set SWAP_MASK, 0 +.set K, 1 +.set CTR, 2 +.set CTR0, 3 +.set CTR1, 4 +.set CTR2, 5 +.set CTR3, 6 +.set CTR4, 7 +.set CTR5, 8 +.set CTR6, 9 +.set CTR7, 10 +.set ZERO, 11 +.set I1, 12 +.set I2, 13 +.set I3, 14 +.set I4, 15 +.set I5, 16 +.set I6, 17 +.set I7, 18 +.set I8, 19 +.set IN0, 24 +.set IN1, 25 +.set IN2, 26 +.set IN3, 27 +.set IN4, 28 +.set IN5, 29 +.set IN6, 30 +.set IN7, 31 + +.macro ROUND_8 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K + vcipher CTR1, CTR1, K + vcipher CTR2, CTR2, K + vcipher CTR3, CTR3, K + vcipher CTR4, CTR4, K + vcipher CTR5, CTR5, K + vcipher CTR6, CTR6, K + vcipher CTR7, CTR7, K +.endm + +.macro ROUND_4 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K + vcipher CTR1, CTR1, K + vcipher CTR2, CTR2, K + vcipher CTR3, CTR3, K +.endm + +.macro ROUND_2 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K + vcipher CTR1, CTR1, K +.endm + +.macro ROUND_1 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K +.endm + + # store non-volatile general registers + std 31,-8(SP); + std 30,-16(SP); + std 29,-24(SP); + std 28,-32(SP); + std 27,-40(SP); + std 26,-48(SP); + std 25,-56(SP); + + # store non-volatile vector registers + addi 9, SP, -80 + stvx 31, 0, 9 + addi 9, SP, -96 + stvx 30, 0, 9 + addi 9, SP, -112 + stvx 29, 0, 9 + addi 9, SP, -128 + stvx 28, 0, 9 + addi 9, SP, -144 + stvx 27, 0, 9 + addi 9, SP, -160 + stvx 26, 0, 9 + addi 9, SP, -176 + stvx 25, 0, 9 + addi 9, SP, -192 + stvx 24, 0, 9 + + VEC_LOAD_DATA SWAP_MASK, 
.Ldb_bswap_mask, 9 + + vxor ZERO, ZERO, ZERO + vspltisb I1, 1 + vspltisb I2, 2 + vspltisb I3, 3 + vspltisb I4, 4 + vspltisb I5, 5 + vspltisb I6, 6 + vspltisb I7, 7 + vspltisb I8, 8 + vsldoi I1, ZERO, I1, 1 + vsldoi I2, ZERO, I2, 1 + vsldoi I3, ZERO, I3, 1 + vsldoi I4, ZERO, I4, 1 + vsldoi I5, ZERO, I5, 1 + vsldoi I6, ZERO, I6, 1 + vsldoi I7, ZERO, I7, 1 + vsldoi I8, ZERO, I8, 1 + + VEC_LOAD CTR, CTRP, 0 + + srdi. 9, LEN, 7 + beq .Lctr_4x + + mtctr 9 + + li 25, 0x10 + li 26, 0x20 + li 27, 0x30 + li 28, 0x40 + li 29, 0x50 + li 30, 0x60 + li 31, 0x70 + +.align 5 +.L8x_loop: + li 10, 0 + VEC_LOAD_INC K, KS, 10 + + vadduwm CTR1, CTR, I1 + vadduwm CTR2, CTR, I2 + vadduwm CTR3, CTR, I3 + vadduwm CTR4, CTR, I4 + vadduwm CTR5, CTR, I5 + vadduwm CTR6, CTR, I6 + vadduwm CTR7, CTR, I7 + + vxor CTR0, CTR, K + vxor CTR1, CTR1, K + vxor CTR2, CTR2, K + vxor CTR3, CTR3, K + vxor CTR4, CTR4, K + vxor CTR5, CTR5, K + vxor CTR6, CTR6, K + vxor CTR7, CTR7, K + + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + cmpwi NR, 10 + beq .Llast_8 + ROUND_8 + ROUND_8 + cmpwi NR, 12 + beq .Llast_8 + ROUND_8 + ROUND_8 + +.Llast_8: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + vcipherlast CTR1, CTR1, K + vcipherlast CTR2, CTR2, K + vcipherlast CTR3, CTR3, K + vcipherlast CTR4, CTR4, K + vcipherlast CTR5, CTR5, K + vcipherlast CTR6, CTR6, K + vcipherlast CTR7, CTR7, K + + lxvd2x IN0+32, 0, PT + lxvd2x IN1+32, 25, PT + lxvd2x IN2+32, 26, PT + lxvd2x IN3+32, 27, PT + lxvd2x IN4+32, 28, PT + lxvd2x IN5+32, 29, PT + lxvd2x IN6+32, 30, PT + lxvd2x IN7+32, 31, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + vperm CTR1, CTR1, CTR1, SWAP_MASK + vperm CTR2, CTR2, CTR2, SWAP_MASK + vperm CTR3, CTR3, CTR3, SWAP_MASK + vperm CTR4, CTR4, CTR4, SWAP_MASK + vperm CTR5, CTR5, CTR5, SWAP_MASK + vperm CTR6, CTR6, CTR6, SWAP_MASK + vperm CTR7, CTR7, CTR7, SWAP_MASK + + vxor IN0, IN0, CTR0 + vxor IN1, IN1, CTR1 + vxor IN2, IN2, CTR2 + vxor IN3, IN3, CTR3 + vxor IN4, IN4, CTR4 + 
vxor IN5, IN5, CTR5 + vxor IN6, IN6, CTR6 + vxor IN7, IN7, CTR7 + + stxvd2x IN0+32, 0, CT + stxvd2x IN1+32, 25, CT + stxvd2x IN2+32, 26, CT + stxvd2x IN3+32, 27, CT + stxvd2x IN4+32, 28, CT + stxvd2x IN5+32, 29, CT + stxvd2x IN6+32, 30, CT + stxvd2x IN7+32, 31, CT + + vadduwm CTR, CTR, I8 + addi PT, PT, 0x80 + addi CT, CT, 0x80 + bdnz .L8x_loop + + clrldi LEN, LEN, 57 + +.Lctr_4x: + srdi. 9, LEN, 6 + beq .Lctr_2x + + li 10, 0 + li 29, 0x10 + li 30, 0x20 + li 31, 0x30 + + VEC_LOAD_INC K, KS, 10 + + vadduwm CTR1, CTR, I1 + vadduwm CTR2, CTR, I2 + vadduwm CTR3, CTR, I3 + + vxor CTR0, CTR, K + vxor CTR1, CTR1, K + vxor CTR2, CTR2, K + vxor CTR3, CTR3, K + + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + cmpwi NR, 10 + beq .Llast_4 + ROUND_4 + ROUND_4 + cmpwi NR, 12 + beq .Llast_4 + ROUND_4 + ROUND_4 + +.Llast_4: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + vcipherlast CTR1, CTR1, K + vcipherlast CTR2, CTR2, K + vcipherlast CTR3, CTR3, K + + lxvd2x IN0+32, 0, PT + lxvd2x IN1+32, 29, PT + lxvd2x IN2+32, 30, PT + lxvd2x IN3+32, 31, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + vperm CTR1, CTR1, CTR1, SWAP_MASK + vperm CTR2, CTR2, CTR2, SWAP_MASK + vperm CTR3, CTR3, CTR3, SWAP_MASK + + vxor IN0, IN0, CTR0 + vxor IN1, IN1, CTR1 + vxor IN2, IN2, CTR2 + vxor IN3, IN3, CTR3 + + stxvd2x IN0+32, 0, CT + stxvd2x IN1+32, 29, CT + stxvd2x IN2+32, 30, CT + stxvd2x IN3+32, 31, CT + + vadduwm CTR, CTR, I4 + addi PT, PT, 0x40 + addi CT, CT, 0x40 + + clrldi LEN, LEN, 58 + +.Lctr_2x: + srdi. 
9, LEN, 5 + beq .Lctr_1x + + li 10, 0 + li 31, 0x10 + + VEC_LOAD_INC K, KS, 10 + + vadduwm CTR1, CTR, I1 + + vxor CTR0, CTR, K + vxor CTR1, CTR1, K + + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + cmpwi NR, 10 + beq .Llast_2 + ROUND_2 + ROUND_2 + cmpwi NR, 12 + beq .Llast_2 + ROUND_2 + ROUND_2 + +.Llast_2: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + vcipherlast CTR1, CTR1, K + + lxvd2x IN0+32, 0, PT + lxvd2x IN1+32, 31, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + vperm CTR1, CTR1, CTR1, SWAP_MASK + + vxor IN0, IN0, CTR0 + vxor IN1, IN1, CTR1 + + stxvd2x IN0+32, 0, CT + stxvd2x IN1+32, 31, CT + + vadduwm CTR, CTR, I2 + addi PT, PT, 0x20 + addi CT, CT, 0x20 + + clrldi LEN, LEN, 59 + +.Lctr_1x: + srdi. 9, LEN, 4 + beq .Lctr_tail + + li 10, 0 + + VEC_LOAD_INC K, KS, 10 + vxor CTR0, CTR, K + + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + cmpwi NR, 10 + beq .Llast_1 + ROUND_1 + ROUND_1 + cmpwi NR, 12 + beq .Llast_1 + ROUND_1 + ROUND_1 + +.Llast_1: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + + lxvd2x IN0+32, 0, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + + vxor IN0, IN0, CTR0 + + stxvd2x IN0+32, 0, CT + + vadduwm CTR, CTR, I1 + addi PT, PT, 0x10 + addi CT, CT, 0x10 + + clrldi LEN, LEN, 60 + +.Lctr_tail: + cmpldi LEN, 0 + beq .Lc_done + + li 10, 0 + + VEC_LOAD_INC K, KS, 10 + vxor CTR0, CTR, K + + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + cmpwi NR, 10 + beq .Llast_tail + ROUND_1 + ROUND_1 + cmpwi NR, 12 + beq .Llast_tail + ROUND_1 + ROUND_1 + +.Llast_tail: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + + LOAD_LEN PT, LEN, 10, 9, 29, 30, 31 + + vsldoi CTR1, CTR0, CTR0, 8 + mfvrd 31, CTR0 + mfvrd 30, CTR1 + + xor 10, 10, 31 + xor 9, 9, 30 + + STORE_LEN CT, LEN, 10, 9, 29, 30, 31 + + vadduwm CTR, CTR, I1 + +.Lc_done: + VEC_STORE CTR, CTRP, 0 + + # restore non-volatile vector registers + addi 9, SP, -80 + lvx 31, 0, 9 + addi 
9, SP, -96 + lvx 30, 0, 9 + addi 9, SP, -112 + lvx 29, 0, 9 + addi 9, SP, -128 + lvx 28, 0, 9 + addi 9, SP, -144 + lvx 27, 0, 9 + addi 9, SP, -160 + lvx 26, 0, 9 + addi 9, SP, -176 + lvx 25, 0, 9 + addi 9, SP, -192 + lvx 24, 0, 9 + + # restore non-volatile general registers + ld 31,-8(SP); + ld 30,-16(SP); + ld 29,-24(SP); + ld 28,-32(SP); + ld 27,-40(SP); + ld 26,-48(SP); + ld 25,-56(SP); + blr +.size ppc_aes_gcmCRYPT, . - ppc_aes_gcmCRYPT + +.data +.align 4 +.Lpoly: + .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lpoly_r: + .byte 0,0,0,0,0,0,0,0xc2,0,0,0,0,0,0,0,0 +.Ldb_bswap_mask: + .byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7 diff --git a/security/nss/lib/freebl/pqg.c b/security/nss/lib/freebl/pqg.c new file mode 100644 index 0000000000..8933b602b2 --- /dev/null +++ b/security/nss/lib/freebl/pqg.c @@ -0,0 +1,1926 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * PQG parameter generation/verification. Based on FIPS 186-3. + */ +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerr.h" +#include "secerr.h" + +#include "prtypes.h" +#include "blapi.h" +#include "secitem.h" +#include "mpi.h" +#include "mpprime.h" +#include "mplogic.h" +#include "secmpi.h" + +#define MAX_ITERATIONS 1000 /* Maximum number of iterations of primegen */ + +typedef enum { + FIPS186_1_TYPE, /* Probablistic */ + FIPS186_3_TYPE, /* Probablistic */ + FIPS186_3_ST_TYPE /* Shawe-Taylor provable */ +} pqgGenType; + +/* + * These test iterations are quite a bit larger than we previously had. + * This is because FIPS 186-3 is worried about the primes in PQG generation. 
+ * It may be possible to purposefully construct composites which require more + * iterations of Miller-Rabin than your normal randomly selected + * numbers. There are 3 ways to counter this: 1) use one of the cool provably + * prime algorithms (which would require a lot more work than DSA-2 deserves), + * 2) add a Lucas primality test (which requires coding a Lucas primality test), + * or 3) use a larger M-R test count. I chose the latter. It increases the time + * that it takes to prove the selected prime, but it shouldn't increase the + * overall time to run the algorithm (non-primes should still fail M-R + * relatively quickly). If you want to get that last bit of performance, + * implement Lucas and adjust these two functions. See FIPS 186-3 Appendix C + * and F for more information. + */ +static int +prime_testcount_p(int L, int N) +{ + switch (L) { + case 1024: + return 40; + case 2048: + return 56; + case 3072: + return 64; + default: + break; + } + return 50; /* L = 512-960 */ +} + +/* The q numbers are different if you run M-R followed by Lucas. I created + * a separate function so if someone wanted to add the Lucas check, they + * could do so fairly easily */ +static int +prime_testcount_q(int L, int N) +{ + return prime_testcount_p(L, N); +} + +/* + * generic function to make sure our input matches DSA2 requirements + * this gives us one place to go if we need to bump the requirements in the + * future. 
+ */ +static SECStatus +pqg_validate_dsa2(unsigned int L, unsigned int N) +{ + + switch (L) { + case 1024: + if (N != DSA1_Q_BITS) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + break; + case 2048: + if ((N != 224) && (N != 256)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + break; + case 3072: + if (N != 256) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + break; + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return SECSuccess; +} + +static unsigned int +pqg_get_default_N(unsigned int L) +{ + unsigned int N = 0; + switch (L) { + case 1024: + N = DSA1_Q_BITS; + break; + case 2048: + N = 224; + break; + case 3072: + N = 256; + break; + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + break; /* N already set to zero */ + } + return N; +} + +/* + * Select the lowest hash algorithm usable + */ +static HASH_HashType +getFirstHash(unsigned int L, unsigned int N) +{ + if (N < 224) { + return HASH_AlgSHA1; + } + if (N < 256) { + return HASH_AlgSHA224; + } + if (N < 384) { + return HASH_AlgSHA256; + } + if (N < 512) { + return HASH_AlgSHA384; + } + return HASH_AlgSHA512; +} + +/* + * find the next usable hash algorthim + */ +static HASH_HashType +getNextHash(HASH_HashType hashtype) +{ + switch (hashtype) { + case HASH_AlgSHA1: + hashtype = HASH_AlgSHA224; + break; + case HASH_AlgSHA224: + hashtype = HASH_AlgSHA256; + break; + case HASH_AlgSHA256: + hashtype = HASH_AlgSHA384; + break; + case HASH_AlgSHA384: + hashtype = HASH_AlgSHA512; + break; + case HASH_AlgSHA512: + default: + hashtype = HASH_AlgTOTAL; + break; + } + return hashtype; +} + +static unsigned int +HASH_ResultLen(HASH_HashType type) +{ + const SECHashObject *hash_obj = HASH_GetRawHashObject(type); + PORT_Assert(hash_obj != NULL); + if (hash_obj == NULL) { + /* type is always a valid HashType. 
Thus a null hash_obj must be a bug */ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return 0; + } + PORT_Assert(hash_obj->length != 0); + return hash_obj->length; +} + +/* Hash src_len bytes of src with the raw hash object for type and store the + * digest in dest. Returns SECFailure if the hash object or its context cannot + * be obtained. */ +static SECStatus +HASH_HashBuf(HASH_HashType type, unsigned char *dest, + const unsigned char *src, PRUint32 src_len) +{ + const SECHashObject *hash_obj = HASH_GetRawHashObject(type); + void *hashcx = NULL; + unsigned int dummy; + + if (hash_obj == NULL) { + return SECFailure; + } + + hashcx = hash_obj->create(); + if (hashcx == NULL) { + return SECFailure; + } + hash_obj->begin(hashcx); + hash_obj->update(hashcx, src, src_len); + hash_obj->end(hashcx, dest, &dummy, hash_obj->length); + hash_obj->destroy(hashcx, PR_TRUE); + return SECSuccess; +} + +/* Return the length of obj in bytes, not counting a single leading zero byte + * when more than one byte is present; returns 0 when obj->data is NULL. */ +unsigned int +PQG_GetLength(const SECItem *obj) +{ + unsigned int len = obj->len; + + if (obj->data == NULL) { + return 0; + } + if (len > 1 && obj->data[0] == 0) { + len--; + } + return len; +} + +/* Check that the bit lengths of params->prime (L) and params->subPrime (N) + * form an accepted pair. Returns SECFailure with SEC_ERROR_INVALID_ARGS for a + * NULL params or a rejected L below 1024; L of 1024 or more is delegated to + * pqg_validate_dsa2(). */ +SECStatus +PQG_Check(const PQGParams *params) +{ + unsigned int L, N; + SECStatus rv = SECSuccess; + + if (params == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + L = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE; + N = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE; + + if (L < 1024) { + int j; + + /* handle DSA1 pqg parameters with less than 1024 bits */ + if (N != DSA1_Q_BITS) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + j = PQG_PBITS_TO_INDEX(L); + if (j < 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + rv = SECFailure; + } + } else { + /* handle DSA2 parameters (includes DSA1, 1024 bits) */ + rv = pqg_validate_dsa2(L, N); + } + return rv; +} + +/* Return the hash type chosen by getFirstHash() for the bit lengths of + * params->prime and params->subPrime, or HASH_AlgNULL (with + * SEC_ERROR_INVALID_ARGS set) when params is NULL. */ +HASH_HashType +PQG_GetHashType(const PQGParams *params) +{ + unsigned int L, N; + + if (params == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return HASH_AlgNULL; + } + + L = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE; + N = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE; + return getFirstHash(L, N); +} + +/* Get a seed for generating P and Q. 
If in testing mode, copy in the +** seed from FIPS 186-1 appendix 5. Otherwise, obtain bytes from the +** global random number generator. +*/ +static SECStatus +getPQseed(SECItem *seed, PLArenaPool *arena) +{ + SECStatus rv; + + if (!seed->data) { + seed->data = (unsigned char *)PORT_ArenaZAlloc(arena, seed->len); + } + if (!seed->data) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + rv = RNG_GenerateGlobalRandomBytes(seed->data, seed->len); + /* + * NIST CMVP disallows a sequence of 20 bytes with the most + * significant byte equal to 0. Perhaps they interpret + * "a sequence of at least 160 bits" as "a number >= 2^159". + * So we always set the most significant bit to 1. (bug 334533) + */ + seed->data[0] |= 0x80; + return rv; +} + +/* Generate a candidate h value. If in testing mode, use the h value +** specified in FIPS 186-1 appendix 5, h = 2. Otherwise, obtain bytes +** from the global random number generator. +*/ +static SECStatus +generate_h_candidate(SECItem *hit, mp_int *H) +{ + SECStatus rv = SECSuccess; + mp_err err = MP_OKAY; +#ifdef FIPS_186_1_A5_TEST + memset(hit->data, 0, hit->len); + hit->data[hit->len - 1] = 0x02; +#else + rv = RNG_GenerateGlobalRandomBytes(hit->data, hit->len); +#endif + if (rv) + return SECFailure; + err = mp_read_unsigned_octets(H, hit->data, hit->len); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return SECSuccess; +} + +static SECStatus +addToSeed(const SECItem *seed, + unsigned long addend, + int seedlen, /* g in 186-1 */ + SECItem *seedout) +{ + mp_int s, sum, modulus, tmp; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&s) = 0; + MP_DIGITS(&sum) = 0; + MP_DIGITS(&modulus) = 0; + MP_DIGITS(&tmp) = 0; + CHECK_MPI_OK(mp_init(&s)); + CHECK_MPI_OK(mp_init(&sum)); + CHECK_MPI_OK(mp_init(&modulus)); + SECITEM_TO_MPINT(*seed, &s); /* s = seed */ + /* seed += addend */ + if (sizeof(addend) < sizeof(mp_digit) || addend < MP_DIGIT_MAX) { + CHECK_MPI_OK(mp_add_d(&s, 
(mp_digit)addend, &s)); + } else { + CHECK_MPI_OK(mp_init(&tmp)); + CHECK_MPI_OK(mp_set_ulong(&tmp, addend)); + CHECK_MPI_OK(mp_add(&s, &tmp, &s)); + } + /*sum = s mod 2**seedlen */ + CHECK_MPI_OK(mp_div_2d(&s, (mp_digit)seedlen, NULL, &sum)); + if (seedout->data != NULL) { + SECITEM_ZfreeItem(seedout, PR_FALSE); + } + MPINT_TO_SECITEM(&sum, seedout, NULL); +cleanup: + mp_clear(&s); + mp_clear(&sum); + mp_clear(&modulus); + mp_clear(&tmp); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return rv; +} + +/* Compute Hash[(SEED + addend) mod 2**g] +** Result is placed in shaOutBuf. +** This computation is used in steps 2 and 7 of FIPS 186 Appendix 2.2 and +** step 11.2 of FIPS 186-3 Appendix A.1.1.2 . +*/ +static SECStatus +addToSeedThenHash(HASH_HashType hashtype, + const SECItem *seed, + unsigned long addend, + int seedlen, /* g in 186-1 */ + unsigned char *hashOutBuf) +{ + SECItem str = { 0, 0, 0 }; + SECStatus rv; + rv = addToSeed(seed, addend, seedlen, &str); + if (rv != SECSuccess) { + return rv; + } + rv = HASH_HashBuf(hashtype, hashOutBuf, str.data, str.len); /* hash result */ + if (str.data) + SECITEM_ZfreeItem(&str, PR_FALSE); + return rv; +} + +/* +** Perform steps 2 and 3 of FIPS 186-1, appendix 2.2. +** Generate Q from seed. +*/ +static SECStatus +makeQfromSeed( + unsigned int g, /* input. Length of seed in bits. */ + const SECItem *seed, /* input. */ + mp_int *Q) /* output. */ +{ + unsigned char sha1[SHA1_LENGTH]; + unsigned char sha2[SHA1_LENGTH]; + unsigned char U[SHA1_LENGTH]; + SECStatus rv = SECSuccess; + mp_err err = MP_OKAY; + int i; + /* ****************************************************************** + ** Step 2. + ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]." 
+ **/ + CHECK_SEC_OK(SHA1_HashBuf(sha1, seed->data, seed->len)); + CHECK_SEC_OK(addToSeedThenHash(HASH_AlgSHA1, seed, 1, g, sha2)); + for (i = 0; i < SHA1_LENGTH; ++i) + U[i] = sha1[i] ^ sha2[i]; + /* ****************************************************************** + ** Step 3. + ** "Form Q from U by setting the most signficant bit (the 2**159 bit) + ** and the least signficant bit to 1. In terms of boolean operations, + ** Q = U OR 2**159 OR 1. Note that 2**159 < Q < 2**160." + */ + U[0] |= 0x80; /* U is MSB first */ + U[SHA1_LENGTH - 1] |= 0x01; + err = mp_read_unsigned_octets(Q, U, SHA1_LENGTH); +cleanup: + memset(U, 0, SHA1_LENGTH); + memset(sha1, 0, SHA1_LENGTH); + memset(sha2, 0, SHA1_LENGTH); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return rv; +} + +/* +** Perform steps 6 and 7 of FIPS 186-3, appendix A.1.1.2. +** Generate Q from seed. +*/ +static SECStatus +makeQ2fromSeed( + HASH_HashType hashtype, /* selected Hashing algorithm */ + unsigned int N, /* input. Length of q in bits. */ + const SECItem *seed, /* input. */ + mp_int *Q) /* output. */ +{ + unsigned char U[HASH_LENGTH_MAX]; + SECStatus rv = SECSuccess; + mp_err err = MP_OKAY; + int N_bytes = N / PR_BITS_PER_BYTE; /* length of N in bytes rather than bits */ + int hashLen = HASH_ResultLen(hashtype); + int offset = 0; + + /* ****************************************************************** + ** Step 6. + ** "Compute U = hash[SEED] mod 2**N-1]." + **/ + CHECK_SEC_OK(HASH_HashBuf(hashtype, U, seed->data, seed->len)); + /* mod 2**N . Step 7 will explicitly set the top bit to 1, so no need + * to handle mod 2**N-1 */ + if (hashLen > N_bytes) { + offset = hashLen - N_bytes; + } + /* ****************************************************************** + ** Step 7. 
+ ** computed_q = 2**(N-1) + U + 1 - (U mod 2) + ** + ** This is the same as: + ** computed_q = 2**(N-1) | U | 1; + */ + U[offset] |= 0x80; /* U is MSB first */ + U[hashLen - 1] |= 0x01; + err = mp_read_unsigned_octets(Q, &U[offset], N_bytes); +cleanup: + memset(U, 0, HASH_LENGTH_MAX); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return rv; +} + +/* +** Perform steps from FIPS 186-3, Appendix A.1.2.1 and Appendix C.6 +** +** This generates a provable prime from two smaller primes. The resulting +** prime p will have q0 as a factor of p-1. q0 can be 1. +** +** This implements steps 4 through 22 of FIPS 186-3 A.1.2.1 and +** steps 16 through 34 of FIPS 186-3 C.6 +*/ +static SECStatus +makePrimefromPrimesShaweTaylor( + HASH_HashType hashtype, /* selected Hashing algorithm */ + unsigned int length, /* input. Length of prime in bits. */ + unsigned int seedlen, /* input seed length in bits */ + mp_int *c0, /* seed prime */ + mp_int *q, /* sub prime, can be 1 */ + mp_int *prime, /* output. */ + SECItem *prime_seed, /* input/output. */ + unsigned int *prime_gen_counter) /* input/output. */ +{ + mp_int c; + mp_int c0_2; + mp_int t; + mp_int a; + mp_int z; + mp_int two_length_minus_1; + SECStatus rv = SECFailure; + int hashlen = HASH_ResultLen(hashtype); + int outlen = hashlen * PR_BITS_PER_BYTE; + int offset; + unsigned char bit, mask; + /* x needs to hold roundup(L/outlen)*outlen. 
+     * This can be no larger than L+outlen-1, So we set it's size to
+     * our max L + max outlen and know we are safe */
+    unsigned char x[DSA_MAX_P_BITS / 8 + HASH_LENGTH_MAX];
+    mp_err err = MP_OKAY;
+    int i;
+    int iterations;
+    int old_counter;
+
+    MP_DIGITS(&c) = 0; /* mark every bignum empty first so cleanup can mp_clear them all */
+    MP_DIGITS(&c0_2) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&z) = 0;
+    MP_DIGITS(&two_length_minus_1) = 0;
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&c0_2));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&a));
+    CHECK_MPI_OK(mp_init(&z));
+    CHECK_MPI_OK(mp_init(&two_length_minus_1));
+
+    /*
+    ** There is a slight mapping of variable names depending on which
+    ** FIPS 186 steps are being carried out. The mapping is as follows:
+    **  variable          A.1.2.1           C.6
+    **    c0                p0               c0
+    **    q                 q                1
+    **    c                 p                c
+    **    c0_2            2*p0*q            2*c0
+    **    length            L               length
+    **    prime_seed       pseed            prime_seed
+    **  prime_gen_counter pgen_counter     prime_gen_counter
+    **
+    **  Also note: our iterations variable is actually iterations+1, since
+    **  iterations+1 works better in C.
+    */
+
+    /* Step 4/16 iterations = ceiling(length/outlen)-1 */
+    iterations = (length + outlen - 1) / outlen; /* NOTE: iterations +1 */
+    /* Step 5/17 old_counter = prime_gen_counter */
+    old_counter = *prime_gen_counter;
+    /*
+    ** Comment: Generate a pseudorandom integer x in the interval
+    ** [2**(length-1), 2**length].
+    **
+    ** Step 6/18 x = 0
+    */
+    PORT_Memset(x, 0, sizeof(x));
+    /*
+    ** Step 7/19 for i = 0 to iterations do
+    **  x = x + (HASH(prime_seed + i) * 2^(i*outlen))
+    **
+    ** The hash for index i is written at block (iterations - i - 1), so
+    ** the i = 0 digest ends up in the last (least significant) block of x.
+    */
+    for (i = 0; i < iterations; i++) {
+        /* is bigger than prime_seed should get to */
+        CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+                                       seedlen, &x[(iterations - i - 1) * hashlen]));
+    }
+    /* Step 8/20 prime_seed = prime_seed + iterations + 1 */
+    CHECK_SEC_OK(addToSeed(prime_seed, iterations, seedlen, prime_seed));
+    /*
+    ** Step 9/21 x = 2 ** (length-1) + x mod 2 ** (length-1)
+    **
+    **   This step mathematically sets the high bit and clears out
+    **  all the other bits higher than length. 'x' is stored
+    **  in the x array, MSB first. The above formula gives us an 'x'
+    **  which is length bits long and has the high bit set. We also know
+    **  that length <= iterations*outlen since
+    **  iterations=ceiling(length/outlen). First we find the offset in
+    **  bytes into the array where the high bit is.
+    */
+    offset = (outlen * iterations - length) / PR_BITS_PER_BYTE;
+    /* now we want to set the 'high bit', since length may not be a
+     * multiple of 8,*/
+    bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+    /* we need to zero out the rest of the bits in the byte above */
+    mask = (bit - 1);
+    /* now we set it */
+    x[offset] = (mask & x[offset]) | bit;
+    /*
+    ** Comment: Generate a candidate prime c in the interval
+    ** [2**(length-1), 2**length].
+    **
+    ** Step 10 t = ceiling(x/(2q(p0)))
+    ** Step 22 t = ceiling(x/(2(c0)))
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&t, &x[offset],
+                                         hashlen * iterations - offset)); /* t = x */
+    CHECK_MPI_OK(mp_mul(c0, q, &c0_2));          /* c0_2 is now c0*q */
+    CHECK_MPI_OK(mp_add(&c0_2, &c0_2, &c0_2));   /* c0_2 is now 2*q*c0 */
+    CHECK_MPI_OK(mp_add(&t, &c0_2, &t));         /* t = x+2*q*c0 */
+    CHECK_MPI_OK(mp_sub_d(&t, (mp_digit)1, &t)); /* t = x+2*q*c0 -1 */
+    /* t = floor((x+2qc0-1)/2qc0) = ceil(x/2qc0) */
+    CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+    /*
+    ** step 11: if (2tqp0 +1 > 2**length), then t = ceiling(2**(length-1)/2qp0)
+    ** step 12: t = 2tqp0 +1.
+    **
+    ** step 23: if (2tc0 +1 > 2**length), then t = ceiling(2**(length-1)/2c0)
+    ** step 24: t = 2tc0 +1.
+    */
+    CHECK_MPI_OK(mp_2expt(&two_length_minus_1, length - 1));
+step_23:
+    CHECK_MPI_OK(mp_mul(&t, &c0_2, &c));                /* c = t*2qc0 */
+    CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c));        /* c= 2tqc0 + 1*/
+    if (mpl_significant_bits(&c) > length) {            /* if c > 2**length */
+        CHECK_MPI_OK(mp_sub_d(&c0_2, (mp_digit)1, &t)); /* t = 2qc0-1 */
+        /* t = 2**(length-1) + 2qc0 -1 */
+        CHECK_MPI_OK(mp_add(&two_length_minus_1, &t, &t));
+        /* t = floor((2**(length-1)+2qc0 -1)/2qc0)
+         *   = ceil(2**(length-1)/2qc0) */
+        CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+        CHECK_MPI_OK(mp_mul(&t, &c0_2, &c));
+        CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/
+    }
+    /* Step 13/25 prime_gen_counter = prime_gen_counter + 1*/
+    (*prime_gen_counter)++;
+    /*
+    ** Comment: Test the candidate prime c for primality; first pick an
+    ** integer a between 2 and c-2.
+    **
+    ** Step 14/26 a=0
+    */
+    PORT_Memset(x, 0, sizeof(x)); /* use x for a */
+    /*
+    ** Step 15/27 for i = 0 to iterations do
+    **  a = a + (HASH(prime_seed + i) * 2^(i*outlen))
+    **
+    ** NOTE: we reuse the x array for 'a' initially.
+    */
+    for (i = 0; i < iterations; i++) {
+        CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+                                       seedlen, &x[(iterations - i - 1) * hashlen]));
+    }
+    /* Step 16/28 prime_seed = prime_seed + iterations + 1 */
+    CHECK_SEC_OK(addToSeed(prime_seed, iterations, seedlen, prime_seed));
+    /* Step 17/29 a = 2 + (a mod (c-3)). */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&a, x, iterations * hashlen));
+    CHECK_MPI_OK(mp_sub_d(&c, (mp_digit)3, &z)); /* z = c -3 */
+    CHECK_MPI_OK(mp_mod(&a, &z, &a));            /* a = a mod c -3 */
+    CHECK_MPI_OK(mp_add_d(&a, (mp_digit)2, &a)); /* a = 2 + a mod c -3 */
+    /*
+    ** Step 18 z = a**(2tq) mod p.
+    ** Step 30 z = a**(2t) mod c.
+    */
+    CHECK_MPI_OK(mp_mul(&t, q, &z));          /* z = tq */
+    CHECK_MPI_OK(mp_add(&z, &z, &z));         /* z = 2tq */
+    CHECK_MPI_OK(mp_exptmod(&a, &z, &c, &z)); /* z = a**(2tq) mod c */
+    /*
+    ** Step 19 if (( 1 == GCD(z-1,p)) and ( 1 == z**p0 mod p )), then
+    ** Step 31 if (( 1 == GCD(z-1,c)) and ( 1 == z**c0 mod c )), then
+    */
+    CHECK_MPI_OK(mp_sub_d(&z, (mp_digit)1, &a)); /* a is reused: a = z - 1 */
+    CHECK_MPI_OK(mp_gcd(&a, &c, &a));            /* a = GCD(z-1, c) */
+    if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+        CHECK_MPI_OK(mp_exptmod(&z, c0, &c, &a)); /* a = z**c0 mod c */
+        if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+            /* Step 31.1 prime = c */
+            CHECK_MPI_OK(mp_copy(&c, prime));
+            /*
+            ** Step 31.2 return Success, prime, prime_seed,
+            **    prime_gen_counter
+            */
+            rv = SECSuccess;
+            goto cleanup;
+        }
+    }
+    /*
+    ** Step 20/32 If (prime_gen_counter > 4 * length + old_counter then
+    **   return (FAILURE, 0, 0, 0).
+    ** NOTE: the test is reversed, so we fall through on failure to the
+    ** cleanup routine
+    */
+    if (*prime_gen_counter < (4 * length + old_counter)) {
+        /* Step 21/33 t = t + 1 */
+        CHECK_MPI_OK(mp_add_d(&t, (mp_digit)1, &t));
+        /* Step 22/34 Go to step 23/11 */
+        goto step_23;
+    }
+
+    /* if (prime_gencont > (4*length + old_counter), fall through to failure */
+    rv = SECFailure; /* NOTE(review): the original remark called this
+                      * paranoia because rv "really is already set".  If
+                      * CHECK_SEC_OK stores its status in rv (the macro is
+                      * defined outside this chunk - confirm), rv holds
+                      * SECSuccess here and this assignment is required. */
+
+cleanup:
+    mp_clear(&c);
+    mp_clear(&c0_2);
+    mp_clear(&t);
+    mp_clear(&a);
+    mp_clear(&z);
+    mp_clear(&two_length_minus_1);
+    PORT_Memset(x, 0, sizeof(x)); /* wipe the seed-derived scratch buffer */
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv == SECFailure) {
+        /* Step 20/32 failure result: (FAILURE, 0, 0, 0) */
+        mp_zero(prime);
+        if (prime_seed->data) {
+            SECITEM_ZfreeItem(prime_seed, PR_FALSE);
+        }
+        *prime_gen_counter = 0;
+    }
+    return rv;
+}
+
+/*
+** Perform steps from FIPS 186-3, Appendix C.6
+**
+** This generates a provable prime from a seed
+**
+** A length below 2 fails.  For length >= 33 the function recurses to build
+** a prime c0 of (length + 1) / 2 + 1 bits from input_seed and then calls
+** makePrimefromPrimesShaweTaylor with q = 1.  Shorter primes are found by
+** hashing prime_seed, forcing the top and bottom bits of the candidate and
+** testing it by trial division.
+**
+** On success 'prime' holds the prime, prime_seed holds the advanced seed
+** (a copy made with SECITEM_CopyItem; the callers visible in this file
+** free it) and *prime_gen_counter holds the running candidate count.
+** On failure 'prime' is zeroed, prime_seed is freed (if it has data) and
+** *prime_gen_counter is reset to 0.
+*/
+static SECStatus
+makePrimefromSeedShaweTaylor(
+    HASH_HashType hashtype,          /* selected Hashing algorithm */
+    unsigned int length,             /* input.  Length of prime in bits. */
+    const SECItem *input_seed,       /* input.  */
+    mp_int *prime,                   /* output.  */
+    SECItem *prime_seed,             /* output.  */
+    unsigned int *prime_gen_counter) /* output.  */
+{
+    mp_int c;   /* candidate prime (short-length path) */
+    mp_int c0;  /* half-length prime produced by the recursive call */
+    mp_int one; /* constant 1, passed as q to makePrimefromPrimesShaweTaylor */
+    SECStatus rv = SECFailure;
+    int hashlen = HASH_ResultLen(hashtype);
+    int outlen = hashlen * PR_BITS_PER_BYTE;
+    int offset;
+    int seedlen = input_seed->len * 8; /*seedlen is in bits */
+    unsigned char bit, mask;
+    unsigned char x[HASH_LENGTH_MAX * 2]; /* room for two digests side by side */
+    mp_digit dummy;
+    mp_err err = MP_OKAY;
+    int i;
+
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&c0) = 0;
+    MP_DIGITS(&one) = 0;
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&c0));
+    CHECK_MPI_OK(mp_init(&one));
+
+    /* Step 1. if length < 2 then return (FAILURE, 0, 0, 0) */
+    if (length < 2) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* Step 2.
if length >= 33 then goto step 14 */
+    if (length >= 33) {
+        mp_zero(&one);
+        CHECK_MPI_OK(mp_add_d(&one, (mp_digit)1, &one)); /* one = 1 */
+
+        /* Step 14 (status, c0, prime_seed, prime_gen_counter) =
+        **     (ST_Random_Prime((ceil(length/2)+1, input_seed)
+        */
+        rv = makePrimefromSeedShaweTaylor(hashtype, (length + 1) / 2 + 1,
+                                          input_seed, &c0, prime_seed, prime_gen_counter);
+        /* Step 15 if FAILURE is returned, return (FAILURE, 0, 0, 0). */
+        if (rv != SECSuccess) {
+            goto cleanup;
+        }
+        /* Steps 16-34 */
+        rv = makePrimefromPrimesShaweTaylor(hashtype, length, seedlen, &c0, &one,
+                                            prime, prime_seed, prime_gen_counter);
+        goto cleanup; /* we're done, one way or the other */
+    }
+    /* Step 3 prime_seed = input_seed */
+    CHECK_SEC_OK(SECITEM_CopyItem(NULL, prime_seed, input_seed));
+    /* Step 4 prime_gen_count = 0 */
+    *prime_gen_counter = 0;
+
+step_5:
+    /* Step 5 c = Hash(prime_seed) xor Hash(prime_seed+1). */
+    CHECK_SEC_OK(HASH_HashBuf(hashtype, x, prime_seed->data, prime_seed->len));
+    CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, 1, seedlen, &x[hashlen]));
+    for (i = 0; i < hashlen; i++) {
+        x[i] = x[i] ^ x[i + hashlen];
+    }
+    /* Step 6 c = 2**length-1 + c mod 2**length-1 */
+    /*   This step mathematically sets the high bit and clears out
+    **  all the other bits higher than length. Right now c is stored
+    **  in the x array, MSB first. The above formula gives us a c which
+    **  is length bits long and has the high bit set. We also know that
+    **  length < outlen since the smallest outlen is 160 bits and the largest
+    **  length at this point is 32 bits. So first we find the offset in bytes
+    **  into the array where the high bit is.
+    */
+    offset = (outlen - length) / PR_BITS_PER_BYTE;
+    /* now we want to set the 'high bit'. We have to calculate this since
+     * length may not be a multiple of 8.*/
+    bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+    /* we need to zero out the rest of the bits in the byte above */
+    mask = (bit - 1);
+    /* now we set it */
+    x[offset] = (mask & x[offset]) | bit;
+    /* Step 7 c = 2*floor(c/2) + 1 */
+    /* set the low bit. much easier to find (the end of the array) */
+    x[hashlen - 1] |= 1;
+    /* now that we've set our bits, we can create our candidate "c" */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&c, &x[offset], hashlen - offset));
+    /* Step 8 prime_gen_counter = prime_gen_counter + 1 */
+    (*prime_gen_counter)++;
+    /* Step 9 prime_seed = prime_seed + 2 */
+    CHECK_SEC_OK(addToSeed(prime_seed, 2, seedlen, prime_seed));
+    /* Step 10 Perform deterministic primality test on c. For example, since
+    ** c is small, its primality can be tested by trial division.
+    ** See Appendix C.7.
+    **
+    ** We in fact test with trial division. mpi has a built-in trial divider
+    ** that divides all divisors up to 2^16.
+    */
+    if (prime_tab[prime_tab_size - 1] < 0xFFF1) {
+        /* we aren't testing all the primes between 0 and 2^16, we really
+         * can't use this construction. Just fail. */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    dummy = prime_tab_size;
+    err = mpp_divis_primes(&c, &dummy); /* NOTE(review): MP_NO is treated
+                                         * below as "no table prime divides
+                                         * c".  If MP_YES is also reported
+                                         * when c itself is a table prime,
+                                         * candidates that small are never
+                                         * accepted - presumably irrelevant
+                                         * for the lengths callers use;
+                                         * confirm. */
+    /* Step 11 if c is prime then */
+    if (err == MP_NO) {
+        /* Step 11.1 prime = c */
+        CHECK_MPI_OK(mp_copy(&c, prime));
+        /* Step 11.2 return SUCCESS prime, prime_seed, prime_gen_counter */
+        err = MP_OKAY; /* MP_NO was an answer, not an error; keep cleanup from reporting it */
+        rv = SECSuccess;
+        goto cleanup;
+    } else if (err != MP_YES) {
+        goto cleanup; /* function failed, bail out */
+    } else {
+        /* reset mp_err */
+        err = MP_OKAY;
+    }
+    /*
+    ** Step 12 if (prime_gen_counter > (4*len))
+    **            then return (FAILURE, 0, 0, 0))
+    ** Step 13 goto step 5
+    */
+    if (*prime_gen_counter <= (4 * length)) {
+        goto step_5;
+    }
+    /* if (prime_gencont > 4*length), fall through to failure */
+    rv = SECFailure; /* NOTE(review): the original remark says rv "really
+                      * is already set"; if CHECK_SEC_OK stores its status
+                      * in rv (macro defined outside this chunk - confirm)
+                      * this assignment is what makes the path fail. */
+
+cleanup:
+    mp_clear(&c);
+    mp_clear(&c0);
+    mp_clear(&one);
+    PORT_Memset(x, 0, sizeof(x)); /* wipe the seed-derived scratch buffer */
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv == SECFailure) {
+        /* failure result: (FAILURE, 0, 0, 0) */
+        mp_zero(prime);
+        if (prime_seed->data) {
+            SECITEM_ZfreeItem(prime_seed, PR_FALSE);
+        }
+        *prime_gen_counter = 0;
+    }
+    return rv;
+}
+
+/*
+ * Find a Q and algorithm from Seed.
+ *
+ * Recomputes Q from the seed with each supported construction and reports
+ * the first one whose result equals the supplied Q:
+ *   1. FIPS 186-1 (makeQfromSeed), tried only when L <= 1024;
+ *   2. FIPS 186-3 hash construction (makeQ2fromSeed) for each hash from
+ *      getFirstHash/getNextHash;
+ *   3. FIPS 186-3 Shawe-Taylor, seeded with the first third of 'seed',
+ *      which additionally requires the tail of 'seed' to equal the
+ *      resulting qseed.
+ * On a match *hashtypePtr and *typePtr describe the construction; for
+ * Shawe-Taylor *qseed_len and *qgen_counter are set as well (*qseed_len is
+ * 0 otherwise, *qgen_counter is left untouched otherwise).
+ * Returns SECFailure with Q_ zeroed when nothing matches.
+ */
+static SECStatus
+findQfromSeed(
+    unsigned int L,             /* input.  Length of p in bits. */
+    unsigned int N,             /* input.  Length of q in bits. */
+    unsigned int g,             /* input.  Length of seed in bits. */
+    const SECItem *seed,        /* input. */
+    mp_int *Q,                  /* input. */
+    mp_int *Q_,                 /* output. */
+    unsigned int *qseed_len,    /* output */
+    HASH_HashType *hashtypePtr, /* output. Hash uses */
+    pqgGenType *typePtr,        /* output. Generation Type used */
+    unsigned int *qgen_counter) /* output.
q_counter */
+{
+    HASH_HashType hashtype = HASH_AlgNULL;
+    SECItem firstseed = { 0, 0, 0 };
+    SECItem qseed = { 0, 0, 0 };
+    SECStatus rv;
+
+    *qseed_len = 0; /* only set if FIPS186_3_ST_TYPE */
+
+    /* handle legacy small DSA first can only be FIPS186_1_TYPE */
+    if (L < 1024) {
+        rv = makeQfromSeed(g, seed, Q_);
+        if ((rv == SECSuccess) && (mp_cmp(Q, Q_) == 0)) {
+            *hashtypePtr = HASH_AlgSHA1;
+            *typePtr = FIPS186_1_TYPE;
+            return SECSuccess;
+        }
+        mp_zero(Q_);
+        return SECFailure;
+    }
+    /* 1024 could use FIPS186_1 or FIPS186_3 algorithms, we need to try
+     * them both */
+    if (L == 1024) {
+        rv = makeQfromSeed(g, seed, Q_);
+        if (rv == SECSuccess) {
+            if (mp_cmp(Q, Q_) == 0) {
+                *hashtypePtr = HASH_AlgSHA1;
+                *typePtr = FIPS186_1_TYPE;
+                return SECSuccess;
+            }
+        }
+        /* fall through for FIPS186_3 types */
+    }
+    /* at this point we know we aren't using FIPS186_1, start trying FIPS186_3
+     * with appropriate hash types */
+    for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL;
+         hashtype = getNextHash(hashtype)) {
+        rv = makeQ2fromSeed(hashtype, N, seed, Q_);
+        if (rv != SECSuccess) {
+            continue; /* a hash that cannot be computed is simply skipped */
+        }
+        if (mp_cmp(Q, Q_) == 0) {
+            *hashtypePtr = hashtype;
+            *typePtr = FIPS186_3_TYPE;
+            return SECSuccess;
+        }
+    }
+    /*
+     * OK finally try FIPS186_3 Shawe-Taylor
+     */
+    firstseed = *seed;              /* shallow copy: shares seed->data, nothing to free */
+    firstseed.len = seed->len / 3;  /* pqg_ParamGen stores firstseed, pseed and qseed
+                                     * as three equal-length parts of the seed */
+    for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL;
+         hashtype = getNextHash(hashtype)) {
+        unsigned int count;
+
+        rv = makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, Q_,
+                                          &qseed, &count);
+        if (rv != SECSuccess) {
+            continue; /* the callee frees qseed itself on failure */
+        }
+        if (mp_cmp(Q, Q_) == 0) {
+            /* check qseed as well... */
+            int offset = seed->len - qseed.len; /* qseed is expected at the tail of seed */
+            if ((offset < 0) ||
+                (PORT_Memcmp(&seed->data[offset], qseed.data, qseed.len) != 0)) {
+                /* we found q, but the seeds don't match. This isn't an
+                 * accident, someone has been tweaking the seeds, just
+                 * fail at this point. */
+                SECITEM_FreeItem(&qseed, PR_FALSE); /* NOTE(review): every other path
+                                                     * here uses SECITEM_ZfreeItem
+                                                     * for qseed */
+                mp_zero(Q_);
+                return SECFailure;
+            }
+            *qseed_len = qseed.len;
+            *hashtypePtr = hashtype;
+            *typePtr = FIPS186_3_ST_TYPE;
+            *qgen_counter = count;
+            SECITEM_ZfreeItem(&qseed, PR_FALSE);
+            return SECSuccess;
+        }
+        SECITEM_ZfreeItem(&qseed, PR_FALSE); /* release before the next hash allocates a new one */
+    }
+    /* no hash algorithms found which match seed to Q, fail */
+    mp_zero(Q_);
+    return SECFailure;
+}
+
+/*
+** Perform steps 7, 8 and 9 of FIPS 186, appendix 2.2.
+** which are the same as steps 11.1-11.5 of FIPS 186-2, App A.1.1.2
+** Generate P from Q, seed, L, and offset.
+**
+** With L - 1 = n*outlen + b, W is assembled from the n + 1 hashes of
+** (seed + offset + j), j = 0..n, keeping only the low b bits of the last
+** one.  X = W + 2**(L-1), and the result is P = X - ((X mod 2Q) - 1).
+** N is not referenced in the body.
+**
+** Returns SECSuccess with P set.  On any failure P is zeroed and
+** SECFailure is returned.  The digest scratch buffer is wiped on exit.
+*/
+static SECStatus
+makePfromQandSeed(
+    HASH_HashType hashtype, /* selected Hashing algorithm */
+    unsigned int L,         /* Length of P in bits.  Per FIPS 186. */
+    unsigned int N,         /* Length of Q in bits.  Per FIPS 186. */
+    unsigned int offset,    /* Per FIPS 186, App 2.2. & 186-3 App A.1.1.2 */
+    unsigned int seedlen,   /* input. Length of seed in bits. (g in 186-1)*/
+    const SECItem *seed,    /* input.  */
+    const mp_int *Q,        /* input.  */
+    mp_int *P)              /* output. */
+{
+    unsigned int j;      /* Per FIPS 186-3 App. A.1.1.2  (k in 186-1)*/
+    unsigned int n;      /* Per FIPS 186, appendix 2.2. */
+    mp_digit b;          /* Per FIPS 186, appendix 2.2. */
+    unsigned int outlen; /* Per FIPS 186-3 App.
A.1.1.2 */
+    unsigned int hashlen; /* outlen in bytes */
+    unsigned char V_j[HASH_LENGTH_MAX];
+    mp_int W, X, c, twoQ, V_n, tmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    /* Initialize bignums */
+    MP_DIGITS(&W) = 0;
+    MP_DIGITS(&X) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&twoQ) = 0;
+    MP_DIGITS(&V_n) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&W));
+    CHECK_MPI_OK(mp_init(&X));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&twoQ));
+    CHECK_MPI_OK(mp_init(&tmp));
+    CHECK_MPI_OK(mp_init(&V_n));
+
+    hashlen = HASH_ResultLen(hashtype);
+    outlen = hashlen * PR_BITS_PER_BYTE;
+
+    PORT_Assert(outlen > 0); /* outlen is the divisor just below.
+                              * NOTE(review): if PORT_Assert is compiled out
+                              * in release builds, outlen == 0 would divide
+                              * by zero - confirm HASH_ResultLen cannot
+                              * return 0 for the hash types passed here. */
+
+    /* L - 1 = n*outlen + b */
+    n = (L - 1) / outlen;
+    b = (L - 1) % outlen;
+
+    /* ******************************************************************
+    ** Step 11.1 (Step 7 in 186-1)
+    **  "for j = 0 ... n let
+    **           V_j = SHA[(SEED + offset + j) mod 2**seedlen]."
+    **
+    ** Step 11.2 (Step 8 in 186-1)
+    **   "W = V_0 + (V_1 * 2**outlen) + ... + (V_n-1 * 2**((n-1)*outlen))
+    **         + ((V_n mod 2**b) * 2**(n*outlen))"
+    */
+    for (j = 0; j < n; ++j) { /* Do the first n terms of V_j */
+        /* Do step 11.1 for iteration j.
+        ** V_j = HASH[(seed + offset + j) mod 2**g]
+        */
+        CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + j, seedlen, V_j));
+        /* Do step 11.2 for iteration j.
+        ** W += V_j * 2**(j*outlen)
+        */
+        OCTETS_TO_MPINT(V_j, &tmp, hashlen);           /* get bignum V_j     */
+        CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, j * outlen)); /* tmp=V_j << j*outlen */
+        CHECK_MPI_OK(mp_add(&W, &tmp, &W));            /* W += tmp           */
+    }
+    /* Step 11.2, continued.
+    **   [W += ((V_n mod 2**b) * 2**(n*outlen))]
+    */
+    CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + n, seedlen, V_j));
+    OCTETS_TO_MPINT(V_j, &V_n, hashlen);           /* get bignum V_n     */
+    CHECK_MPI_OK(mp_div_2d(&V_n, b, NULL, &tmp));  /* tmp = V_n mod 2**b */
+    CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, n * outlen)); /* tmp = tmp << n*outlen */
+    CHECK_MPI_OK(mp_add(&W, &tmp, &W));            /* W += tmp           */
+    /* Step 11.3, (Step 8 in 186-1)
+    ** "X = W + 2**(L-1).
+    **  Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L."
+    */
+    CHECK_MPI_OK(mpl_set_bit(&X, (mp_size)(L - 1), 1)); /* X = 2**(L-1) */
+    CHECK_MPI_OK(mp_add(&X, &W, &X));                   /* X += W       */
+    /*************************************************************
+    ** Step 11.4. (Step 9 in 186-1)
+    ** "c = X mod 2q"
+    */
+    CHECK_MPI_OK(mp_mul_2(Q, &twoQ));    /* 2q           */
+    CHECK_MPI_OK(mp_mod(&X, &twoQ, &c)); /* c = X mod 2q */
+    /*************************************************************
+    ** Step 11.5. (Step 9 in 186-1)
+    ** "p = X - (c - 1).
+    **  Note that p is congruent to 1 mod 2q."
+    */
+    CHECK_MPI_OK(mp_sub_d(&c, 1, &c)); /* c -= 1       */
+    CHECK_MPI_OK(mp_sub(&X, &c, P));   /* P = X - c    */
+cleanup:
+    PORT_Memset(V_j, 0, sizeof V_j); /* wipe the last digest */
+    mp_clear(&W);
+    mp_clear(&X);
+    mp_clear(&c);
+    mp_clear(&twoQ);
+    mp_clear(&V_n);
+    mp_clear(&tmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        mp_zero(P);
+        return SECFailure;
+    }
+    if (rv != SECSuccess) {
+        mp_zero(P);
+    }
+    return rv;
+}
+
+/*
+** Generate G from h, P, and Q.
+**
+** H is first reduced modulo P-1 by a single conditional subtraction (H is
+** modified in place).  H <= 1 is rejected.  G = H ** ((P-1)/Q) mod P is
+** then computed, and G <= 1 is rejected.
+**
+** *passed is PR_TRUE only when a usable G was produced.  Rejections and mp
+** errors both return SECFailure with G zeroed; the rejection paths do not
+** set an error code in this function.
+*/
+static SECStatus
+makeGfromH(const mp_int *P, /* input.  */
+           const mp_int *Q, /* input.  */
+           mp_int *H,       /* input and output. */
+           mp_int *G,       /* output.
*/
+           PRBool *passed)
+{
+    mp_int exp, pm1; /* exp = (P-1)/Q, pm1 = P-1 */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    *passed = PR_FALSE;
+    MP_DIGITS(&exp) = 0;
+    MP_DIGITS(&pm1) = 0;
+    CHECK_MPI_OK(mp_init(&exp));
+    CHECK_MPI_OK(mp_init(&pm1));
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pm1)); /* P - 1            */
+    if (mp_cmp(H, &pm1) >= 0)           /* H >= P-1         */
+        CHECK_MPI_OK(mp_sub(H, &pm1, H)); /* H = H mod (P-1)  */
+    /* Let b = 2**n (smallest power of 2 greater than P).
+    ** Since P-1 >= b/2, and H < b, quotient(H/(P-1)) = 0 or 1
+    ** so the above operation safely computes H mod (P-1)
+    */
+    /* Check for H = to 0 or 1.  Regen H if so.  (Regen means return error). */
+    if (mp_cmp_d(H, 1) <= 0) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* Compute G, according to the equation  G = (H ** ((P-1)/Q)) mod P */
+    CHECK_MPI_OK(mp_div(&pm1, Q, &exp, NULL)); /* exp = (P-1)/Q      */
+    CHECK_MPI_OK(mp_exptmod(H, &exp, P, G));   /* G = H ** exp mod P */
+    /* Check for G == 0 or G == 1, return error if so. */
+    if (mp_cmp_d(G, 1) <= 0) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    *passed = PR_TRUE;
+cleanup:
+    mp_clear(&exp);
+    mp_clear(&pm1);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv != SECSuccess) {
+        mp_zero(G); /* never hand back a rejected or partial generator */
+    }
+    return rv;
+}
+
+/*
+** Generate G from seed, index, P, and Q.
+**
+** For count = 1 .. 0xFFFF:
+**     W = HASH(seed || "ggen" || index || count)   (count as 2 bytes, MSB first)
+**     G = W ** ((P-1)/Q) mod P
+** and the first G >= 2 is returned.
+**
+** Returns SECFailure when the hash object or its context cannot be
+** obtained, when no count yields G >= 2, or on an mp error.  Unlike
+** makeGfromH, G is not zeroed on failure.
+*/
+static SECStatus
+makeGfromIndex(HASH_HashType hashtype,
+               const mp_int *P,     /* input.  */
+               const mp_int *Q,     /* input.  */
+               const SECItem *seed, /* input.  */
+               unsigned char index, /* input.
*/
+               mp_int *G)           /* input/output */
+{
+    mp_int e, pm1, W; /* e = (P-1)/Q, pm1 = P-1, W = current digest as a bignum */
+    unsigned int count;
+    unsigned char data[HASH_LENGTH_MAX]; /* holds the 2-byte count, then the digest */
+    unsigned int len;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    const SECHashObject *hashobj = NULL;
+    void *hashcx = NULL;
+
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&pm1) = 0;
+    MP_DIGITS(&W) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&pm1));
+    CHECK_MPI_OK(mp_init(&W));
+
+    /* initialize our hash stuff */
+    hashobj = HASH_GetRawHashObject(hashtype);
+    if (hashobj == NULL) {
+        /* shouldn't happen */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    hashcx = hashobj->create();
+    if (hashcx == NULL) {
+        rv = SECFailure; /* no error code is set here; presumably create() sets one - confirm */
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pm1)); /* P - 1            */
+    /* Step 3 e = (p-1)/q */
+    CHECK_MPI_OK(mp_div(&pm1, Q, &e, NULL)); /* e = (P-1)/Q      */
+/* Steps 4, 5, and 6 */
+/* count is a 16 bit value in the spec. We actually represent count
+ * as more than 16 bits so we can easily detect the 16 bit overflow */
+#define MAX_COUNT 0x10000
+    for (count = 1; count < MAX_COUNT; count++) {
+        /* step 7
+         * U = domain_param_seed || "ggen" || index || count
+         * step 8
+         * W = HASH(U)
+         */
+        hashobj->begin(hashcx);
+        hashobj->update(hashcx, seed->data, seed->len);
+        hashobj->update(hashcx, (unsigned char *)"ggen", 4);
+        hashobj->update(hashcx, &index, 1);
+        data[0] = (count >> 8) & 0xff; /* count, most significant byte first */
+        data[1] = count & 0xff;
+        hashobj->update(hashcx, data, 2);
+        hashobj->end(hashcx, data, &len, sizeof(data)); /* digest overwrites the count bytes */
+        OCTETS_TO_MPINT(data, &W, len);
+        /* step 9. g = W**e mod p */
+        CHECK_MPI_OK(mp_exptmod(&W, &e, P, G));
+        /* step 10. if (g < 2) then goto step 5 */
+        /* NOTE: this weird construct is to keep the flow according to the spec.
+         * the continue puts us back to step 5 of the for loop */
+        if (mp_cmp_d(G, 2) < 0) {
+            continue;
+        }
+        break; /* step 11 follows step 10 if the test condition is false */
+    }
+    if (count >= MAX_COUNT) {
+        rv = SECFailure; /* last part of step 6 */
+    }
+/* step 11.
+ * return valid G */
+cleanup:
+    PORT_Memset(data, 0, sizeof(data));
+    if (hashcx) {
+        hashobj->destroy(hashcx, PR_TRUE);
+    }
+    mp_clear(&e);
+    mp_clear(&pm1);
+    mp_clear(&W);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* This code uses labels and gotos, so that it can follow the numbered
+** steps in the algorithms from FIPS 186-3 appendix A.1.1.2 very closely,
+** and so that the correctness of this code can be easily verified.
+** So, please forgive the ugly c code.
+**
+** L, N       bit lengths of P and Q (the caller has validated them).
+** type       FIPS186_1_TYPE, FIPS186_3_TYPE, or anything else, which is
+**            handled as FIPS186_3_ST_TYPE (Shawe-Taylor).
+** seedBytes  seed length in bytes; must be at least N / 8.
+** pParams    must be non-NULL.  If *pParams is non-NULL only G is generated
+**            (index method) from the existing P, Q and the seed and index
+**            in *pVfy.  Otherwise fresh params and verify arenas are
+**            allocated and returned through pParams and pVfy on success.
+** pVfy       must be non-NULL; see pParams.
+**
+** Returns SECSuccess or SECFailure.
+**/
+static SECStatus
+pqg_ParamGen(unsigned int L, unsigned int N, pqgGenType type,
+             unsigned int seedBytes, PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int n;       /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int seedlen; /* Per FIPS 186-3 app A.1.1.2  (was 'g' 186-1)*/
+    unsigned int counter; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int offset;  /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int outlen;  /* Per FIPS 186-3, appendix A.1.1.2. */
+    unsigned int maxCount;
+    HASH_HashType hashtype = HASH_AlgNULL;
+    SECItem *seed; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    PLArenaPool *arena = NULL;
+    PQGParams *params = NULL;
+    PQGVerify *verify = NULL;
+    PRBool passed;
+    SECItem hit = { 0, 0, 0 };
+    SECItem firstseed = { 0, 0, 0 };
+    SECItem qseed = { 0, 0, 0 };
+    SECItem pseed = { 0, 0, 0 };
+    mp_int P, Q, G, H, l, p0;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECFailure;
+    int iterations = 0; /* number of seeds tried so far, bounded by MAX_ITERATIONS */
+
+    /* Step 1. L and N already checked by caller*/
+    /* Step 2.
if (seedlen < N) return INVALID; */ + if (seedBytes < N / PR_BITS_PER_BYTE || !pParams || !pVfy) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Initialize bignums */ + MP_DIGITS(&P) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&G) = 0; + MP_DIGITS(&H) = 0; + MP_DIGITS(&l) = 0; + MP_DIGITS(&p0) = 0; + CHECK_MPI_OK(mp_init(&P)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&G)); + CHECK_MPI_OK(mp_init(&H)); + CHECK_MPI_OK(mp_init(&l)); + CHECK_MPI_OK(mp_init(&p0)); + + /* parameters have been passed in, only generate G */ + if (*pParams != NULL) { + /* we only support G index generation if generating separate from PQ */ + if ((*pVfy == NULL) || (type == FIPS186_1_TYPE) || + ((*pVfy)->h.len != 1) || ((*pVfy)->h.data == NULL) || + ((*pVfy)->seed.data == NULL) || ((*pVfy)->seed.len == 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + params = *pParams; + verify = *pVfy; + + /* fill in P Q, */ + SECITEM_TO_MPINT((*pParams)->prime, &P); + SECITEM_TO_MPINT((*pParams)->subPrime, &Q); + hashtype = getFirstHash(L, N); + CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &(*pVfy)->seed, + (*pVfy)->h.data[0], &G)); + MPINT_TO_SECITEM(&G, &(*pParams)->base, (*pParams)->arena); + goto cleanup; + } + /* Initialize an arena for the params. */ + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + params = (PQGParams *)PORT_ArenaZAlloc(arena, sizeof(PQGParams)); + if (!params) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + params->arena = arena; + /* Initialize an arena for the verify. 
*/ + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(params->arena, PR_TRUE); + return SECFailure; + } + verify = (PQGVerify *)PORT_ArenaZAlloc(arena, sizeof(PQGVerify)); + if (!verify) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + PORT_FreeArena(params->arena, PR_TRUE); + return SECFailure; + } + verify->arena = arena; + seed = &verify->seed; + arena = NULL; + + /* Select Hash and Compute lengths. */ + /* getFirstHash gives us the smallest acceptable hash for this key + * strength */ + hashtype = getFirstHash(L, N); + outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE; + + /* Step 3: n = Ceil(L/outlen)-1; (same as n = Floor((L-1)/outlen)) */ + n = (L - 1) / outlen; + /* Step 4: (skipped since we don't use b): b = L -1 - (n*outlen); */ + seedlen = seedBytes * PR_BITS_PER_BYTE; /* bits in seed */ +step_5: + /* ****************************************************************** + ** Step 5. (Step 1 in 186-1) + ** "Choose an abitrary sequence of at least N bits and call it SEED. + ** Let g be the length of SEED in bits." + */ + if (++iterations > MAX_ITERATIONS) { /* give up after a while */ + PORT_SetError(SEC_ERROR_NEED_RANDOM); + goto cleanup; + } + seed->len = seedBytes; + CHECK_SEC_OK(getPQseed(seed, verify->arena)); + /* ****************************************************************** + ** Step 6. (Step 2 in 186-1) + ** + ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]. (186-1)" + ** "Compute U = HASH[SEED] 2**(N-1). (186-3)" + ** + ** Step 7. (Step 3 in 186-1) + ** "Form Q from U by setting the most signficant bit (the 2**159 bit) + ** and the least signficant bit to 1. In terms of boolean operations, + ** Q = U OR 2**159 OR 1. Note that 2**159 < Q < 2**160. 
(186-1)" + ** + ** "q = 2**(N-1) + U + 1 - (U mod 2) (186-3) + ** + ** Note: Both formulations are the same for U < 2**(N-1) and N=160 + ** + ** If using Shawe-Taylor, We do the entire A.1.2.1.2 setps in the block + ** FIPS186_3_ST_TYPE. + */ + if (type == FIPS186_1_TYPE) { + CHECK_SEC_OK(makeQfromSeed(seedlen, seed, &Q)); + } else if (type == FIPS186_3_TYPE) { + CHECK_SEC_OK(makeQ2fromSeed(hashtype, N, seed, &Q)); + } else { + /* FIPS186_3_ST_TYPE */ + unsigned int qgen_counter, pgen_counter; + + /* Step 1 (L,N) already checked for acceptability */ + + firstseed = *seed; + qgen_counter = 0; + /* Step 2. Use N and firstseed to generate random prime q + * using Apendix C.6 */ + CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, &Q, + &qseed, &qgen_counter)); + /* Step 3. Use floor(L/2+1) and qseed to generate random prime p0 + * using Appendix C.6 */ + pgen_counter = 0; + CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1, + &qseed, &p0, &pseed, &pgen_counter)); + /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */ + CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L, seedBytes * 8, + &p0, &Q, &P, &pseed, &pgen_counter)); + + /* combine all the seeds */ + if ((qseed.len > firstseed.len) || (pseed.len > firstseed.len)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); /* shouldn't happen */ + goto cleanup; + } + /* If the seed overflows, then pseed and qseed may have leading zeros which the mpl code clamps. 
+ * we want to make sure those are added back in so the individual seed lengths are predictable from + * the overall seed length */ + seed->len = firstseed.len * 3; + seed->data = PORT_ArenaZAlloc(verify->arena, seed->len); + if (seed->data == NULL) { + goto cleanup; + } + PORT_Memcpy(seed->data, firstseed.data, firstseed.len); + PORT_Memcpy(seed->data + 2 * firstseed.len - pseed.len, pseed.data, pseed.len); + PORT_Memcpy(seed->data + 3 * firstseed.len - qseed.len, qseed.data, qseed.len); + counter = (qgen_counter << 16) | pgen_counter; + + /* we've generated both P and Q now, skip to generating G */ + goto generate_G; + } + /* ****************************************************************** + ** Step 8. (Step 4 in 186-1) + ** "Use a robust primality testing algorithm to test whether q is prime." + ** + ** Appendix 2.1 states that a Rabin test with at least 50 iterations + ** "will give an acceptable probability of error." + */ + /*CHECK_SEC_OK( prm_RabinTest(&Q, &passed) );*/ + err = mpp_pprime_secure(&Q, prime_testcount_q(L, N)); + passed = (err == MP_YES) ? SECSuccess : SECFailure; + /* ****************************************************************** + ** Step 9. (Step 5 in 186-1) "If q is not prime, goto step 5 (1 in 186-1)." + */ + if (passed != SECSuccess) + goto step_5; + /* ****************************************************************** + ** Step 10. + ** offset = 1; + **( Step 6b 186-1)"Let counter = 0 and offset = 2." + */ + offset = (type == FIPS186_1_TYPE) ? 2 : 1; + /* + ** Step 11. (Step 6a,13a,14 in 186-1) + ** For counter - 0 to (4L-1) do + ** + */ + maxCount = L >= 1024 ? (4 * L - 1) : 4095; + for (counter = 0; counter <= maxCount; counter++) { + /* ****************************************************************** + ** Step 11.1 (Step 7 in 186-1) + ** "for j = 0 ... n let + ** V_j = HASH[(SEED + offset + j) mod 2**seedlen]." 
+ ** + ** Step 11.2 (Step 8 in 186-1) + ** "W = V_0 + V_1*2**outlen+...+ V_n-1 * 2**((n-1)*outlen) + + ** ((Vn* mod 2**b)*2**(n*outlen))" + ** Step 11.3 (Step 8 in 186-1) + ** "X = W + 2**(L-1) + ** Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L." + ** + ** Step 11.4 (Step 9 in 186-1). + ** "c = X mod 2q" + ** + ** Step 11.5 (Step 9 in 186-1). + ** " p = X - (c - 1). + ** Note that p is congruent to 1 mod 2q." + */ + CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, seedlen, + seed, &Q, &P)); + /************************************************************* + ** Step 11.6. (Step 10 in 186-1) + ** "if p < 2**(L-1), then goto step 11.9. (step 13 in 186-1)" + */ + CHECK_MPI_OK(mpl_set_bit(&l, (mp_size)(L - 1), 1)); /* l = 2**(L-1) */ + if (mp_cmp(&P, &l) < 0) + goto step_11_9; + /************************************************************ + ** Step 11.7 (step 11 in 186-1) + ** "Perform a robust primality test on p." + */ + /*CHECK_SEC_OK( prm_RabinTest(&P, &passed) );*/ + err = mpp_pprime_secure(&P, prime_testcount_p(L, N)); + passed = (err == MP_YES) ? SECSuccess : SECFailure; + /* ****************************************************************** + ** Step 11.8. "If p is determined to be primed return VALID + ** values of p, q, seed and counter." + */ + if (passed == SECSuccess) + break; + step_11_9: + /* ****************************************************************** + ** Step 11.9. "offset = offset + n + 1." + */ + offset += n + 1; + } + /* ****************************************************************** + ** Step 12. "goto step 5." + ** + ** NOTE: if counter <= maxCount, then we exited the loop at Step 11.8 + ** and now need to return p,q, seed, and counter. 
+ */ + if (counter > maxCount) + goto step_5; + +generate_G: + /* ****************************************************************** + ** returning p, q, seed and counter + */ + if (type == FIPS186_1_TYPE) { + /* Generate g, This is called the "Unverifiable Generation of g" + * in FIPS186-3 Appendix A.2.1. For compatibility we maintain + * this version of the code */ + SECITEM_AllocItem(NULL, &hit, L / 8); /* h is no longer than p */ + if (!hit.data) + goto cleanup; + do { + /* loop generate h until 1 < h < p-1 and (h**[(p-1)/q]) mod p > 1 */ + CHECK_SEC_OK(generate_h_candidate(&hit, &H)); + CHECK_SEC_OK(makeGfromH(&P, &Q, &H, &G, &passed)); + } while (passed != PR_TRUE); + MPINT_TO_SECITEM(&H, &verify->h, verify->arena); + } else { + unsigned char index = 1; /* default to 1 */ + verify->h.data = (unsigned char *)PORT_ArenaZAlloc(verify->arena, 1); + if (verify->h.data == NULL) { + goto cleanup; + } + verify->h.len = 1; + verify->h.data[0] = index; + /* Generate g, using the FIPS 186-3 Appendix A.2.3 */ + CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, seed, index, &G)); + } + /* All generation is done. Now, save the PQG params. */ + MPINT_TO_SECITEM(&P, &params->prime, params->arena); + MPINT_TO_SECITEM(&Q, &params->subPrime, params->arena); + MPINT_TO_SECITEM(&G, &params->base, params->arena); + verify->counter = counter; + *pParams = params; + *pVfy = verify; +cleanup: + if (pseed.data) { + SECITEM_ZfreeItem(&pseed, PR_FALSE); + } + if (qseed.data) { + SECITEM_ZfreeItem(&qseed, PR_FALSE); + } + mp_clear(&P); + mp_clear(&Q); + mp_clear(&G); + mp_clear(&H); + mp_clear(&l); + mp_clear(&p0); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv) { + if (params) { + PORT_FreeArena(params->arena, PR_TRUE); + } + if (verify) { + PORT_FreeArena(verify->arena, PR_TRUE); + } + } + if (hit.data) { + SECITEM_ZfreeItem(&hit, PR_FALSE); + } + return rv; +} + +SECStatus +PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy) +{ + unsigned int L; /* Length of P in bits. Per FIPS 186.
*/ + unsigned int seedBytes; + + if (j > 8 || !pParams || !pVfy) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + L = 512 + (j * 64); /* bits in P */ + seedBytes = L / 8; + return pqg_ParamGen(L, DSA1_Q_BITS, FIPS186_1_TYPE, seedBytes, + pParams, pVfy); +} + +SECStatus +PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + unsigned int L; /* Length of P in bits. Per FIPS 186. */ + + if (j > 8 || !pParams || !pVfy) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + L = 512 + (j * 64); /* bits in P */ + return pqg_ParamGen(L, DSA1_Q_BITS, FIPS186_1_TYPE, seedBytes, + pParams, pVfy); +} + +SECStatus +PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + if (N == 0) { + N = pqg_get_default_N(L); + } + if (seedBytes == 0) { + /* seedBytes == L/8 for probable primes, N/8 for Shawe-Taylor Primes */ + seedBytes = N / 8; + } + if (pqg_validate_dsa2(L, N) != SECSuccess) { + /* error code already set */ + return SECFailure; + } + return pqg_ParamGen(L, N, FIPS186_3_ST_TYPE, seedBytes, pParams, pVfy); +} + +/* + * verify can use vfy structures returned from either FIPS186-1 or + * FIPS186-2, and can handle differences in selected Hash functions to + * generate the parameters. 
+ */ +SECStatus +PQG_VerifyParams(const PQGParams *params, + const PQGVerify *vfy, SECStatus *result) +{ + SECStatus rv = SECSuccess; + unsigned int g, n, L, N, offset, outlen; + mp_int p0, P, Q, G, P_, Q_, G_, r, h; + mp_err err = MP_OKAY; + int j; + unsigned int counter_max = 0; /* handle legacy L < 1024 */ + unsigned int qseed_len; + unsigned int qgen_counter_ = 0; + SECItem pseed_ = { 0, 0, 0 }; + HASH_HashType hashtype = HASH_AlgNULL; + pqgGenType type = FIPS186_1_TYPE; + +#define CHECKPARAM(cond) \ + if (!(cond)) { \ + *result = SECFailure; \ + goto cleanup; \ + } + if (!params || !vfy || !result) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* always need at least p, q, and seed for any meaningful check */ + if ((params->prime.len == 0) || (params->subPrime.len == 0) || + (vfy->seed.len == 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* we want to either check PQ or G or both. If we don't have G, make + * sure we have count so we can check P. */ + if ((params->base.len == 0) && (vfy->counter == -1)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + MP_DIGITS(&p0) = 0; + MP_DIGITS(&P) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&G) = 0; + MP_DIGITS(&P_) = 0; + MP_DIGITS(&Q_) = 0; + MP_DIGITS(&G_) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&h) = 0; + CHECK_MPI_OK(mp_init(&p0)); + CHECK_MPI_OK(mp_init(&P)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&G)); + CHECK_MPI_OK(mp_init(&P_)); + CHECK_MPI_OK(mp_init(&Q_)); + CHECK_MPI_OK(mp_init(&G_)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&h)); + *result = SECSuccess; + SECITEM_TO_MPINT(params->prime, &P); + SECITEM_TO_MPINT(params->subPrime, &Q); + /* if G isn't specified, just check P and Q */ + if (params->base.len != 0) { + SECITEM_TO_MPINT(params->base, &G); + } + /* 1. 
Check (L,N) pair */ + N = mpl_significant_bits(&Q); + L = mpl_significant_bits(&P); + if (L < 1024) { + /* handle DSA1 pqg parameters with less thatn 1024 bits*/ + CHECKPARAM(N == DSA1_Q_BITS); + j = PQG_PBITS_TO_INDEX(L); + CHECKPARAM(j >= 0 && j <= 8); + counter_max = 4096; + } else { + /* handle DSA2 parameters (includes DSA1, 1024 bits) */ + CHECKPARAM(pqg_validate_dsa2(L, N) == SECSuccess); + counter_max = 4 * L; + } + /* 3. G < P */ + if (params->base.len != 0) { + CHECKPARAM(mp_cmp(&G, &P) < 0); + } + /* 4. P % Q == 1 */ + CHECK_MPI_OK(mp_mod(&P, &Q, &r)); + CHECKPARAM(mp_cmp_d(&r, 1) == 0); + /* 5. Q is prime */ + CHECKPARAM(mpp_pprime_secure(&Q, prime_testcount_q(L, N)) == MP_YES); + /* 6. P is prime */ + CHECKPARAM(mpp_pprime_secure(&P, prime_testcount_p(L, N)) == MP_YES); + /* Steps 7-12 are done only if the optional PQGVerify is supplied. */ + /* continue processing P */ + /* 7. counter < 4*L */ + /* 8. g >= N and g < 2*L (g is length of seed in bits) */ + /* step 7 and 8 are delayed until we determine which type of generation + * was used */ + /* 9. Q generated from SEED matches Q in PQGParams. */ + /* This function checks all possible hash and generation types to + * find a Q_ which matches Q. 
*/ + g = vfy->seed.len * 8; + CHECKPARAM(findQfromSeed(L, N, g, &vfy->seed, &Q, &Q_, &qseed_len, + &hashtype, &type, &qgen_counter_) == SECSuccess); + CHECKPARAM(mp_cmp(&Q, &Q_) == 0); + /* now we can do steps 7 & 8*/ + if ((type == FIPS186_1_TYPE) || (type == FIPS186_3_TYPE)) { + CHECKPARAM((vfy->counter == -1) || (vfy->counter < counter_max)); + CHECKPARAM(g >= N && g < counter_max / 2); + } + if (type == FIPS186_3_ST_TYPE) { + SECItem qseed = { 0, 0, 0 }; + SECItem pseed = { 0, 0, 0 }; + unsigned int first_seed_len; + unsigned int pgen_counter_ = 0; + unsigned int qgen_counter = (vfy->counter >> 16) & 0xffff; + unsigned int pgen_counter = (vfy->counter) & 0xffff; + + /* extract pseed and qseed from domain_parameter_seed, which is + * first_seed || pseed || qseed. qseed is first_seed + small_integer + * mod the length of first_seed. pseed is qseed + small_integer mod + * the length of first_seed. This means most of the time + * first_seed.len == qseed.len == pseed.len. Rarely qseed.len and/or + * pseed.len will be smaller because mpi clamps them. pqgGen + * automatically adds the zero pad back though, so we can depend + * domain_parameter_seed.len to be a multiple of three. We only have + * to deal with the fact that the returned seeds from our functions + * could be shorter. + * first_seed.len = domain_parameter_seed.len/3 + * We can now find the offsets; + * first_seed.data = domain_parameter_seed.data + 0 + * pseed.data = domain_parameter_seed.data + first_seed.len + * qseed.data = domain_parameter_seed.data + * + domain_paramter_seed.len - qseed.len + * We deal with pseed possibly having zero pad in the pseed check later. 
+ */ + first_seed_len = vfy->seed.len / 3; + CHECKPARAM(qseed_len < vfy->seed.len); + CHECKPARAM(first_seed_len * 8 > N - 1); + CHECKPARAM(first_seed_len * 8 < counter_max / 2); + CHECKPARAM(first_seed_len >= qseed_len); + qseed.len = qseed_len; + qseed.data = vfy->seed.data + vfy->seed.len - qseed.len; + pseed.len = first_seed_len; + pseed.data = vfy->seed.data + first_seed_len; + + /* + * now complete FIPS 186-3 A.1.2.1.2. Step 1 was completed + * above in our initial checks, Step 2 was completed by + * findQfromSeed */ + + /* Step 3 (status, c0, prime_seed, prime_gen_counter) = + ** (ST_Random_Prime((ceil(length/2)+1, input_seed) + */ + CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1, + &qseed, &p0, &pseed_, &pgen_counter_)); + /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */ + CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L, first_seed_len * 8, + &p0, &Q_, &P_, &pseed_, &pgen_counter_)); + CHECKPARAM(mp_cmp(&P, &P_) == 0); + /* make sure pseed wasn't tampered with (since it is part of + * calculating G) */ + if (pseed.len > pseed_.len) { + /* handle the case of zero pad for pseed */ + int extra = pseed.len - pseed_.len; + int i; + for (i = 0; i < extra; i++) { + if (pseed.data[i] != 0) { + *result = SECFailure; + goto cleanup; + } + } + pseed.data += extra; + pseed.len -= extra; + /* the rest is handled in the normal compare below */ + } + CHECKPARAM(SECITEM_CompareItem(&pseed, &pseed_) == SECEqual); + if (vfy->counter != -1) { + CHECKPARAM(pgen_counter < counter_max); + CHECKPARAM(qgen_counter < counter_max); + CHECKPARAM((pgen_counter_ == pgen_counter)); + CHECKPARAM((qgen_counter_ == qgen_counter)); + } + } else if (vfy->counter == -1) { + /* If counter is set to -1, we are really only verifying G, skip + * the remainder of the checks for P */ + CHECKPARAM(type != FIPS186_1_TYPE); /* we only do this for DSA2 */ + } else { + /* 10. P generated from (L, counter, g, SEED, Q) matches P + * in PQGParams. 
*/ + outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE; + PORT_Assert(outlen > 0); + n = (L - 1) / outlen; + offset = vfy->counter * (n + 1) + ((type == FIPS186_1_TYPE) ? 2 : 1); + CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, g, &vfy->seed, + &Q, &P_)); + CHECKPARAM(mp_cmp(&P, &P_) == 0); + } + + /* now check G, skip if don't have a g */ + if (params->base.len == 0) + goto cleanup; + + /* first Always check that G is OK FIPS186-3 A.2.2 & A.2.4*/ + /* 1. 2 < G < P-1 */ + /* P is prime, p-1 == zero 1st bit */ + CHECK_MPI_OK(mpl_set_bit(&P, 0, 0)); + CHECKPARAM(mp_cmp_d(&G, 2) > 0 && mp_cmp(&G, &P) < 0); + CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */ + /* 2. verify g**q mod p == 1 */ + CHECK_MPI_OK(mp_exptmod(&G, &Q, &P, &h)); /* h = G ** Q mod P */ + CHECKPARAM(mp_cmp_d(&h, 1) == 0); + + /* no h, the above is the best we can do */ + if (vfy->h.len == 0) { + if (type != FIPS186_1_TYPE) { + *result = SECWouldBlock; + } + goto cleanup; + } + + /* + * If h is one byte and FIPS186-3 was used to generate Q (we've verified + * Q was generated from seed already), then we assume that FIPS 186-3 + * appendix A.2.3 was used to generate G. Otherwise we assume A.2.1 was + * used to generate G. + */ + if ((vfy->h.len == 1) && (type != FIPS186_1_TYPE)) { + /* A.2.3 */ + CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &vfy->seed, + vfy->h.data[0], &G_)); + CHECKPARAM(mp_cmp(&G, &G_) == 0); + } else { + int passed; + /* A.2.1 */ + SECITEM_TO_MPINT(vfy->h, &h); + /* 11. 1 < h < P-1 (range-check h itself; G was checked above) */ + /* P is prime, p-1 == zero 1st bit */ + CHECK_MPI_OK(mpl_set_bit(&P, 0, 0)); + CHECKPARAM(mp_cmp_d(&h, 1) > 0 && mp_cmp(&h, &P) < 0); + CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */ + /* 12. G generated from h matches G in PQGParams.
*/ + CHECK_SEC_OK(makeGfromH(&P, &Q, &h, &G_, &passed)); + CHECKPARAM(passed && mp_cmp(&G, &G_) == 0); + } +cleanup: + mp_clear(&p0); + mp_clear(&P); + mp_clear(&Q); + mp_clear(&G); + mp_clear(&P_); + mp_clear(&Q_); + mp_clear(&G_); + mp_clear(&r); + mp_clear(&h); + if (pseed_.data) { + SECITEM_ZfreeItem(&pseed_, PR_FALSE); + } + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/************************************************************************** + * Free the PQGParams struct and the things it points to. * + **************************************************************************/ +void +PQG_DestroyParams(PQGParams *params) +{ + if (params == NULL) + return; + if (params->arena != NULL) { + PORT_FreeArena(params->arena, PR_TRUE); + } else { + SECITEM_ZfreeItem(&params->prime, PR_FALSE); /* don't free prime */ + SECITEM_ZfreeItem(&params->subPrime, PR_FALSE); /* don't free subPrime */ + SECITEM_ZfreeItem(&params->base, PR_FALSE); /* don't free base */ + PORT_Free(params); + } +} + +/************************************************************************** + * Free the PQGVerify struct and the things it points to. * + **************************************************************************/ + +void +PQG_DestroyVerify(PQGVerify *vfy) +{ + if (vfy == NULL) + return; + if (vfy->arena != NULL) { + PORT_FreeArena(vfy->arena, PR_TRUE); + } else { + SECITEM_ZfreeItem(&vfy->seed, PR_FALSE); /* don't free seed */ + SECITEM_ZfreeItem(&vfy->h, PR_FALSE); /* don't free h */ + PORT_Free(vfy); + } +} diff --git a/security/nss/lib/freebl/pqg.h b/security/nss/lib/freebl/pqg.h new file mode 100644 index 0000000000..c4eecd5904 --- /dev/null +++ b/security/nss/lib/freebl/pqg.h @@ -0,0 +1,25 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +/* + * pqg.h + * + * header file for pqg functions exported just to freebl + */ + +#ifndef _PQG_H_ +#define _PQG_H_ 1 + +/* PQG_GetLength returns the significant bytes in the SECItem object (that is + * the length of the object minus any leading zeros). Any SECItem may be used, + * though this function is usually used for P, Q, or G values */ +unsigned int PQG_GetLength(const SECItem *obj); +/* Check to see that the PQG parameters match a NIST defined DSA size, + * returns SECFailure and sets SEC_ERROR_INVALID_ARGS if it doesn't. + * See blapi.h for legal DSA PQG sizes. */ +SECStatus PQG_Check(const PQGParams *params); +/* Return the preferred hash algorithm for the given PQGParams. */ +HASH_HashType PQG_GetHashType(const PQGParams *params); + +#endif /* _PQG_H_ */ diff --git a/security/nss/lib/freebl/rawhash.c b/security/nss/lib/freebl/rawhash.c new file mode 100644 index 0000000000..551727b890 --- /dev/null +++ b/security/nss/lib/freebl/rawhash.c @@ -0,0 +1,154 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "nspr.h" +#include "hasht.h" +#include "blapi.h" /* below the line */ +#include "secerr.h" + +static void * +null_hash_new_context(void) +{ + return NULL; +} + +static void * +null_hash_clone_context(void *v) +{ + PORT_Assert(v == NULL); + return NULL; +} + +static void +null_hash_begin(void *v) +{ +} + +static void +null_hash_update(void *v, const unsigned char *input, unsigned int length) +{ +} + +static void +null_hash_end(void *v, unsigned char *output, unsigned int *outLen, + unsigned int maxOut) +{ + *outLen = 0; +} + +static void +null_hash_destroy_context(void *v, PRBool b) +{ + PORT_Assert(v == NULL); +} + +const SECHashObject SECRawHashObjects[] = { + { 0, + (void *(*)(void))null_hash_new_context, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))null_hash_destroy_context, + (void (*)(void *))null_hash_begin, + (void (*)(void *, const unsigned char *, unsigned int))null_hash_update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))null_hash_end, + 0, + HASH_AlgNULL, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))null_hash_end }, + { + MD2_LENGTH, + (void *(*)(void))MD2_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))MD2_DestroyContext, + (void (*)(void *))MD2_Begin, + (void (*)(void *, const unsigned char *, unsigned int))MD2_Update, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD2_End, + MD2_BLOCK_LENGTH, + HASH_AlgMD2, + NULL /* end_raw */ + }, + { MD5_LENGTH, + (void *(*)(void))MD5_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))MD5_DestroyContext, + (void (*)(void *))MD5_Begin, + (void (*)(void *, const unsigned char *, unsigned int))MD5_Update, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_End, + MD5_BLOCK_LENGTH, + HASH_AlgMD5, + (void (*)(void *, unsigned char *, unsigned int *, unsigned 
int))MD5_EndRaw }, + { SHA1_LENGTH, + (void *(*)(void))SHA1_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA1_DestroyContext, + (void (*)(void *))SHA1_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA1_Update, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int))SHA1_End, + SHA1_BLOCK_LENGTH, + HASH_AlgSHA1, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int)) + SHA1_EndRaw }, + { SHA256_LENGTH, + (void *(*)(void))SHA256_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA256_DestroyContext, + (void (*)(void *))SHA256_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA256_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA256_End, + SHA256_BLOCK_LENGTH, + HASH_AlgSHA256, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA256_EndRaw }, + { SHA384_LENGTH, + (void *(*)(void))SHA384_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA384_DestroyContext, + (void (*)(void *))SHA384_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA384_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA384_End, + SHA384_BLOCK_LENGTH, + HASH_AlgSHA384, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA384_EndRaw }, + { SHA512_LENGTH, + (void *(*)(void))SHA512_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA512_DestroyContext, + (void (*)(void *))SHA512_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA512_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA512_End, + SHA512_BLOCK_LENGTH, + HASH_AlgSHA512, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA512_EndRaw }, + { SHA224_LENGTH, + (void *(*)(void))SHA224_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, 
PRBool))SHA224_DestroyContext, + (void (*)(void *))SHA224_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA224_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA224_End, + SHA224_BLOCK_LENGTH, + HASH_AlgSHA224, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA224_EndRaw }, +}; + +const SECHashObject * +HASH_GetRawHashObject(HASH_HashType hashType) +{ + if (hashType <= HASH_AlgNULL || hashType >= HASH_AlgTOTAL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + return &SECRawHashObjects[hashType]; +} diff --git a/security/nss/lib/freebl/ret_cr16.s b/security/nss/lib/freebl/ret_cr16.s new file mode 100644 index 0000000000..1f53fc9007 --- /dev/null +++ b/security/nss/lib/freebl/ret_cr16.s @@ -0,0 +1,27 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef __LP64__ + .LEVEL 2.0W +#else + .LEVEL 1.1 +#endif + + .CODE ; equivalent to the following two lines +; .SPACE $TEXT$,SORT=8 +; .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24 + +ret_cr16 + .PROC + .CALLINFO FRAME=0, NO_CALLS + .EXPORT ret_cr16,ENTRY + .ENTRY +; BV %r0(%rp) + BV 0(%rp) + MFCTL %cr16,%ret0 + BV %r0(%rp) + .EXIT + NOP + .PROCEND + .END diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c new file mode 100644 index 0000000000..82b1f419d9 --- /dev/null +++ b/security/nss/lib/freebl/rijndael.c @@ -0,0 +1,1265 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prinit.h" +#include "prenv.h" +#include "prerr.h" +#include "secerr.h" + +#include "prtypes.h" +#include "blapi.h" +#include "rijndael.h" + +#include "cts.h" +#include "ctr.h" +#include "gcm.h" +#include "mpi.h" + +#if !defined(IS_LITTLE_ENDIAN) && !defined(NSS_X86_OR_X64) +// not test yet on big endian platform of arm +#undef USE_HW_AES +#endif + +#ifdef __powerpc64__ +#include "ppc-crypto.h" +#endif + +#ifdef USE_HW_AES +#ifdef NSS_X86_OR_X64 +#include "intel-aes.h" +#else +#include "aes-armv8.h" +#endif +#endif /* USE_HW_AES */ +#ifdef INTEL_GCM +#include "intel-gcm.h" +#endif /* INTEL_GCM */ +#if defined(USE_PPC_CRYPTO) && defined(PPC_GCM) +#include "ppc-gcm.h" +#endif + +/* Forward declarations */ +void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, + unsigned int Nk); +void rijndael_native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input); +void rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input); +void native_xorBlock(unsigned char *out, + const unsigned char *a, + const unsigned char *b); + +/* Stub definitions for the above rijndael_native_* functions, which + * shouldn't be used unless NSS_X86_OR_X64 is defined */ +#ifndef NSS_X86_OR_X64 +void +rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, + unsigned int Nk) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +rijndael_native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +native_xorBlock(unsigned char *out, const unsigned char *a, + const unsigned char *b) +{ + 
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} +#endif /* NSS_X86_OR_X64 */ + +/* + * There are currently three ways to build this code, varying in performance + * and code size. + * + * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab + * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table + * values "on-the-fly", using gfm + * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros + * + * The default is RIJNDAEL_INCLUDE_TABLES. + */ + +/* + * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4], + * T**-1[0..4], IMXC[0..4] + * When building anything else, includes S, S**-1, Rcon + */ +#include "rijndael32.tab" + +#if defined(RIJNDAEL_INCLUDE_TABLES) +/* + * RIJNDAEL_INCLUDE_TABLES + */ +#define T0(i) _T0[i] +#define T1(i) _T1[i] +#define T2(i) _T2[i] +#define T3(i) _T3[i] +#define TInv0(i) _TInv0[i] +#define TInv1(i) _TInv1[i] +#define TInv2(i) _TInv2[i] +#define TInv3(i) _TInv3[i] +#define IMXC0(b) _IMXC0[b] +#define IMXC1(b) _IMXC1[b] +#define IMXC2(b) _IMXC2[b] +#define IMXC3(b) _IMXC3[b] +/* The S-box can be recovered from the T-tables */ +#ifdef IS_LITTLE_ENDIAN +#define SBOX(b) ((PRUint8)_T3[b]) +#else +#define SBOX(b) ((PRUint8)_T1[b]) +#endif +#define SINV(b) (_SInv[b]) + +#else /* not RIJNDAEL_INCLUDE_TABLES */ + +/* + * Code for generating T-table values. + */ + +#ifdef IS_LITTLE_ENDIAN +#define WORD4(b0, b1, b2, b3) \ + ((((PRUint32)b3) << 24) | \ + (((PRUint32)b2) << 16) | \ + (((PRUint32)b1) << 8) | \ + ((PRUint32)b0)) +#else +#define WORD4(b0, b1, b2, b3) \ + ((((PRUint32)b0) << 24) | \ + (((PRUint32)b1) << 16) | \ + (((PRUint32)b2) << 8) | \ + ((PRUint32)b3)) +#endif + +/* + * Define the S and S**-1 tables (both have been stored) + */ +#define SBOX(b) (_S[b]) +#define SINV(b) (_SInv[b]) + +/* + * The function xtime, used for Galois field multiplication + */ +#define XTIME(a) \ + ((a & 0x80) ? 
((a << 1) ^ 0x1b) : (a << 1)) + +/* Choose GFM method (macros or function) */ +#if defined(RIJNDAEL_GENERATE_VALUES_MACRO) + +/* + * Galois field GF(2**8) multipliers, in macro form + */ +#define GFM01(a) \ + (a) /* a * 01 = a, the identity */ +#define GFM02(a) \ + (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ +#define GFM04(a) \ + (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */ +#define GFM08(a) \ + (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */ +#define GFM03(a) \ + (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */ +#define GFM09(a) \ + (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */ +#define GFM0B(a) \ + (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */ +#define GFM0D(a) \ + (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */ +#define GFM0E(a) \ + (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ + +#else /* RIJNDAEL_GENERATE_VALUES */ + +/* GF_MULTIPLY + * + * multiply two bytes represented in GF(2**8), mod (x**8 + x**4 + x**3 + x + 1); XTIME applies the 0x1b reduction + */ +PRUint8 +gfm(PRUint8 a, PRUint8 b) +{ + PRUint8 res = 0; + while (b > 0) { + res = (b & 0x01) ?
res ^ a : res; + a = XTIME(a); + b >>= 1; + } + return res; +} + +#define GFM01(a) \ + (a) /* a * 01 = a, the identity */ +#define GFM02(a) \ + (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ +#define GFM03(a) \ + (gfm(a, 0x03)) /* a * 03 */ +#define GFM09(a) \ + (gfm(a, 0x09)) /* a * 09 */ +#define GFM0B(a) \ + (gfm(a, 0x0B)) /* a * 0B */ +#define GFM0D(a) \ + (gfm(a, 0x0D)) /* a * 0D */ +#define GFM0E(a) \ + (gfm(a, 0x0E)) /* a * 0E */ + +#endif /* choosing GFM function */ + +/* + * The T-tables + */ +#define G_T0(i) \ + (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)))) +#define G_T1(i) \ + (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)))) +#define G_T2(i) \ + (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)))) +#define G_T3(i) \ + (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)))) + +/* + * The inverse T-tables + */ +#define G_TInv0(i) \ + (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)))) +#define G_TInv1(i) \ + (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)))) +#define G_TInv2(i) \ + (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)))) +#define G_TInv3(i) \ + (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)))) + +/* + * The inverse mix column tables + */ +#define G_IMXC0(i) \ + (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i))) +#define G_IMXC1(i) \ + (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i))) +#define G_IMXC2(i) \ + (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i))) +#define G_IMXC3(i) \ + (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i))) + +/* Now choose the T-table indexing method */ +#if defined(RIJNDAEL_GENERATE_VALUES) +/* generate values for the tables with a function*/ +static PRUint32 +gen_TInvXi(PRUint8 tx, PRUint8 i) +{ + PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; + si01 = SINV(i); + si02 = XTIME(si01); + si04 = XTIME(si02); + si08 = XTIME(si04); + si03 = si02 ^ si01; + si09 
= si08 ^ si01; + si0B = si08 ^ si03; + si0D = si09 ^ si04; + si0E = si08 ^ si04 ^ si02; + switch (tx) { + case 0: + return WORD4(si0E, si09, si0D, si0B); + case 1: + return WORD4(si0B, si0E, si09, si0D); + case 2: + return WORD4(si0D, si0B, si0E, si09); + case 3: + return WORD4(si09, si0D, si0B, si0E); + } + return -1; +} +#define T0(i) G_T0(i) +#define T1(i) G_T1(i) +#define T2(i) G_T2(i) +#define T3(i) G_T3(i) +#define TInv0(i) gen_TInvXi(0, i) +#define TInv1(i) gen_TInvXi(1, i) +#define TInv2(i) gen_TInvXi(2, i) +#define TInv3(i) gen_TInvXi(3, i) +#define IMXC0(b) G_IMXC0(b) +#define IMXC1(b) G_IMXC1(b) +#define IMXC2(b) G_IMXC2(b) +#define IMXC3(b) G_IMXC3(b) +#else /* RIJNDAEL_GENERATE_VALUES_MACRO */ +/* generate values for the tables with macros */ +#define T0(i) G_T0(i) +#define T1(i) G_T1(i) +#define T2(i) G_T2(i) +#define T3(i) G_T3(i) +#define TInv0(i) G_TInv0(i) +#define TInv1(i) G_TInv1(i) +#define TInv2(i) G_TInv2(i) +#define TInv3(i) G_TInv3(i) +#define IMXC0(b) G_IMXC0(b) +#define IMXC1(b) G_IMXC1(b) +#define IMXC2(b) G_IMXC2(b) +#define IMXC3(b) G_IMXC3(b) +#endif /* choose T-table indexing method */ + +#endif /* not RIJNDAEL_INCLUDE_TABLES */ + +/************************************************************************** + * + * Stuff related to the Rijndael key schedule + * + *************************************************************************/ + +#define SUBBYTE(w) \ + ((((PRUint32)SBOX((w >> 24) & 0xff)) << 24) | \ + (((PRUint32)SBOX((w >> 16) & 0xff)) << 16) | \ + (((PRUint32)SBOX((w >> 8) & 0xff)) << 8) | \ + (((PRUint32)SBOX((w)&0xff)))) + +#ifdef IS_LITTLE_ENDIAN +#define ROTBYTE(b) \ + ((b >> 8) | (b << 24)) +#else +#define ROTBYTE(b) \ + ((b << 8) | (b >> 24)) +#endif + +/* rijndael_key_expansion7 + * + * Generate the expanded key from the key input by the user. + * XXX + * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte + * transformation is done periodically. 
The period is every 4 bytes, and + * since 7%4 != 0 this happens at different times for each key word (unlike + * Nk == 8 where it happens twice in every key word, in the same positions). + * For now, I'm implementing this case "dumbly", w/o any unrolling. + */ +static void +rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ + unsigned int i; + PRUint32 *W; + PRUint32 *pW; + PRUint32 tmp; + W = cx->k.expandedKey; + /* 1. the first Nk words (Nk * 4 bytes) contain the cipher key */ + memcpy(W, key, Nk * 4); + i = Nk; + /* 2. loop until the full expanded key, Nb * (Nr + 1) words, is obtained */ + pW = W + i - 1; + for (; i < cx->Nb * (cx->Nr + 1); ++i) { + tmp = *pW++; + if (i % Nk == 0) + tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; + else if (i % Nk == 4) + tmp = SUBBYTE(tmp); + *pW = W[i - Nk] ^ tmp; + } +} + +/* rijndael_key_expansion + * + * Generate the expanded key, Nb * (Nr + 1) words stored in cx->k.expandedKey, from the key input by the user. Nk == 7 is handed off to rijndael_key_expansion7. + */ +static void +rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ + unsigned int i; + PRUint32 *W; + PRUint32 *pW; + PRUint32 tmp; + unsigned int round_key_words = cx->Nb * (cx->Nr + 1); + if (Nk == 7) { + rijndael_key_expansion7(cx, key, Nk); + return; + } + W = cx->k.expandedKey; + /* The first Nk words contain the input cipher key */ + memcpy(W, key, Nk * 4); + i = Nk; + pW = W + i - 1; + /* Loop over all sets of Nk words, except the last. The first word of each set takes the SUBBYTE(ROTBYTE()) ^ Rcon step; the next three are plain XORs. */ + while (i < round_key_words - Nk) { + tmp = *pW++; + tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; + *pW = W[i++ - Nk] ^ tmp; + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + if (Nk == 4) + continue; + switch (Nk) { /* no breaks: each case falls through, producing one more word */ + case 8: + tmp = *pW++; + tmp = SUBBYTE(tmp); + *pW = W[i++ - Nk] ^ tmp; + case 7: + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + case 6: + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + case 5: + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + } + } + /* Generate the last word */ + tmp = *pW++; + tmp =
SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; + *pW = W[i++ - Nk] ^ tmp; + /* There may be overflow (key words still left to generate) here, if Nk % (Nb * (Nr + 1)) > 0. However, + * since the above loop generated all but the last Nk key words, there + * is no more need for the SubByte transformation. + */ + if (Nk < 8) { + for (; i < round_key_words; ++i) { + tmp = *pW++; + *pW = W[i - Nk] ^ tmp; + } + } else { + /* except in the case when Nk == 8. Then one more SubByte may have + * to be performed, at i % Nk == 4. + */ + for (; i < round_key_words; ++i) { + tmp = *pW++; + if (i % Nk == 4) + tmp = SUBBYTE(tmp); + *pW = W[i - Nk] ^ tmp; + } + } +} + +/* rijndael_invkey_expansion + * + * Generate the expanded key for the inverse cipher from the key input by + * the user. + */ +static void +rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ + unsigned int r; + PRUint32 *roundkeyw; + PRUint8 *b; + int Nb = cx->Nb; + /* begins like usual key expansion ... */ + rijndael_key_expansion(cx, key, Nk); + /* ... but has the additional step of InvMixColumn, + * excepting the first and last round keys. + */ + roundkeyw = cx->k.expandedKey + cx->Nb; + for (r = 1; r < cx->Nr; ++r) { + /* each key word, roundkeyw, represents a column in the key + * matrix. Each column is multiplied by the InvMixColumn matrix.
+ * [ 0E 0B 0D 09 ] [ b0 ] + * [ 09 0E 0B 0D ] * [ b1 ] + * [ 0D 09 0E 0B ] [ b2 ] + * [ 0B 0D 09 0E ] [ b3 ] + */ + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); + if (Nb <= 4) + continue; + switch (Nb) { /* only reached when Nb > 4; no breaks, so each case falls through to transform one more column */ + case 8: + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ + IMXC2(b[2]) ^ IMXC3(b[3]); + case 7: + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ + IMXC2(b[2]) ^ IMXC3(b[3]); + case 6: + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ + IMXC2(b[2]) ^ IMXC3(b[3]); + case 5: + b = (PRUint8 *)roundkeyw; + *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ + IMXC2(b[2]) ^ IMXC3(b[3]); + } + } +} + +/************************************************************************** + * + * Stuff related to Rijndael encryption/decryption.
+ * + *************************************************************************/ + +#ifdef IS_LITTLE_ENDIAN +#define BYTE0WORD(w) ((w)&0x000000ff) +#define BYTE1WORD(w) ((w)&0x0000ff00) +#define BYTE2WORD(w) ((w)&0x00ff0000) +#define BYTE3WORD(w) ((w)&0xff000000) +#else +#define BYTE0WORD(w) ((w)&0xff000000) +#define BYTE1WORD(w) ((w)&0x00ff0000) +#define BYTE2WORD(w) ((w)&0x0000ff00) +#define BYTE3WORD(w) ((w)&0x000000ff) +#endif + +typedef union { + PRUint32 w[4]; + PRUint8 b[16]; +} rijndael_state; + +#define COLUMN_0(state) state.w[0] +#define COLUMN_1(state) state.w[1] +#define COLUMN_2(state) state.w[2] +#define COLUMN_3(state) state.w[3] + +#define STATE_BYTE(i) state.b[i] + +// out = a ^ b, over AES_BLOCK_SIZE bytes +inline static void +xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b) +{ + for (unsigned int j = 0; j < AES_BLOCK_SIZE; ++j) { + (out)[j] = (a)[j] ^ (b)[j]; + } +} + +static void NO_SANITIZE_ALIGNMENT +rijndael_encryptBlock128(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + unsigned int r; + PRUint32 *roundkeyw; + rijndael_state state; + PRUint32 C0, C1, C2, C3; +#if defined(NSS_X86_OR_X64) +#define pIn input +#define pOut output +#else + unsigned char *pIn, *pOut; + PRUint32 inBuf[4], outBuf[4]; + + if ((ptrdiff_t)input & 0x3) { /* input is not 4-byte aligned: read it through the PRUint32 copy inBuf */ + memcpy(inBuf, input, sizeof inBuf); + pIn = (unsigned char *)inBuf; + } else { + pIn = (unsigned char *)input; + } + if ((ptrdiff_t)output & 0x3) { + pOut = (unsigned char *)outBuf; + } else { + pOut = (unsigned char *)output; + } +#endif + roundkeyw = cx->k.expandedKey; + /* Step 1: Add Round Key 0 to initial state */ + COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++; + COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++; + COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++; + COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; + /* Step 2: Loop over rounds [1..NR-1] */ + for (r = 1; r < cx->Nr; ++r) { + /* Do ShiftRow, ByteSub, and MixColumn all at once */ + C0
= T0(STATE_BYTE(0)) ^ + T1(STATE_BYTE(5)) ^ + T2(STATE_BYTE(10)) ^ + T3(STATE_BYTE(15)); + C1 = T0(STATE_BYTE(4)) ^ + T1(STATE_BYTE(9)) ^ + T2(STATE_BYTE(14)) ^ + T3(STATE_BYTE(3)); + C2 = T0(STATE_BYTE(8)) ^ + T1(STATE_BYTE(13)) ^ + T2(STATE_BYTE(2)) ^ + T3(STATE_BYTE(7)); + C3 = T0(STATE_BYTE(12)) ^ + T1(STATE_BYTE(1)) ^ + T2(STATE_BYTE(6)) ^ + T3(STATE_BYTE(11)); + /* Round key addition */ + COLUMN_0(state) = C0 ^ *roundkeyw++; + COLUMN_1(state) = C1 ^ *roundkeyw++; + COLUMN_2(state) = C2 ^ *roundkeyw++; + COLUMN_3(state) = C3 ^ *roundkeyw++; + } + /* Step 3: Do the last round */ + /* Final round does not employ MixColumn */ + C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | + (BYTE1WORD(T3(STATE_BYTE(5)))) | + (BYTE2WORD(T0(STATE_BYTE(10)))) | + (BYTE3WORD(T1(STATE_BYTE(15))))) ^ + *roundkeyw++; + C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | + (BYTE1WORD(T3(STATE_BYTE(9)))) | + (BYTE2WORD(T0(STATE_BYTE(14)))) | + (BYTE3WORD(T1(STATE_BYTE(3))))) ^ + *roundkeyw++; + C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | + (BYTE1WORD(T3(STATE_BYTE(13)))) | + (BYTE2WORD(T0(STATE_BYTE(2)))) | + (BYTE3WORD(T1(STATE_BYTE(7))))) ^ + *roundkeyw++; + C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | + (BYTE1WORD(T3(STATE_BYTE(1)))) | + (BYTE2WORD(T0(STATE_BYTE(6)))) | + (BYTE3WORD(T1(STATE_BYTE(11))))) ^ + *roundkeyw++; + *((PRUint32 *)pOut) = C0; + *((PRUint32 *)(pOut + 4)) = C1; + *((PRUint32 *)(pOut + 8)) = C2; + *((PRUint32 *)(pOut + 12)) = C3; +#if defined(NSS_X86_OR_X64) +#undef pIn +#undef pOut +#else + if ((ptrdiff_t)output & 0x3) { /* result was built in outBuf; copy it to the unaligned output */ + memcpy(output, outBuf, sizeof outBuf); + } +#endif +} + +static void NO_SANITIZE_ALIGNMENT +rijndael_decryptBlock128(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + int r; + PRUint32 *roundkeyw; + rijndael_state state; + PRUint32 C0, C1, C2, C3; +#if defined(NSS_X86_OR_X64) +#define pIn input +#define pOut output +#else + unsigned char *pIn, *pOut; + PRUint32 inBuf[4], outBuf[4]; + + if ((ptrdiff_t)input & 0x3) { + memcpy(inBuf, input, sizeof
inBuf); + pIn = (unsigned char *)inBuf; + } else { + pIn = (unsigned char *)input; + } + if ((ptrdiff_t)output & 0x3) { + pOut = (unsigned char *)outBuf; + } else { + pOut = (unsigned char *)output; + } +#endif + roundkeyw = cx->k.expandedKey + cx->Nb * cx->Nr + 3; + /* reverse the final key addition */ + COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; + COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; + COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; + COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--; + /* Loop over rounds in reverse [NR..2] */ + for (r = cx->Nr; r > 1; --r) { + /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ + C0 = TInv0(STATE_BYTE(0)) ^ + TInv1(STATE_BYTE(13)) ^ + TInv2(STATE_BYTE(10)) ^ + TInv3(STATE_BYTE(7)); + C1 = TInv0(STATE_BYTE(4)) ^ + TInv1(STATE_BYTE(1)) ^ + TInv2(STATE_BYTE(14)) ^ + TInv3(STATE_BYTE(11)); + C2 = TInv0(STATE_BYTE(8)) ^ + TInv1(STATE_BYTE(5)) ^ + TInv2(STATE_BYTE(2)) ^ + TInv3(STATE_BYTE(15)); + C3 = TInv0(STATE_BYTE(12)) ^ + TInv1(STATE_BYTE(9)) ^ + TInv2(STATE_BYTE(6)) ^ + TInv3(STATE_BYTE(3)); + /* Invert the key addition step */ + COLUMN_3(state) = C3 ^ *roundkeyw--; + COLUMN_2(state) = C2 ^ *roundkeyw--; + COLUMN_1(state) = C1 ^ *roundkeyw--; + COLUMN_0(state) = C0 ^ *roundkeyw--; + } + /* inverse sub, reading the state bytes in the same shifted order as the round loop above */ + pOut[0] = SINV(STATE_BYTE(0)); + pOut[1] = SINV(STATE_BYTE(13)); + pOut[2] = SINV(STATE_BYTE(10)); + pOut[3] = SINV(STATE_BYTE(7)); + pOut[4] = SINV(STATE_BYTE(4)); + pOut[5] = SINV(STATE_BYTE(1)); + pOut[6] = SINV(STATE_BYTE(14)); + pOut[7] = SINV(STATE_BYTE(11)); + pOut[8] = SINV(STATE_BYTE(8)); + pOut[9] = SINV(STATE_BYTE(5)); + pOut[10] = SINV(STATE_BYTE(2)); + pOut[11] = SINV(STATE_BYTE(15)); + pOut[12] = SINV(STATE_BYTE(12)); + pOut[13] = SINV(STATE_BYTE(9)); + pOut[14] = SINV(STATE_BYTE(6)); + pOut[15] = SINV(STATE_BYTE(3)); + /* final key addition */ + *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; + *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; + *((PRUint32
*)(pOut + 4)) ^= *roundkeyw--; + *((PRUint32 *)pOut) ^= *roundkeyw--; +#if defined(NSS_X86_OR_X64) +#undef pIn +#undef pOut +#else + if ((ptrdiff_t)output & 0x3) { + memcpy(output, outBuf, sizeof outBuf); + } +#endif +} + +/************************************************************************** + * + * Rijndael modes of operation (ECB and CBC) + * + *************************************************************************/ + +static SECStatus +rijndael_encryptECB(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRBool aesni = aesni_support(); + while (inputLen > 0) { + if (aesni) { + rijndael_native_encryptBlock(cx, output, input); + } else { + rijndael_encryptBlock128(cx, output, input); + } + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus +rijndael_encryptCBC(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + unsigned char *lastblock = cx->iv; + unsigned char inblock[AES_BLOCK_SIZE * 8]; + PRBool aesni = aesni_support(); + + if (!inputLen) + return SECSuccess; + while (inputLen > 0) { + if (aesni) { + /* XOR with the last block (IV if first block) */ + native_xorBlock(inblock, input, lastblock); + /* encrypt */ + rijndael_native_encryptBlock(cx, output, inblock); + } else { + xorBlock(inblock, input, lastblock); + rijndael_encryptBlock128(cx, output, inblock); + } + + /* move to the next block */ + lastblock = output; + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; + } + memcpy(cx->iv, lastblock, AES_BLOCK_SIZE); /* the last ciphertext block written is kept in cx->iv */ + return SECSuccess; +} + +static SECStatus +rijndael_decryptECB(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRBool aesni =
aesni_support(); + while (inputLen > 0) { + if (aesni) { + rijndael_native_decryptBlock(cx, output, input); + } else { + rijndael_decryptBlock128(cx, output, input); + } + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus +rijndael_decryptCBC(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + const unsigned char *in; + unsigned char *out; + unsigned char newIV[AES_BLOCK_SIZE]; + PRBool aesni = aesni_support(); + + if (!inputLen) + return SECSuccess; + PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); + in = input + (inputLen - AES_BLOCK_SIZE); + memcpy(newIV, in, AES_BLOCK_SIZE); /* the last ciphertext block is saved here and stored into cx->iv at the end */ + out = output + (inputLen - AES_BLOCK_SIZE); + while (inputLen > AES_BLOCK_SIZE) { + if (aesni) { + // Use hardware acceleration for normal AES parameters. + rijndael_native_decryptBlock(cx, out, in); + native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]); + } else { + rijndael_decryptBlock128(cx, out, in); + xorBlock(out, out, &in[-AES_BLOCK_SIZE]); + } + out -= AES_BLOCK_SIZE; + in -= AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; + } + if (in == input) { + if (aesni) { + rijndael_native_decryptBlock(cx, out, in); + native_xorBlock(out, out, cx->iv); + } else { + rijndael_decryptBlock128(cx, out, in); + xorBlock(out, out, cx->iv); + } + } + memcpy(cx->iv, newIV, AES_BLOCK_SIZE); + return SECSuccess; +} + +/************************************************************************ + * + * BLAPI Interface functions + * + * The following functions implement the encryption routines defined in + * BLAPI for the AES cipher, Rijndael.
+ * + ***********************************************************************/ + +AESContext * +AES_AllocateContext(void) +{ + return PORT_ZNewAligned(AESContext, 16, mem); +} + +/* +** Initialize a new AES context suitable for AES encryption/decryption in +** the ECB or CBC mode. +** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC +*/ +static SECStatus +aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, + const unsigned char *iv, int mode, unsigned int encrypt) +{ + unsigned int Nk; + PRBool use_hw_aes; + /* According to AES, block lengths are 128 and key lengths are 128, 192, or + * 256 bits. We support other key sizes as well [128, 256] as long as the + * length in bytes is divisible by 4. + */ + + if (key == NULL || + keysize < AES_BLOCK_SIZE || + keysize > 32 || + keysize % 4 != 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode != NSS_AES && mode != NSS_AES_CBC) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_AES_CBC && iv == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } +#if defined(NSS_X86_OR_X64) || defined(USE_HW_AES) + use_hw_aes = (aesni_support() || arm_aes_support()) && (keysize % 8) == 0; +#else + use_hw_aes = PR_FALSE; +#endif + /* Nb = (block size in bits) / 32 */ + cx->Nb = AES_BLOCK_SIZE / 4; + /* Nk = (key size in bits) / 32 */ + Nk = keysize / 4; + /* Obtain number of rounds from "table" */ + cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); + /* copy in the iv, if necessary */ + if (mode == NSS_AES_CBC) { + memcpy(cx->iv, iv, AES_BLOCK_SIZE); +#ifdef USE_HW_AES + if (use_hw_aes) { + cx->worker = (freeblCipherFunc) + native_aes_cbc_worker(encrypt, keysize); + } else +#endif + { + cx->worker = (freeblCipherFunc)(encrypt + ?
&rijndael_encryptCBC + : &rijndael_decryptCBC); + } + } else { +#ifdef USE_HW_AES + if (use_hw_aes) { + cx->worker = (freeblCipherFunc) + native_aes_ecb_worker(encrypt, keysize); + } else +#endif + { + cx->worker = (freeblCipherFunc)(encrypt + ? &rijndael_encryptECB + : &rijndael_decryptECB); + } + } + PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); + if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } +#ifdef USE_HW_AES + if (use_hw_aes) { + native_aes_init(encrypt, keysize); + } else +#endif + { + /* Generate expanded key */ + if (encrypt) { + if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES || + cx->mode == NSS_AES_CTR)) { + PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32); + /* Prepare hardware key for normal AES parameters. */ + rijndael_native_key_expansion(cx, key, Nk); + } else { + rijndael_key_expansion(cx, key, Nk); + } + } else { + rijndael_invkey_expansion(cx, key, Nk); + } + BLAPI_CLEAR_STACK(256) + } + cx->worker_cx = cx; + cx->destroy = NULL; + cx->isBlock = PR_TRUE; + return SECSuccess; +} + +SECStatus +AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, + const unsigned char *iv, int mode, unsigned int encrypt, + unsigned int blocksize) +{ + int basemode = mode; + PRBool baseencrypt = encrypt; + SECStatus rv; + + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + switch (mode) { + case NSS_AES_CTS: + basemode = NSS_AES_CBC; + break; + case NSS_AES_GCM: + case NSS_AES_CTR: + basemode = NSS_AES; + baseencrypt = PR_TRUE; + break; + } + /* Make sure enough is initialized so we can safely call Destroy.
*/ + cx->worker_cx = NULL; + cx->destroy = NULL; + cx->mode = mode; + rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt); + if (rv != SECSuccess) { + AES_DestroyContext(cx, PR_FALSE); + return rv; + } + + /* finally, set up any mode specific contexts */ + cx->worker_aead = 0; + switch (mode) { + case NSS_AES_CTS: + cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); + cx->destroy = (freeblDestroyFunc)CTS_DestroyContext; + cx->isBlock = PR_FALSE; + break; + case NSS_AES_GCM: +#if defined(INTEL_GCM) && defined(USE_HW_AES) + if (aesni_support() && (keysize % 8) == 0 && avx_support() && + clmul_support()) { + cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate + : intel_AES_GCM_DecryptUpdate); + cx->worker_aead = (freeblAeadFunc)(encrypt ? intel_AES_GCM_EncryptAEAD + : intel_AES_GCM_DecryptAEAD); + cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext; + cx->isBlock = PR_FALSE; + } else +#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM) + if (ppc_crypto_support() && (keysize % 8) == 0) { + cx->worker_cx = ppc_AES_GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? ppc_AES_GCM_EncryptUpdate + : ppc_AES_GCM_DecryptUpdate); + cx->worker_aead = (freeblAeadFunc)(encrypt ? ppc_AES_GCM_EncryptAEAD + : ppc_AES_GCM_DecryptAEAD); + cx->destroy = (freeblDestroyFunc)ppc_AES_GCM_DestroyContext; + cx->isBlock = PR_FALSE; + } else +#endif + { + cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate + : GCM_DecryptUpdate); + cx->worker_aead = (freeblAeadFunc)(encrypt ?
GCM_EncryptAEAD + : GCM_DecryptAEAD); + + cx->destroy = (freeblDestroyFunc)GCM_DestroyContext; + cx->isBlock = PR_FALSE; + } + break; + case NSS_AES_CTR: + cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv); +#if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64) + if (aesni_support() && (keysize % 8) == 0) { + cx->worker = (freeblCipherFunc)CTR_Update_HW_AES; + } else +#endif + { + cx->worker = (freeblCipherFunc)CTR_Update; + } + cx->destroy = (freeblDestroyFunc)CTR_DestroyContext; + cx->isBlock = PR_FALSE; + break; + default: + /* everything has already been set up by aes_InitContext, just + * return */ + return SECSuccess; + } + /* check to see if we succeeded in getting the worker context */ + if (cx->worker_cx == NULL) { + /* no, just destroy the existing context */ + cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ + /* below that this isn't necessary */ + AES_DestroyContext(cx, PR_FALSE); + return SECFailure; + } + return SECSuccess; +} + +/* AES_CreateContext + * + * allocate and initialize a new context for Rijndael operations; returns NULL if allocation or initialization fails + */ +AESContext * +AES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keysize, unsigned int blocksize) +{ + AESContext *cx = AES_AllocateContext(); + if (cx) { + SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, + blocksize); + if (rv != SECSuccess) { + AES_DestroyContext(cx, PR_TRUE); + cx = NULL; + } + } + return cx; +} + +/* + * AES_DestroyContext + * + * Zero an AES cipher context. If freeit is true, also free the pointer + * to the context.
+ */ +void +AES_DestroyContext(AESContext *cx, PRBool freeit) +{ + void *mem = cx->mem; + if (cx->worker_cx && cx->destroy) { + (*cx->destroy)(cx->worker_cx, PR_TRUE); + cx->worker_cx = NULL; + cx->destroy = NULL; + } + PORT_Memset(cx, 0, sizeof(AESContext)); + if (freeit) { + PORT_Free(mem); + } else { + /* if we are not freeing the context, restore mem, We may get called + * again to actually free the context */ + cx->mem = mem; + } +} + +/* + * AES_Encrypt + * + * Encrypt an arbitrary-length buffer. The output buffer must already be + * allocated to at least inputLen. + */ +SECStatus +AES_Encrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + /* Check args */ + SECStatus rv; + if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outputLen = inputLen; +#if UINT_MAX > MP_32BIT_MAX + /* + * we can guarantee that GCM won't overflow if we limit the input to + * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now. + * + * We do it here to cover both hardware and software GCM operations. + */ + { + PR_STATIC_ASSERT(sizeof(unsigned int) > 4); + } + if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } +#else + /* if unsigned int cannot hold more than a 32-bit number, then no such check is needed */ + { + PR_STATIC_ASSERT(sizeof(unsigned int) <= 4); + } +#endif + + rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, + input, inputLen, AES_BLOCK_SIZE); + BLAPI_CLEAR_STACK(256) + return rv; +} + +/* + * AES_Decrypt + * + * Decrypt an arbitrary-length buffer.
The output buffer must already be + * allocated to at least inputLen. + */ +SECStatus +AES_Decrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + SECStatus rv; + /* Check args */ + if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if ((cx->mode != NSS_AES_GCM) && (maxOutputLen < inputLen)) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outputLen = inputLen; + rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, + input, inputLen, AES_BLOCK_SIZE); + BLAPI_CLEAR_STACK(256) + return rv; +} + +/* + * AES_AEAD + * + * Encrypt or decrypt using GCM or CCM. Include the nonce, extra data, and the tag + */ +SECStatus +AES_AEAD(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + void *params, unsigned int paramsLen, + const unsigned char *aad, unsigned int aadLen) +{ + SECStatus rv; + /* Check args */ + if (cx == NULL || output == NULL || (input == NULL && inputLen != 0) || (aad == NULL && aadLen != 0) || params == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (cx->worker_aead == NULL) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outputLen = inputLen; +#if UINT_MAX > MP_32BIT_MAX + /* + * we can guarantee that GCM won't overflow if we limit the input to + * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now. + * + * We do it here to cover both hardware and software GCM operations.
+ */ + { + PR_STATIC_ASSERT(sizeof(unsigned int) > 4); + } + if (inputLen > MP_32BIT_MAX) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } +#else + /* if unsigned int cannot hold more than a 32-bit number, then no such check is needed */ + { + PR_STATIC_ASSERT(sizeof(unsigned int) <= 4); + } +#endif + + rv = (*cx->worker_aead)(cx->worker_cx, output, outputLen, maxOutputLen, + input, inputLen, params, paramsLen, aad, aadLen, + AES_BLOCK_SIZE); + BLAPI_CLEAR_STACK(256) + return rv; +} diff --git a/security/nss/lib/freebl/rijndael.h b/security/nss/lib/freebl/rijndael.h new file mode 100644 index 0000000000..6a69a38199 --- /dev/null +++ b/security/nss/lib/freebl/rijndael.h @@ -0,0 +1,80 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _RIJNDAEL_H_ +#define _RIJNDAEL_H_ 1 + +#include "blapii.h" +#include + +#if defined(NSS_X86_OR_X64) +/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */ +#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) +#pragma GCC push_options +#pragma GCC target("sse2") +#undef NSS_DISABLE_SSE2 +#define NSS_DISABLE_SSE2 1 +#endif /* GCC <= 4.8 */ + +#include /* __m128i */ + +#ifdef NSS_DISABLE_SSE2 +#undef NSS_DISABLE_SSE2 +#pragma GCC pop_options +#endif /* NSS_DISABLE_SSE2 */ +#endif + +/* RIJNDAEL_NUM_ROUNDS + * + * Number of rounds per execution + * Nk - key size in 32-bit words + * Nb - block size in 32-bit words + */ +#define RIJNDAEL_NUM_ROUNDS(Nk, Nb) \ + (PR_MAX(Nk, Nb) + 6) + +/* + * This magic number is (Nb_max * (Nr_max + 1)) + * where Nb_max is the maximum block size in 32-bit words, + * Nr_max is the maximum number of rounds, which is PR_MAX(Nk_max, Nb_max) + 6 + */ +#define RIJNDAEL_MAX_EXP_KEY_SIZE (4 * 15) + +/* AESContextStr + * + * Values which maintain the state for Rijndael
encryption/decryption. + * + * keySchedule - 128-bit registers for the key-schedule + * iv - initialization vector for CBC mode + * Nb - the number of 32-bit words in a block (AES_BLOCK_SIZE / 4) + * Nr - the number of rounds, specified by a table + * expandedKey - the round keys in 4-byte words, the length is Nb * (Nr + 1) + * worker - the encryption/decryption function to use with worker_cx + * destroy - if not NULL, the destroy function to use with worker_cx + * worker_cx - the context for worker, worker_aead and destroy + * isBlock - is the mode of operation a block cipher or a stream cipher? + */ +struct AESContextStr { + /* NOTE: Offsets to members in this struct are hardcoded in assembly. + * Don't change the struct without updating intel-aes.s and intel-gcm.s. */ + union { +#if defined(NSS_X86_OR_X64) + __m128i keySchedule[15]; +#endif + PRUint32 expandedKey[RIJNDAEL_MAX_EXP_KEY_SIZE]; + } k; + unsigned int Nb; + unsigned int Nr; + freeblCipherFunc worker; + unsigned char iv[AES_BLOCK_SIZE]; + freeblAeadFunc worker_aead; + freeblDestroyFunc destroy; + void *worker_cx; + PRBool isBlock; + int mode; + void *mem; /* Start of the allocated memory to free. */ +}; + +#endif /* _RIJNDAEL_H_ */ diff --git a/security/nss/lib/freebl/rijndael32.tab b/security/nss/lib/freebl/rijndael32.tab new file mode 100644 index 0000000000..59be7c2c09 --- /dev/null +++ b/security/nss/lib/freebl/rijndael32.tab @@ -0,0 +1,1219 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef RIJNDAEL_INCLUDE_TABLES +static const PRUint8 _S[256] = +{ + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, +202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, +183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, + 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, + 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, + 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, +208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, + 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, +205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, + 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, +224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, +231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, +186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, +112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, +225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, +140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22 +}; +#endif /* not RIJNDAEL_INCLUDE_TABLES */ + +static const PRUint8 _SInv[256] = +{ + 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, +124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, + 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, + 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, +114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, +108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, +144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, +208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, + 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 
206, 240, 180, 230, 115, +150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, + 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, +252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, + 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, + 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, +160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, + 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125 +}; + +#ifdef RIJNDAEL_INCLUDE_TABLES +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _T0[256] = +{ +0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, +0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, +0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f, +0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, +0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, +0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, +0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551, +0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, +0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, +0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, +0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d, +0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, +0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, +0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, +0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d, +0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, +0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, +0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, +0xf05050a0, 0x443c3c78, 0xba9f9f25, 
0xe3a8a84b, 0xf35151a2, 0xfea3a35d, +0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, +0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, +0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, +0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755, +0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, +0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, +0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, +0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264, +0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, +0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, +0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, +0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac, +0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, +0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, +0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, +0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c, +0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, +0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, +0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, +0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c, +0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, +0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, +0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, +0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c +}; +#else +static const PRUint32 _T0[256] = +{ +0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd, +0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, +0xe7fefe19, 0xb5d7d762, 0x4dababe6, 
0xec76769a, 0x8fcaca45, 0x1f82829d, +0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, +0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7, +0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, +0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4, +0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, +0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1, +0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, +0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e, +0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, +0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e, +0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, +0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46, +0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, +0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7, +0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, +0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe, +0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, +0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a, +0xfdf3f30e, 0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, +0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2, +0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, +0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e, +0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, +0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256, +0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, +0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4, +0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 
0xda6d6db7, +0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa, +0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, +0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1, +0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, +0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42, +0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, +0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158, +0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, +0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22, +0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, +0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631, +0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, +0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _T1[256] = +{ +0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, +0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, +0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d, +0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, +0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, +0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, +0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4, +0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, +0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, +0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, +0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e, +0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, +0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, +0x2f2f5e71, 0x84841397, 0x5353a6f5, 
0xd1d1b968, 0x00000000, 0xededc12c, +0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46, +0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, +0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, +0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, +0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe, +0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, +0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, +0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, +0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2, +0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, +0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, +0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, +0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456, +0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, +0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, +0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, +0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa, +0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, +0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, +0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, +0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42, +0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, +0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, +0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, +0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22, +0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, +0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 
0xe6e6d731, +0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, +0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a +}; +#else +static const PRUint32 _T1[256] = +{ +0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, +0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, +0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, 0x458fcaca, 0x9d1f8282, +0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, +0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4, +0x96e47272, 0x5b9bc0c0, 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, +0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, 0x5c683434, 0xf451a5a5, +0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, +0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696, +0x0f0a0505, 0xb52f9a9a, 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, +0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, 0x1b120909, 0x9e1d8383, +0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, +0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3, +0x715e2f2f, 0x97138484, 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, +0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, 0xbed46a6a, 0x468dcbcb, +0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, +0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d, +0x55663333, 0x94118585, 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, +0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, 0xf3a25151, 0xfe5da3a3, +0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, +0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff, +0x0efdf3f3, 0x6dbfd2d2, 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, +0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, 0x5793c4c4, 0xf255a7a7, +0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, +0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 
0x7e542a2a, +0xab3b9090, 0x830b8888, 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, +0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, 0x3bdbe0e0, 0x56643232, +0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, +0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595, +0x37d3e4e4, 0x8bf27979, 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, +0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, 0xb4d86c6c, 0xfaac5656, +0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, +0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6, +0xc773b4b4, 0x5197c6c6, 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, +0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, 0x90e07070, 0x427c3e3e, +0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, +0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1, +0x273a1d1d, 0xb9279e9e, 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, +0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, 0xb62d9b9b, 0x223c1e1e, +0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, +0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6, +0xc6844242, 0xb8d06868, 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, +0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _T2[256] = +{ +0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, +0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, +0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82, +0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, +0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, +0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, +0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5, +0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, +0x04080c04, 0xc79552c7, 0x23466523, 
0xc39d5ec3, 0x18302818, 0x9637a196, +0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, +0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83, +0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, +0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, +0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, +0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb, +0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, +0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, +0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, +0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, 0x51a2f351, 0xa35dfea3, +0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, +0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, +0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, +0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7, +0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, +0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, +0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, +0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632, +0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, +0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, +0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, +0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56, +0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, +0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, +0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, +0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e, +0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 
0x0e1c120e, +0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, +0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, +0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e, +0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, +0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, +0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, +0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16 +}; +#else +static const PRUint32 _T2[256] = +{ +0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b, +0x6fb1de6f, 0xc55491c5, 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, +0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, 0xca458fca, 0x829d1f82, +0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, +0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4, +0x7296e472, 0xc05b9bc0, 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, +0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, 0x345c6834, 0xa5f451a5, +0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, +0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796, +0x050f0a05, 0x9ab52f9a, 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, +0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, 0x091b1209, 0x839e1d83, +0x2c74582c, 0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, +0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3, +0x2f715e2f, 0x84971384, 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, +0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, 0x6abed46a, 0xcb468dcb, +0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, +0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d, +0x33556633, 0x85941185, 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, +0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, 0x51f3a251, 0xa3fe5da3, +0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 
0xf504f1f5, +0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff, +0xf30efdf3, 0xd26dbfd2, 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, +0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, 0xc45793c4, 0xa7f255a7, +0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, +0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a, +0x90ab3b90, 0x88830b88, 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, +0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, 0xe03bdbe0, 0x32566432, +0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, +0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195, +0xe437d3e4, 0x798bf279, 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, +0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, 0x6cb4d86c, 0x56faac56, +0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, +0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6, +0xb4c773b4, 0xc65197c6, 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, +0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, 0x7090e070, 0x3e427c3e, +0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, +0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1, +0x1d273a1d, 0x9eb9279e, 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, +0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, 0x9bb62d9b, 0x1e223c1e, +0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, +0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6, +0x42c68442, 0x68b8d068, 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, +0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _T3[256] = +{ +0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, +0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, +0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282, +0x8940c9c9, 0xfa877d7d, 0xef15fafa, 
0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, +0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, +0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, +0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5, +0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, +0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, +0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, +0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383, +0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, +0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, +0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, +0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb, +0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, +0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, +0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, +0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3, +0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, +0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, +0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, +0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7, +0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, +0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, +0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, +0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232, +0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, +0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, +0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, +0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 
0xacfa5656, +0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, +0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, +0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, +0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e, +0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, +0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, +0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, +0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e, +0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, +0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, +0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, +0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616 +}; +#else +static const PRUint32 _T3[256] = +{ +0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6, +0x6f6fb1de, 0xc5c55491, 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, +0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, 0xcaca458f, 0x82829d1f, +0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, +0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753, +0x727296e4, 0xc0c05b9b, 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, +0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, 0x34345c68, 0xa5a5f451, +0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, +0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137, +0x05050f0a, 0x9a9ab52f, 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, +0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, 0x09091b12, 0x83839e1d, +0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, +0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd, +0x2f2f715e, 0x84849713, 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, +0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, 0x6a6abed4, 
0xcbcb468d, +0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, +0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a, +0x33335566, 0x85859411, 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, +0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, 0x5151f3a2, 0xa3a3fe5d, +0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, +0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5, +0xf3f30efd, 0xd2d26dbf, 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, +0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, 0xc4c45793, 0xa7a7f255, +0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, +0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54, +0x9090ab3b, 0x8888830b, 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, +0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, 0xe0e03bdb, 0x32325664, +0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, +0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431, +0xe4e437d3, 0x79798bf2, 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, +0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, 0x6c6cb4d8, 0x5656faac, +0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, +0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157, +0xb4b4c773, 0xc6c65197, 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, +0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, 0x707090e0, 0x3e3e427c, +0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, +0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899, +0x1d1d273a, 0x9e9eb927, 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, +0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, 0x9b9bb62d, 0x1e1e223c, +0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, +0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7, +0x4242c684, 0x6868b8d0, 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, +0xb0b0cb7b, 
0x5454fca8, 0xbbbbd66d, 0x16163a2c +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv0[256] = +{ +0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, +0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, +0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25, +0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, +0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, +0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, +0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5, +0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, +0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, +0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, +0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7, +0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, +0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, +0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, +0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6, +0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, +0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, +0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, +0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793, +0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, +0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, +0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, +0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb, +0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, +0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, +0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 
0x2264e947, +0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9, +0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, +0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, +0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, +0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e, +0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, +0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, +0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, +0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43, +0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, +0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, +0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, +0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255, +0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, +0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, +0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, +0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0 +}; +#else +static const PRUint32 _TInv0[256] = +{ +0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1, +0xacfa58ab, 0x4be30393, 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, +0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 0xdeb15a49, 0x25ba1b67, +0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, +0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3, +0x49e06929, 0x8ec9c844, 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, +0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 0x63df4a18, 0xe51a3182, +0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, +0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2, +0xe31f8f57, 0x6655ab2a, 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 
0xd33708a5, +0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 0x8acf1c2b, 0xa779b492, +0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, +0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa, +0x5e719f06, 0xbd6e1051, 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, +0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 0x1998fb24, 0xd6bde997, +0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, +0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48, +0x1e1170ac, 0x6c5a724e, 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, +0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 0x0c0a67b1, 0x9357e70f, +0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, +0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad, +0x2db6a8b9, 0x141ea9c8, 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, +0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 0x8b432976, 0xcb23c6dc, +0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, +0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3, +0x0d8652ec, 0x77c1e3d0, 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, +0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 0x87494ec7, 0xd938d1c1, +0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, +0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8, +0x2e39f75e, 0x82c3aff5, 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, +0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 0xcd267809, 0x6e5918f4, +0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, +0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331, +0xc6a59430, 0x35a266c0, 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, +0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 0x764dd68d, 0x43efb04d, +0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, +0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252, +0xe9105633, 
0x6dd64713, 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, +0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 0x9cd2df59, 0x55f2733f, +0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, +0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c, +0x283c498b, 0xff0d9541, 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, +0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv1[256] = +{ +0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, +0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, +0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567, +0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, +0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, +0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, +0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582, +0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, +0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, +0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, +0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792, +0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, +0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, +0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, +0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697, +0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, +0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, +0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, +0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f, +0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, +0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 
0xc78bf2ad, +0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, +0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc, +0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, +0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, +0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, +0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1, +0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, +0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, +0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, +0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4, +0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, +0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, +0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, +0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d, +0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, +0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, +0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, +0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f, +0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, +0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, +0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, +0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042 +}; +#else +static const PRUint32 _TInv1[256] = +{ +0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, +0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, +0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, 0x49deb15a, 0x6725ba1b, +0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, +0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 
0xd3587421, +0x2949e069, 0x448ec9c8, 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, +0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, 0x1863df4a, 0x82e51a31, +0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, +0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02, +0x57e31f8f, 0x2a6655ab, 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, +0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, 0x2b8acf1c, 0x92a779b4, +0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, +0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef, +0x065e719f, 0x51bd6e10, 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, +0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, 0x241998fb, 0x97d6bde9, +0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, +0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed, +0xac1e1170, 0x4e6c5a72, 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, +0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, 0xb10c0a67, 0x0f9357e7, +0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, +0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7, +0xb92db6a8, 0xc8141ea9, 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, +0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, 0x768b4329, 0xdccb23c6, +0x68b6edfc, 0x63b8e4f1, 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, +0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230, +0xec0d8652, 0xd077c1e3, 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, +0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, 0xc787494e, 0xc1d938d1, +0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, +0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8, +0x5e2e39f7, 0xf582c3af, 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, +0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, 0x09cd2678, 0xf46e5918, +0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, +0xd9bae79b, 
0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23, +0x30c6a594, 0xc035a266, 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, +0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, 0x8d764dd6, 0x4d43efb0, +0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, +0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2, +0x33e91056, 0x136dd647, 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, +0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, 0x599cd2df, 0x3f55f273, +0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, +0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225, +0x8b283c49, 0x41ff0d95, 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, +0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv2[256] = +{ +0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, +0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, +0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b, +0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, +0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, +0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, +0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231, +0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, +0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, +0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, +0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4, +0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, +0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, +0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, +0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9, +0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 
0xc879dbee, +0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, +0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, +0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7, +0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, +0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, +0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, +0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6, +0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, +0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, +0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, +0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1, +0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, +0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, +0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, +0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418, +0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, +0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, +0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, +0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 0xef434db0, +0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51, +0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, +0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, +0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73, +0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, +0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, +0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, +0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257 +}; +#else +static const PRUint32 
_TInv2[256] = +{ +0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, 0x6bcb3bab, 0x45f11f9d, +0x58abacfa, 0x03934be3, 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, +0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, 0x5a49deb1, 0x1b6725ba, +0x0e9845ea, 0xc0e15dfe, 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, +0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, 0x832dd4be, 0x21d35874, +0x692949e0, 0xc8448ec9, 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, +0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, 0x4a1863df, 0x3182e51a, +0x33609751, 0x7f456253, 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, +0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, 0xd323ab73, 0x02e2724b, +0x8f57e31f, 0xab2a6655, 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, +0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, 0x1c2b8acf, 0xb492a779, +0xf2f0f307, 0xe2a14e69, 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, +0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, 0xec390b83, 0xefaa4060, +0x9f065e71, 0x1051bd6e, 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, +0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, 0xfb241998, 0xe997d6bd, +0x43cc8940, 0x9e7767d9, 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, +0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, 0x86830980, 0xed48322b, +0x70ac1e11, 0x724e6c5a, 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, +0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, 0x67b10c0a, 0xe70f9357, +0x96d2b4ee, 0x919e1b9b, 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, +0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, 0x0d0b0e09, 0xc7adf28b, +0xa8b92db6, 0xa9c8141e, 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, +0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, 0x29768b43, 0xc6dccb23, +0xfc68b6ed, 0xf163b8e4, 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, +0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, 0x2f4b1d9e, 0x30f3dcb2, +0x52ec0d86, 0xe3d077c1, 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, +0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, 0x4ec78749, 0xd1c1d938, 
+0xa2fe8cca, 0x0b3698d4, 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, +0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, 0x13c2f68d, 0xb8e890d8, +0xf75e2e39, 0xaff582c3, 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, +0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, 0x7809cd26, 0x18f46e59, +0xb701ec9a, 0x9aa8834f, 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, +0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 0xb2af31a4, 0x23312a3f, +0x9430c6a5, 0x66c035a2, 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, +0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, 0xd68d764d, 0xb04d43ef, +0x4d54ccaa, 0x04dfe496, 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, +0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, 0x1d5ab367, 0xd25292db, +0x5633e910, 0x47136dd6, 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, +0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, 0xdf599cd2, 0x733f55f2, +0xce791814, 0x37bf73c7, 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, +0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, 0xc372161d, 0x250cbce2, +0x498b283c, 0x9541ff0d, 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, +0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv3[256] = +{ +0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, +0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, +0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba, +0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, +0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, +0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, +0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a, +0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, +0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, +0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, +0x30f28728, 0x23b2a5bf, 0x02ba6a03, 
0xed5c8216, 0x8a2b1ccf, 0xa792b479, +0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, +0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, +0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, +0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd, +0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, +0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, +0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, +0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757, +0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12, +0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, +0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, +0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623, +0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, +0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, +0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, +0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138, +0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, +0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, +0x2e5ef739, 0x82f5afc3, 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, +0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859, +0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, +0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, +0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, +0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef, +0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, +0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, +0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 
0xeb893c13, +0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2, +0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, +0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, +0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, +0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8 +}; +#else +static const PRUint32 _TInv3[256] = +{ +0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, 0xab6bcb3b, 0x9d45f11f, +0xfa58abac, 0xe303934b, 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, +0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, 0xb15a49de, 0xba1b6725, +0xea0e9845, 0xfec0e15d, 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, +0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, 0xbe832dd4, 0x7421d358, +0xe0692949, 0xc9c8448e, 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, +0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, 0xdf4a1863, 0x1a3182e5, +0x51336097, 0x537f4562, 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, +0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, 0x73d323ab, 0x4b02e272, +0x1f8f57e3, 0x55ab2a66, 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, +0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, 0xcf1c2b8a, 0x79b492a7, +0x07f2f0f3, 0x69e2a14e, 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, +0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, 0x83ec390b, 0x60efaa40, +0x719f065e, 0x6e1051bd, 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, +0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, 0x98fb2419, 0xbde997d6, +0x4043cc89, 0xd99e7767, 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, +0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, 0x80868309, 0x2bed4832, +0x1170ac1e, 0x5a724e6c, 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, +0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, 0x0a67b10c, 0x57e70f93, +0xee96d2b4, 0x9b919e1b, 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, +0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, 0x090d0b0e, 0x8bc7adf2, +0xb6a8b92d, 0x1ea9c814, 0xf1198557, 0x75074caf, 0x99ddbbee, 
0x7f60fda3, +0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, 0x4329768b, 0x23c6dccb, +0xedfc68b6, 0xe4f163b8, 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, +0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, 0x9e2f4b1d, 0xb230f3dc, +0x8652ec0d, 0xc1e3d077, 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, +0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, 0x494ec787, 0x38d1c1d9, +0xcaa2fe8c, 0xd40b3698, 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, +0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, 0x8d13c2f6, 0xd8b8e890, +0x39f75e2e, 0xc3aff582, 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, +0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, 0x267809cd, 0x5918f46e, +0x9ab701ec, 0x4f9aa883, 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, +0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, 0xa4b2af31, 0x3f23312a, +0xa59430c6, 0xa266c035, 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, +0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, 0x4dd68d76, 0xefb04d43, +0xaa4d54cc, 0x9604dfe4, 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, +0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, 0x671d5ab3, 0xdbd25292, +0x105633e9, 0xd647136d, 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, +0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, 0xd2df599c, 0xf2733f55, +0x14ce7918, 0xc737bf73, 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, +0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, 0x1dc37216, 0xe2250cbc, +0x3c498b28, 0x0d9541ff, 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, +0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC0[256] = +{ +0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 0x2c342438, 0x27392d36, +0x3a2e3624, 0x31233f2a, 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, +0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 0xb0d090e0, 0xbbdd99ee, +0xa6ca82fc, 0xadc78bf2, 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, +0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 0xc48cfca8, 0xcf81f5a6, +0xd296eeb4, 0xd99be7ba, 
0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, +0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 0x23d373ab, 0x28de7aa5, +0x35c961b7, 0x3ec468b9, 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, +0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 0xe75f8f03, 0xec52860d, +0xf1459d1f, 0xfa489411, 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, +0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 0xf66d76ad, 0xfd607fa3, +0xe07764b1, 0xeb7a6dbf, 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, +0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 0x82311ae5, 0x893c13eb, +0x942b08f9, 0x9f2601f7, 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, +0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 0x1ed5ae3d, 0x15d8a733, +0x08cfbc21, 0x03c2b52f, 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, +0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 0xa1e2694e, 0xaaef6040, +0xb7f87b52, 0xbcf5725c, 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, +0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 0x3d06dd96, 0x360bd498, +0x2b1ccf8a, 0x2011c684, 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, +0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 0x495ab1de, 0x4257b8d0, +0x5f40a3c2, 0x544daacc, 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, +0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 0xafb2a431, 0xa4bfad3f, +0xb9a8b62d, 0xb2a5bf23, 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, +0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 0x6b3e5899, 0x60335197, +0x7d244a85, 0x7629438b, 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, +0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 0x8c61d79a, 0x876cde94, +0x9a7bc586, 0x9176cc88, 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, +0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 0xf83dbbd2, 0xf330b2dc, +0xee27a9ce, 0xe52aa0c0, 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, +0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 0x64d90f0a, 0x6fd40604, +0x72c31d16, 0x79ce1418, 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, +0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 
0x2d83bed4, 0x268eb7da, +0x3b99acc8, 0x3094a5c6, 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, +0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 0xb1670a0c, 0xba6a0302, +0xa77d1810, 0xac70111e, 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, +0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 0xc53b6644, 0xce366f4a, +0xd3217458, 0xd82c7d56, 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, +0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 0x2264e947, 0x2969e049, +0x347efb5b, 0x3f73f255, 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, +0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 0xe6e815ef, 0xede51ce1, +0xf0f207f3, 0xfbff0efd, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, +0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d +}; +#else +static const PRUint32 _IMXC0[256] = +{ +0x00000000, 0x0e090d0b, 0x1c121a16, 0x121b171d, 0x3824342c, 0x362d3927, +0x24362e3a, 0x2a3f2331, 0x70486858, 0x7e416553, 0x6c5a724e, 0x62537f45, +0x486c5c74, 0x4665517f, 0x547e4662, 0x5a774b69, 0xe090d0b0, 0xee99ddbb, +0xfc82caa6, 0xf28bc7ad, 0xd8b4e49c, 0xd6bde997, 0xc4a6fe8a, 0xcaaff381, +0x90d8b8e8, 0x9ed1b5e3, 0x8ccaa2fe, 0x82c3aff5, 0xa8fc8cc4, 0xa6f581cf, +0xb4ee96d2, 0xbae79bd9, 0xdb3bbb7b, 0xd532b670, 0xc729a16d, 0xc920ac66, +0xe31f8f57, 0xed16825c, 0xff0d9541, 0xf104984a, 0xab73d323, 0xa57ade28, +0xb761c935, 0xb968c43e, 0x9357e70f, 0x9d5eea04, 0x8f45fd19, 0x814cf012, +0x3bab6bcb, 0x35a266c0, 0x27b971dd, 0x29b07cd6, 0x038f5fe7, 0x0d8652ec, +0x1f9d45f1, 0x119448fa, 0x4be30393, 0x45ea0e98, 0x57f11985, 0x59f8148e, +0x73c737bf, 0x7dce3ab4, 0x6fd52da9, 0x61dc20a2, 0xad766df6, 0xa37f60fd, +0xb16477e0, 0xbf6d7aeb, 0x955259da, 0x9b5b54d1, 0x894043cc, 0x87494ec7, +0xdd3e05ae, 0xd33708a5, 0xc12c1fb8, 0xcf2512b3, 0xe51a3182, 0xeb133c89, +0xf9082b94, 0xf701269f, 0x4de6bd46, 0x43efb04d, 0x51f4a750, 0x5ffdaa5b, +0x75c2896a, 0x7bcb8461, 0x69d0937c, 0x67d99e77, 0x3daed51e, 0x33a7d815, +0x21bccf08, 0x2fb5c203, 0x058ae132, 0x0b83ec39, 0x1998fb24, 0x1791f62f, +0x764dd68d, 0x7844db86, 0x6a5fcc9b, 0x6456c190, 
0x4e69e2a1, 0x4060efaa, +0x527bf8b7, 0x5c72f5bc, 0x0605bed5, 0x080cb3de, 0x1a17a4c3, 0x141ea9c8, +0x3e218af9, 0x302887f2, 0x223390ef, 0x2c3a9de4, 0x96dd063d, 0x98d40b36, +0x8acf1c2b, 0x84c61120, 0xaef93211, 0xa0f03f1a, 0xb2eb2807, 0xbce2250c, +0xe6956e65, 0xe89c636e, 0xfa877473, 0xf48e7978, 0xdeb15a49, 0xd0b85742, +0xc2a3405f, 0xccaa4d54, 0x41ecdaf7, 0x4fe5d7fc, 0x5dfec0e1, 0x53f7cdea, +0x79c8eedb, 0x77c1e3d0, 0x65daf4cd, 0x6bd3f9c6, 0x31a4b2af, 0x3fadbfa4, +0x2db6a8b9, 0x23bfa5b2, 0x09808683, 0x07898b88, 0x15929c95, 0x1b9b919e, +0xa17c0a47, 0xaf75074c, 0xbd6e1051, 0xb3671d5a, 0x99583e6b, 0x97513360, +0x854a247d, 0x8b432976, 0xd134621f, 0xdf3d6f14, 0xcd267809, 0xc32f7502, +0xe9105633, 0xe7195b38, 0xf5024c25, 0xfb0b412e, 0x9ad7618c, 0x94de6c87, +0x86c57b9a, 0x88cc7691, 0xa2f355a0, 0xacfa58ab, 0xbee14fb6, 0xb0e842bd, +0xea9f09d4, 0xe49604df, 0xf68d13c2, 0xf8841ec9, 0xd2bb3df8, 0xdcb230f3, +0xcea927ee, 0xc0a02ae5, 0x7a47b13c, 0x744ebc37, 0x6655ab2a, 0x685ca621, +0x42638510, 0x4c6a881b, 0x5e719f06, 0x5078920d, 0x0a0fd964, 0x0406d46f, +0x161dc372, 0x1814ce79, 0x322bed48, 0x3c22e043, 0x2e39f75e, 0x2030fa55, +0xec9ab701, 0xe293ba0a, 0xf088ad17, 0xfe81a01c, 0xd4be832d, 0xdab78e26, +0xc8ac993b, 0xc6a59430, 0x9cd2df59, 0x92dbd252, 0x80c0c54f, 0x8ec9c844, +0xa4f6eb75, 0xaaffe67e, 0xb8e4f163, 0xb6edfc68, 0x0c0a67b1, 0x02036aba, +0x10187da7, 0x1e1170ac, 0x342e539d, 0x3a275e96, 0x283c498b, 0x26354480, +0x7c420fe9, 0x724b02e2, 0x605015ff, 0x6e5918f4, 0x44663bc5, 0x4a6f36ce, +0x587421d3, 0x567d2cd8, 0x37a10c7a, 0x39a80171, 0x2bb3166c, 0x25ba1b67, +0x0f853856, 0x018c355d, 0x13972240, 0x1d9e2f4b, 0x47e96422, 0x49e06929, +0x5bfb7e34, 0x55f2733f, 0x7fcd500e, 0x71c45d05, 0x63df4a18, 0x6dd64713, +0xd731dcca, 0xd938d1c1, 0xcb23c6dc, 0xc52acbd7, 0xef15e8e6, 0xe11ce5ed, +0xf307f2f0, 0xfd0efffb, 0xa779b492, 0xa970b999, 0xbb6bae84, 0xb562a38f, +0x9f5d80be, 0x91548db5, 0x834f9aa8, 0x8d4697a3 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC1[256] = +{ +0x00000000, 
0x0d090e0b, 0x1a121c16, 0x171b121d, 0x3424382c, 0x392d3627, +0x2e36243a, 0x233f2a31, 0x68487058, 0x65417e53, 0x725a6c4e, 0x7f536245, +0x5c6c4874, 0x5165467f, 0x467e5462, 0x4b775a69, 0xd090e0b0, 0xdd99eebb, +0xca82fca6, 0xc78bf2ad, 0xe4b4d89c, 0xe9bdd697, 0xfea6c48a, 0xf3afca81, +0xb8d890e8, 0xb5d19ee3, 0xa2ca8cfe, 0xafc382f5, 0x8cfca8c4, 0x81f5a6cf, +0x96eeb4d2, 0x9be7bad9, 0xbb3bdb7b, 0xb632d570, 0xa129c76d, 0xac20c966, +0x8f1fe357, 0x8216ed5c, 0x950dff41, 0x9804f14a, 0xd373ab23, 0xde7aa528, +0xc961b735, 0xc468b93e, 0xe757930f, 0xea5e9d04, 0xfd458f19, 0xf04c8112, +0x6bab3bcb, 0x66a235c0, 0x71b927dd, 0x7cb029d6, 0x5f8f03e7, 0x52860dec, +0x459d1ff1, 0x489411fa, 0x03e34b93, 0x0eea4598, 0x19f15785, 0x14f8598e, +0x37c773bf, 0x3ace7db4, 0x2dd56fa9, 0x20dc61a2, 0x6d76adf6, 0x607fa3fd, +0x7764b1e0, 0x7a6dbfeb, 0x595295da, 0x545b9bd1, 0x434089cc, 0x4e4987c7, +0x053eddae, 0x0837d3a5, 0x1f2cc1b8, 0x1225cfb3, 0x311ae582, 0x3c13eb89, +0x2b08f994, 0x2601f79f, 0xbde64d46, 0xb0ef434d, 0xa7f45150, 0xaafd5f5b, +0x89c2756a, 0x84cb7b61, 0x93d0697c, 0x9ed96777, 0xd5ae3d1e, 0xd8a73315, +0xcfbc2108, 0xc2b52f03, 0xe18a0532, 0xec830b39, 0xfb981924, 0xf691172f, +0xd64d768d, 0xdb447886, 0xcc5f6a9b, 0xc1566490, 0xe2694ea1, 0xef6040aa, +0xf87b52b7, 0xf5725cbc, 0xbe0506d5, 0xb30c08de, 0xa4171ac3, 0xa91e14c8, +0x8a213ef9, 0x872830f2, 0x903322ef, 0x9d3a2ce4, 0x06dd963d, 0x0bd49836, +0x1ccf8a2b, 0x11c68420, 0x32f9ae11, 0x3ff0a01a, 0x28ebb207, 0x25e2bc0c, +0x6e95e665, 0x639ce86e, 0x7487fa73, 0x798ef478, 0x5ab1de49, 0x57b8d042, +0x40a3c25f, 0x4daacc54, 0xdaec41f7, 0xd7e54ffc, 0xc0fe5de1, 0xcdf753ea, +0xeec879db, 0xe3c177d0, 0xf4da65cd, 0xf9d36bc6, 0xb2a431af, 0xbfad3fa4, +0xa8b62db9, 0xa5bf23b2, 0x86800983, 0x8b890788, 0x9c921595, 0x919b1b9e, +0x0a7ca147, 0x0775af4c, 0x106ebd51, 0x1d67b35a, 0x3e58996b, 0x33519760, +0x244a857d, 0x29438b76, 0x6234d11f, 0x6f3ddf14, 0x7826cd09, 0x752fc302, +0x5610e933, 0x5b19e738, 0x4c02f525, 0x410bfb2e, 0x61d79a8c, 0x6cde9487, +0x7bc5869a, 0x76cc8891, 0x55f3a2a0, 
0x58faacab, 0x4fe1beb6, 0x42e8b0bd, +0x099fead4, 0x0496e4df, 0x138df6c2, 0x1e84f8c9, 0x3dbbd2f8, 0x30b2dcf3, +0x27a9ceee, 0x2aa0c0e5, 0xb1477a3c, 0xbc4e7437, 0xab55662a, 0xa65c6821, +0x85634210, 0x886a4c1b, 0x9f715e06, 0x9278500d, 0xd90f0a64, 0xd406046f, +0xc31d1672, 0xce141879, 0xed2b3248, 0xe0223c43, 0xf7392e5e, 0xfa302055, +0xb79aec01, 0xba93e20a, 0xad88f017, 0xa081fe1c, 0x83bed42d, 0x8eb7da26, +0x99acc83b, 0x94a5c630, 0xdfd29c59, 0xd2db9252, 0xc5c0804f, 0xc8c98e44, +0xebf6a475, 0xe6ffaa7e, 0xf1e4b863, 0xfcedb668, 0x670a0cb1, 0x6a0302ba, +0x7d1810a7, 0x70111eac, 0x532e349d, 0x5e273a96, 0x493c288b, 0x44352680, +0x0f427ce9, 0x024b72e2, 0x155060ff, 0x18596ef4, 0x3b6644c5, 0x366f4ace, +0x217458d3, 0x2c7d56d8, 0x0ca1377a, 0x01a83971, 0x16b32b6c, 0x1bba2567, +0x38850f56, 0x358c015d, 0x22971340, 0x2f9e1d4b, 0x64e94722, 0x69e04929, +0x7efb5b34, 0x73f2553f, 0x50cd7f0e, 0x5dc47105, 0x4adf6318, 0x47d66d13, +0xdc31d7ca, 0xd138d9c1, 0xc623cbdc, 0xcb2ac5d7, 0xe815efe6, 0xe51ce1ed, +0xf207f3f0, 0xff0efdfb, 0xb479a792, 0xb970a999, 0xae6bbb84, 0xa362b58f, +0x805d9fbe, 0x8d5491b5, 0x9a4f83a8, 0x97468da3 +}; +#else +static const PRUint32 _IMXC1[256] = +{ +0x00000000, 0x0b0e090d, 0x161c121a, 0x1d121b17, 0x2c382434, 0x27362d39, +0x3a24362e, 0x312a3f23, 0x58704868, 0x537e4165, 0x4e6c5a72, 0x4562537f, +0x74486c5c, 0x7f466551, 0x62547e46, 0x695a774b, 0xb0e090d0, 0xbbee99dd, +0xa6fc82ca, 0xadf28bc7, 0x9cd8b4e4, 0x97d6bde9, 0x8ac4a6fe, 0x81caaff3, +0xe890d8b8, 0xe39ed1b5, 0xfe8ccaa2, 0xf582c3af, 0xc4a8fc8c, 0xcfa6f581, +0xd2b4ee96, 0xd9bae79b, 0x7bdb3bbb, 0x70d532b6, 0x6dc729a1, 0x66c920ac, +0x57e31f8f, 0x5ced1682, 0x41ff0d95, 0x4af10498, 0x23ab73d3, 0x28a57ade, +0x35b761c9, 0x3eb968c4, 0x0f9357e7, 0x049d5eea, 0x198f45fd, 0x12814cf0, +0xcb3bab6b, 0xc035a266, 0xdd27b971, 0xd629b07c, 0xe7038f5f, 0xec0d8652, +0xf11f9d45, 0xfa119448, 0x934be303, 0x9845ea0e, 0x8557f119, 0x8e59f814, +0xbf73c737, 0xb47dce3a, 0xa96fd52d, 0xa261dc20, 0xf6ad766d, 0xfda37f60, +0xe0b16477, 0xebbf6d7a, 0xda955259, 
0xd19b5b54, 0xcc894043, 0xc787494e, +0xaedd3e05, 0xa5d33708, 0xb8c12c1f, 0xb3cf2512, 0x82e51a31, 0x89eb133c, +0x94f9082b, 0x9ff70126, 0x464de6bd, 0x4d43efb0, 0x5051f4a7, 0x5b5ffdaa, +0x6a75c289, 0x617bcb84, 0x7c69d093, 0x7767d99e, 0x1e3daed5, 0x1533a7d8, +0x0821bccf, 0x032fb5c2, 0x32058ae1, 0x390b83ec, 0x241998fb, 0x2f1791f6, +0x8d764dd6, 0x867844db, 0x9b6a5fcc, 0x906456c1, 0xa14e69e2, 0xaa4060ef, +0xb7527bf8, 0xbc5c72f5, 0xd50605be, 0xde080cb3, 0xc31a17a4, 0xc8141ea9, +0xf93e218a, 0xf2302887, 0xef223390, 0xe42c3a9d, 0x3d96dd06, 0x3698d40b, +0x2b8acf1c, 0x2084c611, 0x11aef932, 0x1aa0f03f, 0x07b2eb28, 0x0cbce225, +0x65e6956e, 0x6ee89c63, 0x73fa8774, 0x78f48e79, 0x49deb15a, 0x42d0b857, +0x5fc2a340, 0x54ccaa4d, 0xf741ecda, 0xfc4fe5d7, 0xe15dfec0, 0xea53f7cd, +0xdb79c8ee, 0xd077c1e3, 0xcd65daf4, 0xc66bd3f9, 0xaf31a4b2, 0xa43fadbf, +0xb92db6a8, 0xb223bfa5, 0x83098086, 0x8807898b, 0x9515929c, 0x9e1b9b91, +0x47a17c0a, 0x4caf7507, 0x51bd6e10, 0x5ab3671d, 0x6b99583e, 0x60975133, +0x7d854a24, 0x768b4329, 0x1fd13462, 0x14df3d6f, 0x09cd2678, 0x02c32f75, +0x33e91056, 0x38e7195b, 0x25f5024c, 0x2efb0b41, 0x8c9ad761, 0x8794de6c, +0x9a86c57b, 0x9188cc76, 0xa0a2f355, 0xabacfa58, 0xb6bee14f, 0xbdb0e842, +0xd4ea9f09, 0xdfe49604, 0xc2f68d13, 0xc9f8841e, 0xf8d2bb3d, 0xf3dcb230, +0xeecea927, 0xe5c0a02a, 0x3c7a47b1, 0x37744ebc, 0x2a6655ab, 0x21685ca6, +0x10426385, 0x1b4c6a88, 0x065e719f, 0x0d507892, 0x640a0fd9, 0x6f0406d4, +0x72161dc3, 0x791814ce, 0x48322bed, 0x433c22e0, 0x5e2e39f7, 0x552030fa, +0x01ec9ab7, 0x0ae293ba, 0x17f088ad, 0x1cfe81a0, 0x2dd4be83, 0x26dab78e, +0x3bc8ac99, 0x30c6a594, 0x599cd2df, 0x5292dbd2, 0x4f80c0c5, 0x448ec9c8, +0x75a4f6eb, 0x7eaaffe6, 0x63b8e4f1, 0x68b6edfc, 0xb10c0a67, 0xba02036a, +0xa710187d, 0xac1e1170, 0x9d342e53, 0x963a275e, 0x8b283c49, 0x80263544, +0xe97c420f, 0xe2724b02, 0xff605015, 0xf46e5918, 0xc544663b, 0xce4a6f36, +0xd3587421, 0xd8567d2c, 0x7a37a10c, 0x7139a801, 0x6c2bb316, 0x6725ba1b, +0x560f8538, 0x5d018c35, 0x40139722, 0x4b1d9e2f, 0x2247e964, 
0x2949e069, +0x345bfb7e, 0x3f55f273, 0x0e7fcd50, 0x0571c45d, 0x1863df4a, 0x136dd647, +0xcad731dc, 0xc1d938d1, 0xdccb23c6, 0xd7c52acb, 0xe6ef15e8, 0xede11ce5, +0xf0f307f2, 0xfbfd0eff, 0x92a779b4, 0x99a970b9, 0x84bb6bae, 0x8fb562a3, +0xbe9f5d80, 0xb591548d, 0xa8834f9a, 0xa38d4697 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC2[256] = +{ +0x00000000, 0x090e0b0d, 0x121c161a, 0x1b121d17, 0x24382c34, 0x2d362739, +0x36243a2e, 0x3f2a3123, 0x48705868, 0x417e5365, 0x5a6c4e72, 0x5362457f, +0x6c48745c, 0x65467f51, 0x7e546246, 0x775a694b, 0x90e0b0d0, 0x99eebbdd, +0x82fca6ca, 0x8bf2adc7, 0xb4d89ce4, 0xbdd697e9, 0xa6c48afe, 0xafca81f3, +0xd890e8b8, 0xd19ee3b5, 0xca8cfea2, 0xc382f5af, 0xfca8c48c, 0xf5a6cf81, +0xeeb4d296, 0xe7bad99b, 0x3bdb7bbb, 0x32d570b6, 0x29c76da1, 0x20c966ac, +0x1fe3578f, 0x16ed5c82, 0x0dff4195, 0x04f14a98, 0x73ab23d3, 0x7aa528de, +0x61b735c9, 0x68b93ec4, 0x57930fe7, 0x5e9d04ea, 0x458f19fd, 0x4c8112f0, +0xab3bcb6b, 0xa235c066, 0xb927dd71, 0xb029d67c, 0x8f03e75f, 0x860dec52, +0x9d1ff145, 0x9411fa48, 0xe34b9303, 0xea45980e, 0xf1578519, 0xf8598e14, +0xc773bf37, 0xce7db43a, 0xd56fa92d, 0xdc61a220, 0x76adf66d, 0x7fa3fd60, +0x64b1e077, 0x6dbfeb7a, 0x5295da59, 0x5b9bd154, 0x4089cc43, 0x4987c74e, +0x3eddae05, 0x37d3a508, 0x2cc1b81f, 0x25cfb312, 0x1ae58231, 0x13eb893c, +0x08f9942b, 0x01f79f26, 0xe64d46bd, 0xef434db0, 0xf45150a7, 0xfd5f5baa, +0xc2756a89, 0xcb7b6184, 0xd0697c93, 0xd967779e, 0xae3d1ed5, 0xa73315d8, +0xbc2108cf, 0xb52f03c2, 0x8a0532e1, 0x830b39ec, 0x981924fb, 0x91172ff6, +0x4d768dd6, 0x447886db, 0x5f6a9bcc, 0x566490c1, 0x694ea1e2, 0x6040aaef, +0x7b52b7f8, 0x725cbcf5, 0x0506d5be, 0x0c08deb3, 0x171ac3a4, 0x1e14c8a9, +0x213ef98a, 0x2830f287, 0x3322ef90, 0x3a2ce49d, 0xdd963d06, 0xd498360b, +0xcf8a2b1c, 0xc6842011, 0xf9ae1132, 0xf0a01a3f, 0xebb20728, 0xe2bc0c25, +0x95e6656e, 0x9ce86e63, 0x87fa7374, 0x8ef47879, 0xb1de495a, 0xb8d04257, +0xa3c25f40, 0xaacc544d, 0xec41f7da, 0xe54ffcd7, 0xfe5de1c0, 0xf753eacd, +0xc879dbee, 0xc177d0e3, 
0xda65cdf4, 0xd36bc6f9, 0xa431afb2, 0xad3fa4bf, +0xb62db9a8, 0xbf23b2a5, 0x80098386, 0x8907888b, 0x9215959c, 0x9b1b9e91, +0x7ca1470a, 0x75af4c07, 0x6ebd5110, 0x67b35a1d, 0x58996b3e, 0x51976033, +0x4a857d24, 0x438b7629, 0x34d11f62, 0x3ddf146f, 0x26cd0978, 0x2fc30275, +0x10e93356, 0x19e7385b, 0x02f5254c, 0x0bfb2e41, 0xd79a8c61, 0xde94876c, +0xc5869a7b, 0xcc889176, 0xf3a2a055, 0xfaacab58, 0xe1beb64f, 0xe8b0bd42, +0x9fead409, 0x96e4df04, 0x8df6c213, 0x84f8c91e, 0xbbd2f83d, 0xb2dcf330, +0xa9ceee27, 0xa0c0e52a, 0x477a3cb1, 0x4e7437bc, 0x55662aab, 0x5c6821a6, +0x63421085, 0x6a4c1b88, 0x715e069f, 0x78500d92, 0x0f0a64d9, 0x06046fd4, +0x1d1672c3, 0x141879ce, 0x2b3248ed, 0x223c43e0, 0x392e5ef7, 0x302055fa, +0x9aec01b7, 0x93e20aba, 0x88f017ad, 0x81fe1ca0, 0xbed42d83, 0xb7da268e, +0xacc83b99, 0xa5c63094, 0xd29c59df, 0xdb9252d2, 0xc0804fc5, 0xc98e44c8, +0xf6a475eb, 0xffaa7ee6, 0xe4b863f1, 0xedb668fc, 0x0a0cb167, 0x0302ba6a, +0x1810a77d, 0x111eac70, 0x2e349d53, 0x273a965e, 0x3c288b49, 0x35268044, +0x427ce90f, 0x4b72e202, 0x5060ff15, 0x596ef418, 0x6644c53b, 0x6f4ace36, +0x7458d321, 0x7d56d82c, 0xa1377a0c, 0xa8397101, 0xb32b6c16, 0xba25671b, +0x850f5638, 0x8c015d35, 0x97134022, 0x9e1d4b2f, 0xe9472264, 0xe0492969, +0xfb5b347e, 0xf2553f73, 0xcd7f0e50, 0xc471055d, 0xdf63184a, 0xd66d1347, +0x31d7cadc, 0x38d9c1d1, 0x23cbdcc6, 0x2ac5d7cb, 0x15efe6e8, 0x1ce1ede5, +0x07f3f0f2, 0x0efdfbff, 0x79a792b4, 0x70a999b9, 0x6bbb84ae, 0x62b58fa3, +0x5d9fbe80, 0x5491b58d, 0x4f83a89a, 0x468da397 +}; +#else +static const PRUint32 _IMXC2[256] = +{ +0x00000000, 0x0d0b0e09, 0x1a161c12, 0x171d121b, 0x342c3824, 0x3927362d, +0x2e3a2436, 0x23312a3f, 0x68587048, 0x65537e41, 0x724e6c5a, 0x7f456253, +0x5c74486c, 0x517f4665, 0x4662547e, 0x4b695a77, 0xd0b0e090, 0xddbbee99, +0xcaa6fc82, 0xc7adf28b, 0xe49cd8b4, 0xe997d6bd, 0xfe8ac4a6, 0xf381caaf, +0xb8e890d8, 0xb5e39ed1, 0xa2fe8cca, 0xaff582c3, 0x8cc4a8fc, 0x81cfa6f5, +0x96d2b4ee, 0x9bd9bae7, 0xbb7bdb3b, 0xb670d532, 0xa16dc729, 0xac66c920, +0x8f57e31f, 0x825ced16, 
0x9541ff0d, 0x984af104, 0xd323ab73, 0xde28a57a, +0xc935b761, 0xc43eb968, 0xe70f9357, 0xea049d5e, 0xfd198f45, 0xf012814c, +0x6bcb3bab, 0x66c035a2, 0x71dd27b9, 0x7cd629b0, 0x5fe7038f, 0x52ec0d86, +0x45f11f9d, 0x48fa1194, 0x03934be3, 0x0e9845ea, 0x198557f1, 0x148e59f8, +0x37bf73c7, 0x3ab47dce, 0x2da96fd5, 0x20a261dc, 0x6df6ad76, 0x60fda37f, +0x77e0b164, 0x7aebbf6d, 0x59da9552, 0x54d19b5b, 0x43cc8940, 0x4ec78749, +0x05aedd3e, 0x08a5d337, 0x1fb8c12c, 0x12b3cf25, 0x3182e51a, 0x3c89eb13, +0x2b94f908, 0x269ff701, 0xbd464de6, 0xb04d43ef, 0xa75051f4, 0xaa5b5ffd, +0x896a75c2, 0x84617bcb, 0x937c69d0, 0x9e7767d9, 0xd51e3dae, 0xd81533a7, +0xcf0821bc, 0xc2032fb5, 0xe132058a, 0xec390b83, 0xfb241998, 0xf62f1791, +0xd68d764d, 0xdb867844, 0xcc9b6a5f, 0xc1906456, 0xe2a14e69, 0xefaa4060, +0xf8b7527b, 0xf5bc5c72, 0xbed50605, 0xb3de080c, 0xa4c31a17, 0xa9c8141e, +0x8af93e21, 0x87f23028, 0x90ef2233, 0x9de42c3a, 0x063d96dd, 0x0b3698d4, +0x1c2b8acf, 0x112084c6, 0x3211aef9, 0x3f1aa0f0, 0x2807b2eb, 0x250cbce2, +0x6e65e695, 0x636ee89c, 0x7473fa87, 0x7978f48e, 0x5a49deb1, 0x5742d0b8, +0x405fc2a3, 0x4d54ccaa, 0xdaf741ec, 0xd7fc4fe5, 0xc0e15dfe, 0xcdea53f7, +0xeedb79c8, 0xe3d077c1, 0xf4cd65da, 0xf9c66bd3, 0xb2af31a4, 0xbfa43fad, +0xa8b92db6, 0xa5b223bf, 0x86830980, 0x8b880789, 0x9c951592, 0x919e1b9b, +0x0a47a17c, 0x074caf75, 0x1051bd6e, 0x1d5ab367, 0x3e6b9958, 0x33609751, +0x247d854a, 0x29768b43, 0x621fd134, 0x6f14df3d, 0x7809cd26, 0x7502c32f, +0x5633e910, 0x5b38e719, 0x4c25f502, 0x412efb0b, 0x618c9ad7, 0x6c8794de, +0x7b9a86c5, 0x769188cc, 0x55a0a2f3, 0x58abacfa, 0x4fb6bee1, 0x42bdb0e8, +0x09d4ea9f, 0x04dfe496, 0x13c2f68d, 0x1ec9f884, 0x3df8d2bb, 0x30f3dcb2, +0x27eecea9, 0x2ae5c0a0, 0xb13c7a47, 0xbc37744e, 0xab2a6655, 0xa621685c, +0x85104263, 0x881b4c6a, 0x9f065e71, 0x920d5078, 0xd9640a0f, 0xd46f0406, +0xc372161d, 0xce791814, 0xed48322b, 0xe0433c22, 0xf75e2e39, 0xfa552030, +0xb701ec9a, 0xba0ae293, 0xad17f088, 0xa01cfe81, 0x832dd4be, 0x8e26dab7, +0x993bc8ac, 0x9430c6a5, 0xdf599cd2, 0xd25292db, 
0xc54f80c0, 0xc8448ec9, +0xeb75a4f6, 0xe67eaaff, 0xf163b8e4, 0xfc68b6ed, 0x67b10c0a, 0x6aba0203, +0x7da71018, 0x70ac1e11, 0x539d342e, 0x5e963a27, 0x498b283c, 0x44802635, +0x0fe97c42, 0x02e2724b, 0x15ff6050, 0x18f46e59, 0x3bc54466, 0x36ce4a6f, +0x21d35874, 0x2cd8567d, 0x0c7a37a1, 0x017139a8, 0x166c2bb3, 0x1b6725ba, +0x38560f85, 0x355d018c, 0x22401397, 0x2f4b1d9e, 0x642247e9, 0x692949e0, +0x7e345bfb, 0x733f55f2, 0x500e7fcd, 0x5d0571c4, 0x4a1863df, 0x47136dd6, +0xdccad731, 0xd1c1d938, 0xc6dccb23, 0xcbd7c52a, 0xe8e6ef15, 0xe5ede11c, +0xf2f0f307, 0xfffbfd0e, 0xb492a779, 0xb999a970, 0xae84bb6b, 0xa38fb562, +0x80be9f5d, 0x8db59154, 0x9aa8834f, 0x97a38d46 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC3[256] = +{ +0x00000000, 0x0e0b0d09, 0x1c161a12, 0x121d171b, 0x382c3424, 0x3627392d, +0x243a2e36, 0x2a31233f, 0x70586848, 0x7e536541, 0x6c4e725a, 0x62457f53, +0x48745c6c, 0x467f5165, 0x5462467e, 0x5a694b77, 0xe0b0d090, 0xeebbdd99, +0xfca6ca82, 0xf2adc78b, 0xd89ce4b4, 0xd697e9bd, 0xc48afea6, 0xca81f3af, +0x90e8b8d8, 0x9ee3b5d1, 0x8cfea2ca, 0x82f5afc3, 0xa8c48cfc, 0xa6cf81f5, +0xb4d296ee, 0xbad99be7, 0xdb7bbb3b, 0xd570b632, 0xc76da129, 0xc966ac20, +0xe3578f1f, 0xed5c8216, 0xff41950d, 0xf14a9804, 0xab23d373, 0xa528de7a, +0xb735c961, 0xb93ec468, 0x930fe757, 0x9d04ea5e, 0x8f19fd45, 0x8112f04c, +0x3bcb6bab, 0x35c066a2, 0x27dd71b9, 0x29d67cb0, 0x03e75f8f, 0x0dec5286, +0x1ff1459d, 0x11fa4894, 0x4b9303e3, 0x45980eea, 0x578519f1, 0x598e14f8, +0x73bf37c7, 0x7db43ace, 0x6fa92dd5, 0x61a220dc, 0xadf66d76, 0xa3fd607f, +0xb1e07764, 0xbfeb7a6d, 0x95da5952, 0x9bd1545b, 0x89cc4340, 0x87c74e49, +0xddae053e, 0xd3a50837, 0xc1b81f2c, 0xcfb31225, 0xe582311a, 0xeb893c13, +0xf9942b08, 0xf79f2601, 0x4d46bde6, 0x434db0ef, 0x5150a7f4, 0x5f5baafd, +0x756a89c2, 0x7b6184cb, 0x697c93d0, 0x67779ed9, 0x3d1ed5ae, 0x3315d8a7, +0x2108cfbc, 0x2f03c2b5, 0x0532e18a, 0x0b39ec83, 0x1924fb98, 0x172ff691, +0x768dd64d, 0x7886db44, 0x6a9bcc5f, 0x6490c156, 0x4ea1e269, 0x40aaef60, +0x52b7f87b, 
0x5cbcf572, 0x06d5be05, 0x08deb30c, 0x1ac3a417, 0x14c8a91e, +0x3ef98a21, 0x30f28728, 0x22ef9033, 0x2ce49d3a, 0x963d06dd, 0x98360bd4, +0x8a2b1ccf, 0x842011c6, 0xae1132f9, 0xa01a3ff0, 0xb20728eb, 0xbc0c25e2, +0xe6656e95, 0xe86e639c, 0xfa737487, 0xf478798e, 0xde495ab1, 0xd04257b8, +0xc25f40a3, 0xcc544daa, 0x41f7daec, 0x4ffcd7e5, 0x5de1c0fe, 0x53eacdf7, +0x79dbeec8, 0x77d0e3c1, 0x65cdf4da, 0x6bc6f9d3, 0x31afb2a4, 0x3fa4bfad, +0x2db9a8b6, 0x23b2a5bf, 0x09838680, 0x07888b89, 0x15959c92, 0x1b9e919b, +0xa1470a7c, 0xaf4c0775, 0xbd51106e, 0xb35a1d67, 0x996b3e58, 0x97603351, +0x857d244a, 0x8b762943, 0xd11f6234, 0xdf146f3d, 0xcd097826, 0xc302752f, +0xe9335610, 0xe7385b19, 0xf5254c02, 0xfb2e410b, 0x9a8c61d7, 0x94876cde, +0x869a7bc5, 0x889176cc, 0xa2a055f3, 0xacab58fa, 0xbeb64fe1, 0xb0bd42e8, +0xead4099f, 0xe4df0496, 0xf6c2138d, 0xf8c91e84, 0xd2f83dbb, 0xdcf330b2, +0xceee27a9, 0xc0e52aa0, 0x7a3cb147, 0x7437bc4e, 0x662aab55, 0x6821a65c, +0x42108563, 0x4c1b886a, 0x5e069f71, 0x500d9278, 0x0a64d90f, 0x046fd406, +0x1672c31d, 0x1879ce14, 0x3248ed2b, 0x3c43e022, 0x2e5ef739, 0x2055fa30, +0xec01b79a, 0xe20aba93, 0xf017ad88, 0xfe1ca081, 0xd42d83be, 0xda268eb7, +0xc83b99ac, 0xc63094a5, 0x9c59dfd2, 0x9252d2db, 0x804fc5c0, 0x8e44c8c9, +0xa475ebf6, 0xaa7ee6ff, 0xb863f1e4, 0xb668fced, 0x0cb1670a, 0x02ba6a03, +0x10a77d18, 0x1eac7011, 0x349d532e, 0x3a965e27, 0x288b493c, 0x26804435, +0x7ce90f42, 0x72e2024b, 0x60ff1550, 0x6ef41859, 0x44c53b66, 0x4ace366f, +0x58d32174, 0x56d82c7d, 0x377a0ca1, 0x397101a8, 0x2b6c16b3, 0x25671bba, +0x0f563885, 0x015d358c, 0x13402297, 0x1d4b2f9e, 0x472264e9, 0x492969e0, +0x5b347efb, 0x553f73f2, 0x7f0e50cd, 0x71055dc4, 0x63184adf, 0x6d1347d6, +0xd7cadc31, 0xd9c1d138, 0xcbdcc623, 0xc5d7cb2a, 0xefe6e815, 0xe1ede51c, +0xf3f0f207, 0xfdfbff0e, 0xa792b479, 0xa999b970, 0xbb84ae6b, 0xb58fa362, +0x9fbe805d, 0x91b58d54, 0x83a89a4f, 0x8da39746 +}; +#else +static const PRUint32 _IMXC3[256] = +{ +0x00000000, 0x090d0b0e, 0x121a161c, 0x1b171d12, 0x24342c38, 0x2d392736, +0x362e3a24, 
0x3f23312a, 0x48685870, 0x4165537e, 0x5a724e6c, 0x537f4562, +0x6c5c7448, 0x65517f46, 0x7e466254, 0x774b695a, 0x90d0b0e0, 0x99ddbbee, +0x82caa6fc, 0x8bc7adf2, 0xb4e49cd8, 0xbde997d6, 0xa6fe8ac4, 0xaff381ca, +0xd8b8e890, 0xd1b5e39e, 0xcaa2fe8c, 0xc3aff582, 0xfc8cc4a8, 0xf581cfa6, +0xee96d2b4, 0xe79bd9ba, 0x3bbb7bdb, 0x32b670d5, 0x29a16dc7, 0x20ac66c9, +0x1f8f57e3, 0x16825ced, 0x0d9541ff, 0x04984af1, 0x73d323ab, 0x7ade28a5, +0x61c935b7, 0x68c43eb9, 0x57e70f93, 0x5eea049d, 0x45fd198f, 0x4cf01281, +0xab6bcb3b, 0xa266c035, 0xb971dd27, 0xb07cd629, 0x8f5fe703, 0x8652ec0d, +0x9d45f11f, 0x9448fa11, 0xe303934b, 0xea0e9845, 0xf1198557, 0xf8148e59, +0xc737bf73, 0xce3ab47d, 0xd52da96f, 0xdc20a261, 0x766df6ad, 0x7f60fda3, +0x6477e0b1, 0x6d7aebbf, 0x5259da95, 0x5b54d19b, 0x4043cc89, 0x494ec787, +0x3e05aedd, 0x3708a5d3, 0x2c1fb8c1, 0x2512b3cf, 0x1a3182e5, 0x133c89eb, +0x082b94f9, 0x01269ff7, 0xe6bd464d, 0xefb04d43, 0xf4a75051, 0xfdaa5b5f, +0xc2896a75, 0xcb84617b, 0xd0937c69, 0xd99e7767, 0xaed51e3d, 0xa7d81533, +0xbccf0821, 0xb5c2032f, 0x8ae13205, 0x83ec390b, 0x98fb2419, 0x91f62f17, +0x4dd68d76, 0x44db8678, 0x5fcc9b6a, 0x56c19064, 0x69e2a14e, 0x60efaa40, +0x7bf8b752, 0x72f5bc5c, 0x05bed506, 0x0cb3de08, 0x17a4c31a, 0x1ea9c814, +0x218af93e, 0x2887f230, 0x3390ef22, 0x3a9de42c, 0xdd063d96, 0xd40b3698, +0xcf1c2b8a, 0xc6112084, 0xf93211ae, 0xf03f1aa0, 0xeb2807b2, 0xe2250cbc, +0x956e65e6, 0x9c636ee8, 0x877473fa, 0x8e7978f4, 0xb15a49de, 0xb85742d0, +0xa3405fc2, 0xaa4d54cc, 0xecdaf741, 0xe5d7fc4f, 0xfec0e15d, 0xf7cdea53, +0xc8eedb79, 0xc1e3d077, 0xdaf4cd65, 0xd3f9c66b, 0xa4b2af31, 0xadbfa43f, +0xb6a8b92d, 0xbfa5b223, 0x80868309, 0x898b8807, 0x929c9515, 0x9b919e1b, +0x7c0a47a1, 0x75074caf, 0x6e1051bd, 0x671d5ab3, 0x583e6b99, 0x51336097, +0x4a247d85, 0x4329768b, 0x34621fd1, 0x3d6f14df, 0x267809cd, 0x2f7502c3, +0x105633e9, 0x195b38e7, 0x024c25f5, 0x0b412efb, 0xd7618c9a, 0xde6c8794, +0xc57b9a86, 0xcc769188, 0xf355a0a2, 0xfa58abac, 0xe14fb6be, 0xe842bdb0, +0x9f09d4ea, 0x9604dfe4, 0x8d13c2f6, 
0x841ec9f8, 0xbb3df8d2, 0xb230f3dc, +0xa927eece, 0xa02ae5c0, 0x47b13c7a, 0x4ebc3774, 0x55ab2a66, 0x5ca62168, +0x63851042, 0x6a881b4c, 0x719f065e, 0x78920d50, 0x0fd9640a, 0x06d46f04, +0x1dc37216, 0x14ce7918, 0x2bed4832, 0x22e0433c, 0x39f75e2e, 0x30fa5520, +0x9ab701ec, 0x93ba0ae2, 0x88ad17f0, 0x81a01cfe, 0xbe832dd4, 0xb78e26da, +0xac993bc8, 0xa59430c6, 0xd2df599c, 0xdbd25292, 0xc0c54f80, 0xc9c8448e, +0xf6eb75a4, 0xffe67eaa, 0xe4f163b8, 0xedfc68b6, 0x0a67b10c, 0x036aba02, +0x187da710, 0x1170ac1e, 0x2e539d34, 0x275e963a, 0x3c498b28, 0x35448026, +0x420fe97c, 0x4b02e272, 0x5015ff60, 0x5918f46e, 0x663bc544, 0x6f36ce4a, +0x7421d358, 0x7d2cd856, 0xa10c7a37, 0xa8017139, 0xb3166c2b, 0xba1b6725, +0x8538560f, 0x8c355d01, 0x97224013, 0x9e2f4b1d, 0xe9642247, 0xe0692949, +0xfb7e345b, 0xf2733f55, 0xcd500e7f, 0xc45d0571, 0xdf4a1863, 0xd647136d, +0x31dccad7, 0x38d1c1d9, 0x23c6dccb, 0x2acbd7c5, 0x15e8e6ef, 0x1ce5ede1, +0x07f2f0f3, 0x0efffbfd, 0x79b492a7, 0x70b999a9, 0x6bae84bb, 0x62a38fb5, +0x5d80be9f, 0x548db591, 0x4f9aa883, 0x4697a38d +}; +#endif + +#endif /* RIJNDAEL_INCLUDE_TABLES */ + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 Rcon[30] = { +0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, +0x00000040, 0x00000080, 0x0000001b, 0x00000036, 0x0000006c, 0x000000d8, +0x000000ab, 0x0000004d, 0x0000009a, 0x0000002f, 0x0000005e, 0x000000bc, +0x00000063, 0x000000c6, 0x00000097, 0x00000035, 0x0000006a, 0x000000d4, +0x000000b3, 0x0000007d, 0x000000fa, 0x000000ef, 0x000000c5, 0x00000091 +}; +#else +static const PRUint32 Rcon[30] = { +0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, +0x40000000, 0x80000000, 0x1b000000, 0x36000000, 0x6c000000, 0xd8000000, +0xab000000, 0x4d000000, 0x9a000000, 0x2f000000, 0x5e000000, 0xbc000000, +0x63000000, 0xc6000000, 0x97000000, 0x35000000, 0x6a000000, 0xd4000000, +0xb3000000, 0x7d000000, 0xfa000000, 0xef000000, 0xc5000000, 0x91000000 +}; +#endif + diff --git a/security/nss/lib/freebl/rijndael_tables.c 
b/security/nss/lib/freebl/rijndael_tables.c new file mode 100644 index 0000000000..61316d13ea --- /dev/null +++ b/security/nss/lib/freebl/rijndael_tables.c @@ -0,0 +1,213 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "stdio.h" +#include "prtypes.h" +#include "blapi.h" + +/* + * what follows is code thrown together to generate the myriad of tables + * used by Rijndael, the AES cipher. + */ + +#define WORD_LE(b0, b1, b2, b3) \ + (((b3) << 24) | ((b2) << 16) | ((b1) << 8) | b0) + +#define WORD_BE(b0, b1, b2, b3) \ + (((b0) << 24) | ((b1) << 16) | ((b2) << 8) | b3) + +static const PRUint8 __S[256] = { + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, + 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, + 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, + 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, + 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, + 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, + 208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, + 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, + 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, + 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, + 224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, + 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, + 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, + 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, + 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, + 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22 +}; + +static 
const PRUint8 __SInv[256] = { + 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, + 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, + 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, + 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, + 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, + 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, + 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, + 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, + 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, + 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, + 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, + 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, + 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, + 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, + 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, + 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125 +}; + +/* GF_MULTIPLY + * + * multiply two bytes represented in GF(2**8), mod (x**8 + x**4 + x**3 + x + 1) + */ +PRUint8 +gf_multiply(PRUint8 a, PRUint8 b) +{ + PRUint8 res = 0; + while (b > 0) { + res = (b & 0x01) ? res ^ a : res; + a = (a & 0x80) ?
((a << 1) ^ 0x1b) : (a << 1); + b >>= 1; + } + return res; +} + +void +make_T_Table(char *table, const PRUint8 Sx[256], FILE *file, + unsigned char m0, unsigned char m1, + unsigned char m2, unsigned char m3) +{ + PRUint32 Ti; + int i; + fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n"); + fprintf(file, "static const PRUint32 _T%s[256] = \n{\n", table); + for (i = 0; i < 256; i++) { + Ti = WORD_LE(gf_multiply(Sx[i], m0), + gf_multiply(Sx[i], m1), + gf_multiply(Sx[i], m2), + gf_multiply(Sx[i], m3)); + if (Ti == 0) + fprintf(file, "0x00000000%c%c", (i == 255) ? ' ' : ',', + (i % 6 == 5) ? '\n' : ' '); + else + fprintf(file, "%#.8x%c%c", Ti, (i == 255) ? ' ' : ',', + (i % 6 == 5) ? '\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#else\n"); + fprintf(file, "static const PRUint32 _T%s[256] = \n{\n", table); + for (i = 0; i < 256; i++) { + Ti = WORD_BE(gf_multiply(Sx[i], m0), + gf_multiply(Sx[i], m1), + gf_multiply(Sx[i], m2), + gf_multiply(Sx[i], m3)); + if (Ti == 0) + fprintf(file, "0x00000000%c%c", (i == 255) ? ' ' : ',', + (i % 6 == 5) ? '\n' : ' '); + else + fprintf(file, "%#.8x%c%c", Ti, (i == 255) ? ' ' : ',', + (i % 6 == 5) ? '\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#endif\n\n"); +} + +void +make_InvMixCol_Table(int num, FILE *file, PRUint8 m0, PRUint8 m1, PRUint8 m2, PRUint8 m3) +{ + PRUint16 i; + PRUint8 b0, b1, b2, b3; + fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n"); + fprintf(file, "static const PRUint32 _IMXC%d[256] = \n{\n", num); + for (i = 0; i < 256; i++) { + b0 = gf_multiply(i, m0); + b1 = gf_multiply(i, m1); + b2 = gf_multiply(i, m2); + b3 = gf_multiply(i, m3); + fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", b3, b2, b1, b0, (i == 255) ? ' ' : ',', (i % 6 == 5) ? 
'\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#else\n"); + fprintf(file, "static const PRUint32 _IMXC%d[256] = \n{\n", num); + for (i = 0; i < 256; i++) { + b0 = gf_multiply(i, m0); + b1 = gf_multiply(i, m1); + b2 = gf_multiply(i, m2); + b3 = gf_multiply(i, m3); + fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", b0, b1, b2, b3, (i == 255) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#endif\n\n"); +} +/* Table generator entry point: writes the S-box, T, InvMixColumn and Rcon tables as C source text to the file "rijndael32.tab". */ +int +main() +{ + int i, j; /* NOTE(review): j (and tmp below) are never used in main */ + PRUint8 cur, last; + PRUint32 tmp; + FILE *optfile; + optfile = fopen("rijndael32.tab", "w"); /* NOTE(review): result is not checked; a failed open hands NULL to the fprintf calls below */ + /* output S, if there are no T tables */ + fprintf(optfile, "#ifndef RIJNDAEL_INCLUDE_TABLES\n"); + fprintf(optfile, "static const PRUint8 _S[256] = \n{\n"); + for (i = 0; i < 256; i++) { + fprintf(optfile, "%3d%c%c", __S[i], (i == 255) ? ' ' : ',', + (i % 16 == 15) ? '\n' : ' '); + } + fprintf(optfile, "};\n#endif /* not RIJNDAEL_INCLUDE_TABLES */\n\n"); + /* output S**-1 */ + fprintf(optfile, "static const PRUint8 _SInv[256] = \n{\n"); + for (i = 0; i < 256; i++) { + fprintf(optfile, "%3d%c%c", __SInv[i], (i == 255) ? ' ' : ',', + (i % 16 == 15) ?
'\n' : ' '); + } + fprintf(optfile, "};\n\n"); + fprintf(optfile, "#ifdef RIJNDAEL_INCLUDE_TABLES\n"); + /* The 32-bit word tables for optimized implementation */ + /* T0 = [ S[a] * 02, S[a], S[a], S[a] * 03 ] */ + make_T_Table("0", __S, optfile, 0x02, 0x01, 0x01, 0x03); + /* T1 = [ S[a] * 03, S[a] * 02, S[a], S[a] ] */ + make_T_Table("1", __S, optfile, 0x03, 0x02, 0x01, 0x01); + /* T2 = [ S[a], S[a] * 03, S[a] * 02, S[a] ] */ + make_T_Table("2", __S, optfile, 0x01, 0x03, 0x02, 0x01); + /* T3 = [ S[a], S[a], S[a] * 03, S[a] * 02 ] */ + make_T_Table("3", __S, optfile, 0x01, 0x01, 0x03, 0x02); + /* TInv0 = [ Si[a] * 0E, Si[a] * 09, Si[a] * 0D, Si[a] * 0B ] */ + make_T_Table("Inv0", __SInv, optfile, 0x0e, 0x09, 0x0d, 0x0b); + /* TInv1 = [ Si[a] * 0B, Si[a] * 0E, Si[a] * 09, Si[a] * 0D ] */ + make_T_Table("Inv1", __SInv, optfile, 0x0b, 0x0e, 0x09, 0x0d); + /* TInv2 = [ Si[a] * 0D, Si[a] * 0B, Si[a] * 0E, Si[a] * 09 ] */ + make_T_Table("Inv2", __SInv, optfile, 0x0d, 0x0b, 0x0e, 0x09); + /* TInv3 = [ Si[a] * 09, Si[a] * 0D, Si[a] * 0B, Si[a] * 0E ] */ + make_T_Table("Inv3", __SInv, optfile, 0x09, 0x0d, 0x0b, 0x0e); + /* byte multiply tables for inverse key expansion (mimics InvMixColumn) */ + make_InvMixCol_Table(0, optfile, 0x0e, 0x09, 0x0d, 0x0b); + make_InvMixCol_Table(1, optfile, 0x0b, 0x0E, 0x09, 0x0d); + make_InvMixCol_Table(2, optfile, 0x0d, 0x0b, 0x0e, 0x09); + make_InvMixCol_Table(3, optfile, 0x09, 0x0d, 0x0b, 0x0e); + fprintf(optfile, "#endif /* RIJNDAEL_INCLUDE_TABLES */\n\n"); + /* round constants for key expansion */ + fprintf(optfile, "#ifdef IS_LITTLE_ENDIAN\n"); + fprintf(optfile, "static const PRUint32 Rcon[30] = {\n"); + cur = 0x01; + for (i = 0; i < 30; i++) { + fprintf(optfile, "%#.8x%c%c", WORD_LE(cur, 0, 0, 0), + (i == 29) ? ' ' : ',', (i % 6 == 5) ? 
'\n' : ' '); + last = cur; + cur = gf_multiply(last, 0x02); + } + fprintf(optfile, "};\n"); + fprintf(optfile, "#else\n"); + fprintf(optfile, "static const PRUint32 Rcon[30] = {\n"); + cur = 0x01; + for (i = 0; i < 30; i++) { + fprintf(optfile, "%#.8x%c%c", WORD_BE(cur, 0, 0, 0), + (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' '); + last = cur; + cur = gf_multiply(last, 0x02); + } + fprintf(optfile, "};\n"); + fprintf(optfile, "#endif\n\n"); + fclose(optfile); + return 0; +} diff --git a/security/nss/lib/freebl/rsa.c b/security/nss/lib/freebl/rsa.c new file mode 100644 index 0000000000..4dac957902 --- /dev/null +++ b/security/nss/lib/freebl/rsa.c @@ -0,0 +1,1710 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * RSA key generation, public key op, private key op. + */ +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secerr.h" + +#include "prclist.h" +#include "nssilock.h" +#include "prinit.h" +#include "blapi.h" +#include "mpi.h" +#include "mpprime.h" +#include "mplogic.h" +#include "secmpi.h" +#include "secitem.h" +#include "blapii.h" + +/* The minimal required randomness is 64 bits */ +/* EXP_BLINDING_RANDOMNESS_LEN is the length of the randomness in mp_digits */ +/* with 32-bit mp_digits that is 4 digits (= 4 * 32 bits), with 64-bit mp_digits it is 2 digits; 128 bits either way */ +#define EXP_BLINDING_RANDOMNESS_LEN ((128 + MP_DIGIT_BIT - 1) / MP_DIGIT_BIT) +#define EXP_BLINDING_RANDOMNESS_LEN_BYTES (EXP_BLINDING_RANDOMNESS_LEN * sizeof(mp_digit)) + +/* +** Number of times to attempt to generate a prime (p or q) from a random +** seed (the seed changes for each iteration). +*/ +#define MAX_PRIME_GEN_ATTEMPTS 10 +/* +** Number of times to attempt to generate a key. The primes p and q change +** for each attempt.
+*/ +#define MAX_KEY_GEN_ATTEMPTS 10 + +/* Blinding Parameters max cache size */ +#define RSA_BLINDING_PARAMS_MAX_CACHE_SIZE 20 + +/* exponent should not be greater than modulus; both lengths are byte counts checked against the RSA_MAX_*_BITS / 8 limits */ +#define BAD_RSA_KEY_SIZE(modLen, expLen) \ + ((expLen) > (modLen) || (modLen) > RSA_MAX_MODULUS_BITS / 8 || \ + (expLen) > RSA_MAX_EXPONENT_BITS / 8) + +struct blindingParamsStr; +typedef struct blindingParamsStr blindingParams; + +struct blindingParamsStr { + blindingParams *next; + mp_int f, g; /* blinding parameters */ + int counter; /* number of remaining uses of (f, g) */ +}; + +/* +** RSABlindingParamsStr +** +** For discussion of Paul Kocher's timing attack against an RSA private key +** operation, see http://www.cryptography.com/timingattack/paper.html. The +** countermeasure to this attack, known as blinding, is also discussed in +** the Handbook of Applied Cryptography, 11.118-11.119. +*/ +struct RSABlindingParamsStr { + /* Blinding-specific parameters */ + PRCList link; /* link to list of structs */ + SECItem modulus; /* list element "key" */ + blindingParams *free, *bp; /* Blinding parameters queue */ + blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE]; +}; +typedef struct RSABlindingParamsStr RSABlindingParams; + +/* +** RSABlindingParamsListStr +** +** List of key-specific blinding params. The arena holds the volatile pool +** of memory for each entry and the list itself. The lock is for list +** operations, in this case insertions and iterations, as well as control +** of the counter for each set of blinding parameters. +*/ +struct RSABlindingParamsListStr { + PZLock *lock; /* Lock for the list */ + PRCondVar *cVar; /* Condition Variable */ + int waitCount; /* Number of threads waiting on cVar */ + PRCList head; /* Head of the list */ +}; + +/* +** The master blinding params list. +*/ +static struct RSABlindingParamsListStr blindingParamsList = { 0 }; + +/* Number of times to reuse (f, g).
Suggested by Paul Kocher */ +#define RSA_BLINDING_PARAMS_MAX_REUSE 50 + +/* Global, allows optional use of blinding. On by default. */ +/* Cannot be changed at the moment, due to thread-safety issues. */ +static PRBool nssRSAUseBlinding = PR_TRUE; +/* Fill in key (modulus, both exponents, both primes, exponent1/2, coefficient) from primes p and q, computing e from d or d from e when the matching need* flag is set. Fails with SEC_ERROR_NEED_RANDOM when p == q, when n is not keySizeInBits long, or when the inverse is undefined. */ +static SECStatus +rsa_build_from_primes(const mp_int *p, const mp_int *q, + mp_int *e, PRBool needPublicExponent, + mp_int *d, PRBool needPrivateExponent, + RSAPrivateKey *key, unsigned int keySizeInBits) +{ + mp_int n, phi; + mp_int psub1, qsub1, tmp; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&n) = 0; + MP_DIGITS(&phi) = 0; + MP_DIGITS(&psub1) = 0; + MP_DIGITS(&qsub1) = 0; + MP_DIGITS(&tmp) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&phi)); + CHECK_MPI_OK(mp_init(&psub1)); + CHECK_MPI_OK(mp_init(&qsub1)); + CHECK_MPI_OK(mp_init(&tmp)); + /* p and q must be distinct. */ + if (mp_cmp(p, q) == 0) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + /* 1. Compute n = p*q */ + CHECK_MPI_OK(mp_mul(p, q, &n)); + /* verify that the modulus has the desired number of bits */ + if ((unsigned)mpl_significant_bits(&n) != keySizeInBits) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + + /* at least one exponent must be given */ + PORT_Assert(!(needPublicExponent && needPrivateExponent)); + + /* 2. Compute phi = lcm(p-1, q-1) (mp_lcm below), not the product (p-1)*(q-1) */ + CHECK_MPI_OK(mp_sub_d(p, 1, &psub1)); + CHECK_MPI_OK(mp_sub_d(q, 1, &qsub1)); + if (needPublicExponent || needPrivateExponent) { + CHECK_MPI_OK(mp_lcm(&psub1, &qsub1, &phi)); + /* 3.
Compute d = e**-1 mod(phi) */ + /* or e = d**-1 mod(phi) as necessary */ + if (needPublicExponent) { + err = mp_invmod(d, &phi, e); + } else { + err = mp_invmod(e, &phi, d); + } + } else { + err = MP_OKAY; + } + /* Verify that phi(n) and e have no common divisors */ + if (err != MP_OKAY) { + if (err == MP_UNDEF) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + err = MP_OKAY; /* to keep PORT_SetError from being called again */ + rv = SECFailure; + } + goto cleanup; + } + + /* 4. Compute exponent1 = d mod (p-1) */ + CHECK_MPI_OK(mp_mod(d, &psub1, &tmp)); + MPINT_TO_SECITEM(&tmp, &key->exponent1, key->arena); + /* 5. Compute exponent2 = d mod (q-1) */ + CHECK_MPI_OK(mp_mod(d, &qsub1, &tmp)); + MPINT_TO_SECITEM(&tmp, &key->exponent2, key->arena); + /* 6. Compute coefficient = q**-1 mod p */ + CHECK_MPI_OK(mp_invmod(q, p, &tmp)); + MPINT_TO_SECITEM(&tmp, &key->coefficient, key->arena); + + /* copy our calculated results, overwrite what is there */ + key->modulus.data = NULL; + MPINT_TO_SECITEM(&n, &key->modulus, key->arena); + key->privateExponent.data = NULL; + MPINT_TO_SECITEM(d, &key->privateExponent, key->arena); + key->publicExponent.data = NULL; + MPINT_TO_SECITEM(e, &key->publicExponent, key->arena); + key->prime1.data = NULL; + MPINT_TO_SECITEM(p, &key->prime1, key->arena); + key->prime2.data = NULL; + MPINT_TO_SECITEM(q, &key->prime2, key->arena); +cleanup: + mp_clear(&n); + mp_clear(&phi); + mp_clear(&psub1); + mp_clear(&qsub1); + mp_clear(&tmp); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} +/* Generate a primeLen-byte prime into *prime: each of up to MAX_PRIME_GEN_ATTEMPTS tries draws primeLen random bytes, forces the two high-order bits and the low-order bit, and passes the candidate to mpp_make_prime_secure(), stopping at the first result other than MP_NO. Returns SECFailure when allocation or the RNG fails, or when err is non-zero at cleanup. */ +SECStatus +generate_prime(mp_int *prime, int primeLen) +{ + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + int piter; + unsigned char *pb = NULL; + pb = PORT_Alloc(primeLen); + if (!pb) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; /* nothing to release yet; jumping to cleanup here would return rv == SECSuccess */ + } + for (piter = 0; piter < MAX_PRIME_GEN_ATTEMPTS; piter++) { + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen)); + pb[0] |= 0xC0; /* set two high-order bits */ + pb[primeLen - 1] |= 0x01;
/* set low-order bit */ + CHECK_MPI_OK(mp_read_unsigned_octets(prime, pb, primeLen)); + err = mpp_make_prime_secure(prime, primeLen * 8, PR_FALSE); + if (err != MP_NO) + goto cleanup; + /* keep going while err == MP_NO */ + } +cleanup: + if (pb) + PORT_ZFree(pb, primeLen); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* + * make sure the key components meet fips186 requirements: p - q must have at least keySizeInBits/2 - 100 significant bits and d at least keySizeInBits/2. Returns PR_TRUE if so (or if keySizeInBits < 250, where the checks are skipped), PR_FALSE otherwise or when an mpi call fails. + */ +static PRBool +rsa_fips186_verify(mp_int *p, mp_int *q, mp_int *d, int keySizeInBits) +{ + mp_int pq_diff; + mp_err err = MP_OKAY; + PRBool ret = PR_FALSE; + + if (keySizeInBits < 250) { + /* not a valid FIPS length, no point in our other tests */ + /* if you are here, and in FIPS mode, you are outside the security + * policy */ + return PR_TRUE; + } + + /* p & q are already known to be greater than sqrt(2)*2^(keySize/2-1) */ + /* we also know that gcd(p-1,e) = 1 and gcd(q-1,e) = 1 because the + * mp_invmod() function would have failed otherwise. */ + /* now check p-q > 2^(keysize/2-100) */ + MP_DIGITS(&pq_diff) = 0; + CHECK_MPI_OK(mp_init(&pq_diff)); + /* NSS always has p > q, so we know pq_diff is positive */ + CHECK_MPI_OK(mp_sub(p, q, &pq_diff)); + if ((unsigned)mpl_significant_bits(&pq_diff) < (keySizeInBits / 2 - 100)) { + goto cleanup; + } + /* now verify d is large enough*/ + if ((unsigned)mpl_significant_bits(d) < (keySizeInBits / 2)) { + goto cleanup; + } + ret = PR_TRUE; + +cleanup: + mp_clear(&pq_diff); + return ret; +} + +/* +** Generate and return a new RSA public and private key. +** Both keys are encoded in a single RSAPrivateKey structure. +** (there is no "cx" parameter; generate_prime() draws from the global RNG) +** "keySizeInBits" is the size of the key to be generated, in bits. +** 512, 1024, etc. +** "publicExponent" must not be NULL; it is a pointer to some data that +** represents the public exponent to use. The data is a byte +** encoded integer, in "big endian" order.
+*/ +RSAPrivateKey * +RSA_NewKey(int keySizeInBits, SECItem *publicExponent) +{ + unsigned int primeLen; + mp_int p = { 0, 0, 0, NULL }; + mp_int q = { 0, 0, 0, NULL }; + mp_int e = { 0, 0, 0, NULL }; + mp_int d = { 0, 0, 0, NULL }; + int kiter; + int max_attempts; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + int prerr = 0; + RSAPrivateKey *key = NULL; + PLArenaPool *arena = NULL; + /* Require key size to be a multiple of 16 bits. */ + if (!publicExponent || keySizeInBits % 16 != 0 || + BAD_RSA_KEY_SIZE((unsigned int)keySizeInBits / 8, publicExponent->len)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + /* 1. Set the public exponent and check if it's uneven and greater than 2.*/ + MP_DIGITS(&e) = 0; + CHECK_MPI_OK(mp_init(&e)); + SECITEM_TO_MPINT(*publicExponent, &e); + if (mp_iseven(&e) || !(mp_cmp_d(&e, 2) > 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto cleanup; + } +#ifndef NSS_FIPS_DISABLED + /* Check that the exponent is not smaller than 65537 */ + if (mp_cmp_d(&e, 0x10001) < 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto cleanup; + } +#endif + + /* 2. Allocate arena & key */ + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto cleanup; + } + key = PORT_ArenaZNew(arena, RSAPrivateKey); + if (!key) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto cleanup; + } + key->arena = arena; + /* length of primes p and q (in bytes) */ + primeLen = keySizeInBits / (2 * PR_BITS_PER_BYTE); + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&d) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&d)); + /* 3. 
Set the version number (PKCS1 v1.5 says it should be zero) */ + SECITEM_AllocItem(arena, &key->version, 1); /* NOTE(review): result is not checked; version.data is written on the next line */ + key->version.data[0] = 0; + + kiter = 0; + max_attempts = 5 * (keySizeInBits / 2); /* FIPS 186-4 B.3.3 steps 4.7 and 5.8 */ + do { + PORT_SetError(0); + CHECK_SEC_OK(generate_prime(&p, primeLen)); + CHECK_SEC_OK(generate_prime(&q, primeLen)); + /* Assure p > q */ + /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any + * implementation optimization that requires p > q. We can remove + * this code in the future. + */ + if (mp_cmp(&p, &q) < 0) + mp_exch(&p, &q); + /* Attempt to use these primes to generate a key */ + rv = rsa_build_from_primes(&p, &q, + &e, PR_FALSE, /* needPublicExponent=false */ + &d, PR_TRUE, /* needPrivateExponent=true */ + key, keySizeInBits); + if (rv == SECSuccess) { + if (rsa_fips186_verify(&p, &q, &d, keySizeInBits)) { + break; + } + prerr = SEC_ERROR_NEED_RANDOM; /* retry with different values */ + } else { + prerr = PORT_GetError(); + } + kiter++; + /* loop until have primes */ + } while (prerr == SEC_ERROR_NEED_RANDOM && kiter < max_attempts); /* NOTE(review): if the final attempt builds a key but fails rsa_fips186_verify, rv is still SECSuccess and that key is returned */ + +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&e); + mp_clear(&d); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv && arena) { /* NOTE(review): the earlier !key allocation-failure path arrives with rv == SECSuccess, so its arena is not freed here */ + PORT_FreeArena(arena, PR_TRUE); + key = NULL; + } + return key; +} +/* Primality check for p: Fermat test to base 2, then mpp_pprime_secure(p, 2); returns the Fermat result if it is not MP_OKAY, otherwise the result of the second test. */ +mp_err +rsa_is_prime(mp_int *p) +{ + int res; + + /* run a Fermat test */ + res = mpp_fermat(p, 2); + if (res != MP_OKAY) { + return res; + } + + /* If that passed, run some Miller-Rabin tests */ + res = mpp_pprime_secure(p, 2); + return res; +} + +/* + * Factorize a RSA modulus n into p and q by using the exponents e and d. + * + * In: e, d, n + * Out: p, q + * + * See Handbook of Applied Cryptography, 8.2.2(i). + * + * The algorithm is probabilistic, it is run 64 times and each run has a 50% + * chance of succeeding with a runtime of O(log(e*d)). + * + * The returned p might be smaller than q.
+ */ +static mp_err +rsa_factorize_n_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q, + mp_int *n) +{ + /* lambda is the private modulus: e*d = 1 mod lambda */ + /* so: e*d - 1 = k*lambda = t*2^s where t is odd */ + mp_int klambda; + mp_int t, onetwentyeight; + unsigned long s = 0; + unsigned long i; + + /* cand = a^(t * 2^i) mod n, next_cand = a^(t * 2^(i+1)) mod n */ + mp_int a; + mp_int cand; + mp_int next_cand; + + mp_int n_minus_one; + mp_err err = MP_OKAY; + + MP_DIGITS(&klambda) = 0; + MP_DIGITS(&t) = 0; + MP_DIGITS(&a) = 0; + MP_DIGITS(&cand) = 0; + MP_DIGITS(&n_minus_one) = 0; + MP_DIGITS(&next_cand) = 0; + MP_DIGITS(&onetwentyeight) = 0; + CHECK_MPI_OK(mp_init(&klambda)); + CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&a)); + CHECK_MPI_OK(mp_init(&cand)); + CHECK_MPI_OK(mp_init(&n_minus_one)); + CHECK_MPI_OK(mp_init(&next_cand)); + CHECK_MPI_OK(mp_init(&onetwentyeight)); + + mp_set_int(&onetwentyeight, 128); + + /* calculate k*lambda = e*d - 1 */ + CHECK_MPI_OK(mp_mul(e, d, &klambda)); + CHECK_MPI_OK(mp_sub_d(&klambda, 1, &klambda)); + + /* factorize klambda into t*2^s */ + CHECK_MPI_OK(mp_copy(&klambda, &t)); + while (mpp_divis_d(&t, 2) == MP_YES) { + CHECK_MPI_OK(mp_div_2(&t, &t)); + s += 1; + } + + /* precompute n_minus_one = n - 1 */ + CHECK_MPI_OK(mp_copy(n, &n_minus_one)); + CHECK_MPI_OK(mp_sub_d(&n_minus_one, 1, &n_minus_one)); + + /* pick random bases a, each one has a 50% leading to a factorization */ + CHECK_MPI_OK(mp_set_int(&a, 2)); + /* The following is equivalent to for (a=2, a <= 128, a+=2) */ + while (mp_cmp(&a, &onetwentyeight) <= 0) { + /* compute the base cand = a^(t * 2^0) [i = 0] */ + CHECK_MPI_OK(mp_exptmod(&a, &t, n, &cand)); + + for (i = 0; i < s; i++) { + /* condition 1: skip the base if we hit a trivial factor of n */ + if (mp_cmp(&cand, &n_minus_one) == 0 || mp_cmp_d(&cand, 1) == 0) { + break; + } + + /* increase i in a^(t * 2^i) by squaring the number */ + CHECK_MPI_OK(mp_exptmod_d(&cand, 2, n, &next_cand)); + + 
/* condition 2: a^(t * 2^(i+1)) = 1 mod n */ + if (mp_cmp_d(&next_cand, 1) == 0) { + /* conditions verified, gcd(a^(t * 2^i) - 1, n) is a factor */ + CHECK_MPI_OK(mp_sub_d(&cand, 1, &cand)); + CHECK_MPI_OK(mp_gcd(&cand, n, p)); + if (mp_cmp_d(p, 1) == 0) { + CHECK_MPI_OK(mp_add_d(&cand, 1, &cand)); + break; + } + CHECK_MPI_OK(mp_div(n, p, q, NULL)); + goto cleanup; + } + CHECK_MPI_OK(mp_copy(&next_cand, &cand)); + } + + CHECK_MPI_OK(mp_add_d(&a, 2, &a)); + } + + /* if we reach here it's likely (2^64 - 1 / 2^64) that d is wrong */ + err = MP_RANGE; + +cleanup: + mp_clear(&klambda); + mp_clear(&t); + mp_clear(&a); + mp_clear(&cand); + mp_clear(&n_minus_one); + mp_clear(&next_cand); + mp_clear(&onetwentyeight); + return err; +} + +/* + * Try to find the two primes based on 2 exponents plus a prime. + * + * In: e, d and p. + * Out: p,q. + * + * Step 1, Since d = e**-1 mod phi, we know that d*e == 1 mod phi, or + * d*e = 1+k*phi, or d*e-1 = k*phi. since d is less than phi and e is + * usually less than d, then k must be an integer between e-1 and 1 + * (probably on the order of e). + * Step 1a, We can divide k*phi by prime-1 and get k*(q-1). This will reduce + * the size of our division through the rest of the loop. + * Step 2, Loop through the values k=e-1 to 1 looking for k. k should be on + * the order or e, and e is typically small. This may take a while for + * a large random e. We are looking for a k that divides kphi + * evenly. Once we find a k that divides kphi evenly, we assume it + * is the true k. It's possible this k is not the 'true' k but has + * swapped factors of p-1 and/or q-1. Because of this, we + * tentatively continue Steps 3-6 inside this loop, and may return looking + * for another k on failure. + * Step 3, Calculate our tentative phi=kphi/k. Note: real phi is (p-1)*(q-1). + * Step 4a, kphi is k*(q-1), so phi is our tenative q-1. q = phi+1. + * If k is correct, q should be the right length and prime. 
+ * Step 4b, It's possible q-1 and k could have swapped factors. We now have a + * possible solution that meets our criteria. It may not be the only + * solution, however, so we keep looking. If we find more than one, + * we will fail since we cannot determine which is the correct + * solution, and returning the wrong modulus will compromise both + * moduli. If no other solution is found, we return the unique solution. + * + * This will return p & q. q may be larger than p in the case that p was given + * and it was the smaller prime. + */ +static mp_err +rsa_get_prime_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q, + mp_int *n, unsigned int keySizeInBits) +{ + mp_int kphi; /* k*phi */ + mp_int k; /* current guess at 'k' */ + mp_int phi; /* (p-1)(q-1) */ + mp_int r; /* remainder */ + mp_int tmp; /* p-1 if p is given */ + mp_err err = MP_OKAY; + unsigned int order_k; + + MP_DIGITS(&kphi) = 0; + MP_DIGITS(&phi) = 0; + MP_DIGITS(&k) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&tmp) = 0; + CHECK_MPI_OK(mp_init(&kphi)); + CHECK_MPI_OK(mp_init(&phi)); + CHECK_MPI_OK(mp_init(&k)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&tmp)); + + /* our algorithm looks for a factor k whose maximum size is dependent + * on the size of our smallest exponent, which had better be the public + * exponent (if it's the private, the key is vulnerable to a brute force + * attack). + * + * since our factor search is linear, we need to limit the maximum + * size of the public key. this should not be a problem normally, since + * public keys are usually small. + * + * if we want to handle larger public key sizes, we should have + * a version which tries to 'completely' factor k*phi (where completely + * means 'factor into primes, or composites with which are products of + * large primes). Once we have all the factors, we can sort them out and + * try different combinations to form our phi. The risk is if (p-1)/2, + * (q-1)/2, and k are all large primes. 
In any case if the public key + * is small (order of 20 some bits), then a linear search for k is + * manageable. + */ + if (mpl_significant_bits(e) > 23) { + err = MP_RANGE; + goto cleanup; + } + + /* calculate k*phi = e*d - 1 */ + CHECK_MPI_OK(mp_mul(e, d, &kphi)); + CHECK_MPI_OK(mp_sub_d(&kphi, 1, &kphi)); + + /* kphi is (e*d)-1, which is the same as k*(p-1)(q-1) + * d < (p-1)(q-1), therefor k must be less than e-1 + * We can narrow down k even more, though. Since p and q are odd and both + * have their high bit set, then we know that phi must be on order of + * keySizeBits. + */ + order_k = (unsigned)mpl_significant_bits(&kphi) - keySizeInBits; + + /* for (k=kinit; order(k) >= order_k; k--) { */ + /* k=kinit: k can't be bigger than kphi/2^(keySizeInBits -1) */ + CHECK_MPI_OK(mp_2expt(&k, keySizeInBits - 1)); + CHECK_MPI_OK(mp_div(&kphi, &k, &k, NULL)); + if (mp_cmp(&k, e) >= 0) { + /* also can't be bigger then e-1 */ + CHECK_MPI_OK(mp_sub_d(e, 1, &k)); + } + + /* calculate our temp value */ + /* This saves recalculating this value when the k guess is wrong, which + * is reasonably frequent. */ + /* tmp = p-1 (used to calculate q-1= phi/tmp) */ + CHECK_MPI_OK(mp_sub_d(p, 1, &tmp)); + CHECK_MPI_OK(mp_div(&kphi, &tmp, &kphi, &r)); + if (mp_cmp_z(&r) != 0) { + /* p-1 doesn't divide kphi, some parameter wasn't correct */ + err = MP_RANGE; + goto cleanup; + } + mp_zero(q); + /* kphi is now k*(q-1) */ + + /* rest of the for loop */ + for (; (err == MP_OKAY) && (mpl_significant_bits(&k) >= order_k); + err = mp_sub_d(&k, 1, &k)) { + CHECK_MPI_OK(err); + /* looking for k as a factor of kphi */ + CHECK_MPI_OK(mp_div(&kphi, &k, &phi, &r)); + if (mp_cmp_z(&r) != 0) { + /* not a factor, try the next one */ + continue; + } + /* we have a possible phi, see if it works */ + if ((unsigned)mpl_significant_bits(&phi) != keySizeInBits / 2) { + /* phi is not the right size */ + continue; + } + /* phi should be divisible by 2, since + * q is odd and phi=(q-1). 
*/ + if (mpp_divis_d(&phi, 2) == MP_NO) { + /* phi is not divisible by 2 */ + continue; + } + /* we now have a candidate for the second prime */ + CHECK_MPI_OK(mp_add_d(&phi, 1, &tmp)); + + /* check to make sure it is prime */ + err = rsa_is_prime(&tmp); + if (err != MP_OKAY) { + if (err == MP_NO) { + /* No, then we still have the wrong phi */ + continue; + } + goto cleanup; + } + /* + * It is possible that we have the wrong phi if + * k_guess*(q_guess-1) = k*(q-1) (k and q-1 have swapped factors). + * Since our q_guess is prime, however, we have found a valid + * rsa key because: + * q is the correct order of magnitude. + * phi = (p-1)(q-1) where p and q are both primes. + * e*d mod phi = 1. + * There is no way to know from the info given if this is the + * original key. We never want to return the wrong key because if + * two moduli with the same factor are known, then Euclid's gcd + * algorithm can be used to find that factor. Even though the + * caller didn't pass the original modulus, it doesn't mean the + * modulus wasn't known or isn't available somewhere. So to be safe + * if we can't be sure we have the right q, we don't return any. + * + * So, to make sure, we continue looking for other valid q's. If none + * are found, then we can safely return this one, otherwise we just + * fail */ + if (mp_cmp_z(q) != 0) { + /* this is the second valid q, don't return either, + * just fail */ + err = MP_RANGE; + break; + } + /* we only have one q so far, save it and if no others are found, + * it's safe to return it */ + CHECK_MPI_OK(mp_copy(&tmp, q)); + continue; + } + if ((unsigned)mpl_significant_bits(&k) < order_k) { + if (mp_cmp_z(q) == 0) { + /* If we get here, something was wrong with the parameters we + * were given */ + err = MP_RANGE; + } + } +cleanup: + mp_clear(&kphi); + mp_clear(&phi); + mp_clear(&k); + mp_clear(&r); + mp_clear(&tmp); + return err; +} + +/* + * take a private key with only a few elements and fill out the missing pieces.
+ * + * All the entries will be overwritten with data allocated out of the arena. + * If no arena is supplied, one will be created. + * + * The following fields must be supplied in order for this function + * to succeed: + * one of either publicExponent or privateExponent + * two more of the following 5 parameters. + * modulus (n) + * prime1 (p) + * prime2 (q) + * publicExponent (e) + * privateExponent (d) + * + * NOTE: if only the publicExponent, privateExponent, and one prime are given, + * then there may be more than one RSA key that matches that combination. + * + * All parameters will be replaced in the key structure with new parameters + * allocated out of the arena. There is no attempt to free the old structures. + * Prime1 will always be greater than prime2 (even if the caller supplies the + * smaller prime as prime1 or the larger prime as prime2). The parameters are + * not overwritten on failure. + * + * How it works: + * We can generate all the parameters from one of the exponents, plus the + * two primes. (rsa_build_from_primes) + * If we are given one of the exponents and both primes, we are done. + * If we are given one of the exponents, the modulus and one prime, we + * calculate the second prime by dividing the modulus by the given + * prime, giving us an exponent and 2 primes. + * If we are given 2 exponents and one of the primes we calculate + * k*phi = d*e-1, where k is an integer less than e which + * divides d*e-1. We find factor k so we can isolate phi. + * phi = (p-1)(q-1) + * We can use phi to find the other prime as follows: + * q = (phi/(p-1)) + 1. We now have 2 primes and an exponent. + * (NOTE: if more than one prime meets this condition, the operation + * will fail. See comments elsewhere in this file about this).
+ * (rsa_get_prime_from_exponents) + * If we are given 2 exponents and the modulus we factor the modulus to + * get the 2 missing primes (rsa_factorize_n_from_exponents) + * + */ +SECStatus +RSA_PopulatePrivateKey(RSAPrivateKey *key) +{ + PLArenaPool *arena = NULL; + PRBool needPublicExponent = PR_TRUE; + PRBool needPrivateExponent = PR_TRUE; + PRBool hasModulus = PR_FALSE; + unsigned int keySizeInBits = 0; + int prime_count = 0; + /* standard RSA nominclature */ + mp_int p, q, e, d, n; + /* remainder */ + mp_int r; + mp_err err = 0; + SECStatus rv = SECFailure; + + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&d) = 0; + MP_DIGITS(&n) = 0; + MP_DIGITS(&r) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&d)); + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&r)); + + /* if the key didn't already have an arena, create one. */ + if (key->arena == NULL) { + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + goto cleanup; + } + key->arena = arena; + } + + /* load up the known exponents */ + if (key->publicExponent.data) { + SECITEM_TO_MPINT(key->publicExponent, &e); + needPublicExponent = PR_FALSE; + } + if (key->privateExponent.data) { + SECITEM_TO_MPINT(key->privateExponent, &d); + needPrivateExponent = PR_FALSE; + } + if (needPrivateExponent && needPublicExponent) { + /* Not enough information, we need at least one exponent */ + err = MP_BADARG; + goto cleanup; + } + + /* load up the known primes. If only one prime is given, it will be + * assigned 'p'. Once we have both primes, well make sure p is the larger. + * The value prime_count tells us howe many we have acquired. 
+ */ + if (key->prime1.data) { + int primeLen = key->prime1.len; + if (key->prime1.data[0] == 0) { + primeLen--; + } + keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE; + SECITEM_TO_MPINT(key->prime1, &p); + prime_count++; + } + if (key->prime2.data) { + int primeLen = key->prime2.len; + if (key->prime2.data[0] == 0) { + primeLen--; + } + keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE; + SECITEM_TO_MPINT(key->prime2, prime_count ? &q : &p); + prime_count++; + } + /* load up the modulus */ + if (key->modulus.data) { + int modLen = key->modulus.len; + if (key->modulus.data[0] == 0) { + modLen--; + } + keySizeInBits = modLen * PR_BITS_PER_BYTE; + SECITEM_TO_MPINT(key->modulus, &n); + hasModulus = PR_TRUE; + } + /* if we have the modulus and one prime, calculate the second. */ + if ((prime_count == 1) && (hasModulus)) { + if (mp_div(&n, &p, &q, &r) != MP_OKAY || mp_cmp_z(&r) != 0) { + /* p is not a factor or n, fail */ + err = MP_BADARG; + goto cleanup; + } + prime_count++; + } + + /* If we didn't have enough primes try to calculate the primes from + * the exponents */ + if (prime_count < 2) { + /* if we don't have at least 2 primes at this point, then we need both + * exponents and one prime or a modulus*/ + if (!needPublicExponent && !needPrivateExponent && + (prime_count > 0)) { + CHECK_MPI_OK(rsa_get_prime_from_exponents(&e, &d, &p, &q, &n, + keySizeInBits)); + } else if (!needPublicExponent && !needPrivateExponent && hasModulus) { + CHECK_MPI_OK(rsa_factorize_n_from_exponents(&e, &d, &p, &q, &n)); + } else { + /* not enough given parameters to get both primes */ + err = MP_BADARG; + goto cleanup; + } + } + + /* Assure p > q */ + /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any + * implementation optimization that requires p > q. We can remove + * this code in the future. 
+ */ + if (mp_cmp(&p, &q) < 0) + mp_exch(&p, &q); + + /* we now have our 2 primes and at least one exponent, we can fill + * in the key */ + rv = rsa_build_from_primes(&p, &q, + &e, needPublicExponent, + &d, needPrivateExponent, + key, keySizeInBits); +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&e); + mp_clear(&d); + mp_clear(&n); + mp_clear(&r); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv && arena) { + PORT_FreeArena(arena, PR_TRUE); + key->arena = NULL; + } + return rv; +} + +static unsigned int +rsa_modulusLen(SECItem *modulus) +{ + if (modulus->len == 0) { + return 0; + }; + unsigned char byteZero = modulus->data[0]; + unsigned int modLen = modulus->len - !byteZero; + return modLen; +} + +/* +** Perform a raw public-key operation +** Length of input and output buffers are equal to key's modulus len. +*/ +SECStatus +RSA_PublicKeyOp(RSAPublicKey *key, + unsigned char *output, + const unsigned char *input) +{ + unsigned int modLen, expLen, offset; + mp_int n, e, m, c; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + if (!key || !output || !input) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + MP_DIGITS(&n) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&m) = 0; + MP_DIGITS(&c) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&m)); + CHECK_MPI_OK(mp_init(&c)); + modLen = rsa_modulusLen(&key->modulus); + expLen = rsa_modulusLen(&key->publicExponent); + + if (modLen == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + rv = SECFailure; + goto cleanup; + } + + /* 1. Obtain public key (n, e) */ + if (BAD_RSA_KEY_SIZE(modLen, expLen)) { + PORT_SetError(SEC_ERROR_INVALID_KEY); + rv = SECFailure; + goto cleanup; + } + SECITEM_TO_MPINT(key->modulus, &n); + SECITEM_TO_MPINT(key->publicExponent, &e); + if (e.used > n.used) { + /* exponent should not be greater than modulus */ + PORT_SetError(SEC_ERROR_INVALID_KEY); + rv = SECFailure; + goto cleanup; + } + /* 2. 
check input out of range (needs to be in range [0..n-1]) */ + offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */ + if (memcmp(input, key->modulus.data + offset, modLen) >= 0) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + rv = SECFailure; + goto cleanup; + } + /* 2 bis. Represent message as integer in range [0..n-1] */ + CHECK_MPI_OK(mp_read_unsigned_octets(&m, input, modLen)); +/* 3. Compute c = m**e mod n */ +#ifdef USE_MPI_EXPT_D + /* XXX see which is faster */ + if (MP_USED(&e) == 1) { + CHECK_MPI_OK(mp_exptmod_d(&m, MP_DIGIT(&e, 0), &n, &c)); + } else +#endif + CHECK_MPI_OK(mp_exptmod(&m, &e, &n, &c)); + /* 4. result c is ciphertext */ + err = mp_to_fixlen_octets(&c, output, modLen); + if (err >= 0) + err = MP_OKAY; +cleanup: + mp_clear(&n); + mp_clear(&e); + mp_clear(&m); + mp_clear(&c); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** RSA Private key operation (no CRT). +*/ +static SECStatus +rsa_PrivateKeyOpNoCRT(RSAPrivateKey *key, mp_int *m, mp_int *c, mp_int *n, + unsigned int modLen) +{ + mp_int d; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&d) = 0; + CHECK_MPI_OK(mp_init(&d)); + SECITEM_TO_MPINT(key->privateExponent, &d); + /* 1. m = c**d mod n */ + CHECK_MPI_OK(mp_exptmod(c, &d, n, m)); +cleanup: + mp_clear(&d); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** RSA Private key operation using CRT. +*/ +static SECStatus +rsa_PrivateKeyOpCRTNoCheck(RSAPrivateKey *key, mp_int *m, mp_int *c) +{ + mp_int p, q, d_p, d_q, qInv; + /* + The length of the randomness comes from the papers: + https://link.springer.com/chapter/10.1007/978-3-642-29912-4_7 + https://link.springer.com/chapter/10.1007/978-3-642-21554-4_5. 
+ */ + mp_int blinding_dp, blinding_dq, r1, r2; + unsigned char random_block[EXP_BLINDING_RANDOMNESS_LEN_BYTES]; + mp_int m1, m2, h, ctmp; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&d_p) = 0; + MP_DIGITS(&d_q) = 0; + MP_DIGITS(&qInv) = 0; + MP_DIGITS(&m1) = 0; + MP_DIGITS(&m2) = 0; + MP_DIGITS(&h) = 0; + MP_DIGITS(&ctmp) = 0; + MP_DIGITS(&blinding_dp) = 0; + MP_DIGITS(&blinding_dq) = 0; + MP_DIGITS(&r1) = 0; + MP_DIGITS(&r2) = 0; + + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&d_p)); + CHECK_MPI_OK(mp_init(&d_q)); + CHECK_MPI_OK(mp_init(&qInv)); + CHECK_MPI_OK(mp_init(&m1)); + CHECK_MPI_OK(mp_init(&m2)); + CHECK_MPI_OK(mp_init(&h)); + CHECK_MPI_OK(mp_init(&ctmp)); + CHECK_MPI_OK(mp_init(&blinding_dp)); + CHECK_MPI_OK(mp_init(&blinding_dq)); + CHECK_MPI_OK(mp_init_size(&r1, EXP_BLINDING_RANDOMNESS_LEN)); + CHECK_MPI_OK(mp_init_size(&r2, EXP_BLINDING_RANDOMNESS_LEN)); + + /* copy private key parameters into mp integers */ + SECITEM_TO_MPINT(key->prime1, &p); /* p */ + SECITEM_TO_MPINT(key->prime2, &q); /* q */ + SECITEM_TO_MPINT(key->exponent1, &d_p); /* d_p = d mod (p-1) */ + SECITEM_TO_MPINT(key->exponent2, &d_q); /* d_q = d mod (q-1) */ + SECITEM_TO_MPINT(key->coefficient, &qInv); /* qInv = q**-1 mod p */ + + // blinding_dp = 1 + CHECK_MPI_OK(mp_set_int(&blinding_dp, 1)); + // blinding_dp = p - 1 + CHECK_MPI_OK(mp_sub(&p, &blinding_dp, &blinding_dp)); + // generating a random value (fail closed if the RNG reports an error) + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(random_block, EXP_BLINDING_RANDOMNESS_LEN_BYTES)); + MP_USED(&r1) = EXP_BLINDING_RANDOMNESS_LEN; + memcpy(MP_DIGITS(&r1), random_block, sizeof(random_block)); + // blinding_dp = random * (p - 1) + CHECK_MPI_OK(mp_mul(&blinding_dp, &r1, &blinding_dp)); + //d_p = d_p + random * (p - 1) + CHECK_MPI_OK(mp_add(&d_p, &blinding_dp, &d_p)); + + // blinding_dq = 1 + CHECK_MPI_OK(mp_set_int(&blinding_dq, 1)); + // blinding_dq = q - 1 + CHECK_MPI_OK(mp_sub(&q,
&blinding_dq, &blinding_dq)); + // generating a random value (fail closed if the RNG reports an error) + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(random_block, EXP_BLINDING_RANDOMNESS_LEN_BYTES)); + memcpy(MP_DIGITS(&r2), random_block, sizeof(random_block)); + MP_USED(&r2) = EXP_BLINDING_RANDOMNESS_LEN; + // blinding_dq = random * (q - 1) + CHECK_MPI_OK(mp_mul(&blinding_dq, &r2, &blinding_dq)); + //d_q = d_q + random * (q-1) + CHECK_MPI_OK(mp_add(&d_q, &blinding_dq, &d_q)); + + /* 1. m1 = c**d_p mod p */ + CHECK_MPI_OK(mp_mod(c, &p, &ctmp)); + CHECK_MPI_OK(mp_exptmod(&ctmp, &d_p, &p, &m1)); + /* 2. m2 = c**d_q mod q */ + CHECK_MPI_OK(mp_mod(c, &q, &ctmp)); + CHECK_MPI_OK(mp_exptmod(&ctmp, &d_q, &q, &m2)); + /* 3. h = (m1 - m2) * qInv mod p */ + CHECK_MPI_OK(mp_submod(&m1, &m2, &p, &h)); + CHECK_MPI_OK(mp_mulmod(&h, &qInv, &p, &h)); + /* 4. m = m2 + h * q */ + CHECK_MPI_OK(mp_mul(&h, &q, m)); + CHECK_MPI_OK(mp_add(m, &m2, m)); +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&d_p); + mp_clear(&d_q); + mp_clear(&qInv); + mp_clear(&m1); + mp_clear(&m2); + mp_clear(&h); + mp_clear(&ctmp); + mp_clear(&blinding_dp); + mp_clear(&blinding_dq); + mp_clear(&r1); + mp_clear(&r2); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** An attack against RSA CRT was described by Boneh, DeMillo, and Lipton in: +** "On the Importance of Eliminating Errors in Cryptographic Computations", +** http://theory.stanford.edu/~dabo/papers/faults.ps.gz +** +** As a defense against the attack, carry out the private key operation, +** followed up with a public key operation to invert the result. +** Verify that result against the input.
+*/ +static SECStatus +rsa_PrivateKeyOpCRTCheckedPubKey(RSAPrivateKey *key, mp_int *m, mp_int *c) +{ + mp_int n, e, v; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&n) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&v) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&v)); + CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, m, c)); + SECITEM_TO_MPINT(key->modulus, &n); + SECITEM_TO_MPINT(key->publicExponent, &e); + /* Perform a public key operation v = m ** e mod n */ + CHECK_MPI_OK(mp_exptmod(m, &e, &n, &v)); + if (mp_cmp(&v, c) != 0) { + rv = SECFailure; + } +cleanup: + mp_clear(&n); + mp_clear(&e); + mp_clear(&v); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +static PRCallOnceType coBPInit = { 0, 0, 0 }; +static PRStatus +init_blinding_params_list(void) +{ + blindingParamsList.lock = PZ_NewLock(nssILockOther); + if (!blindingParamsList.lock) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return PR_FAILURE; + } + blindingParamsList.cVar = PR_NewCondVar(blindingParamsList.lock); + if (!blindingParamsList.cVar) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return PR_FAILURE; + } + blindingParamsList.waitCount = 0; + PR_INIT_CLIST(&blindingParamsList.head); + return PR_SUCCESS; +} + +static SECStatus +generate_blinding_params(RSAPrivateKey *key, mp_int *f, mp_int *g, mp_int *n, + unsigned int modLen) +{ + SECStatus rv = SECSuccess; + mp_int e, k; + mp_err err = MP_OKAY; + unsigned char *kb = NULL; + + MP_DIGITS(&e) = 0; + MP_DIGITS(&k) = 0; + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&k)); + SECITEM_TO_MPINT(key->publicExponent, &e); + /* generate random k < n */ + kb = PORT_Alloc(modLen); + if (!kb) { + /* err is still MP_OKAY here, so cleanup would return the initial + * SECSuccess with f and g never computed; flag the failure. */ + PORT_SetError(SEC_ERROR_NO_MEMORY); + rv = SECFailure; + goto cleanup; + } + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen)); + CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen)); + /* k < n */ + CHECK_MPI_OK(mp_mod(&k, n, &k)); + /* f = k**e mod n */ + CHECK_MPI_OK(mp_exptmod(&k, &e, n, f)); + /* g
= k**-1 mod n */ + CHECK_MPI_OK(mp_invmod(&k, n, g)); +cleanup: + if (kb) + PORT_ZFree(kb, modLen); + mp_clear(&k); + mp_clear(&e); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +static SECStatus +init_blinding_params(RSABlindingParams *rsabp, RSAPrivateKey *key, + mp_int *n, unsigned int modLen) +{ + blindingParams *bp = rsabp->array; + int i = 0; + + /* Initialize the list pointer for the element */ + PR_INIT_CLIST(&rsabp->link); + for (i = 0; i < RSA_BLINDING_PARAMS_MAX_CACHE_SIZE; ++i, ++bp) { + bp->next = bp + 1; + MP_DIGITS(&bp->f) = 0; + MP_DIGITS(&bp->g) = 0; + bp->counter = 0; + } + /* The last bp->next value was initialized with out + * of rsabp->array pointer and must be set to NULL + */ + rsabp->array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1].next = NULL; + + bp = rsabp->array; + rsabp->bp = NULL; + rsabp->free = bp; + + /* List elements are keyed using the modulus */ + return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus); +} + +static SECStatus +get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen, + mp_int *f, mp_int *g) +{ + RSABlindingParams *rsabp = NULL; + blindingParams *bpUnlinked = NULL; + blindingParams *bp; + PRCList *el; + SECStatus rv = SECSuccess; + mp_err err = MP_OKAY; + int cmp = -1; + PRBool holdingLock = PR_FALSE; + + do { + if (blindingParamsList.lock == NULL) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* Acquire the list lock */ + PZ_Lock(blindingParamsList.lock); + holdingLock = PR_TRUE; + + /* Walk the list looking for the private key */ + for (el = PR_NEXT_LINK(&blindingParamsList.head); + el != &blindingParamsList.head; + el = PR_NEXT_LINK(el)) { + rsabp = (RSABlindingParams *)el; + cmp = SECITEM_CompareItem(&rsabp->modulus, &key->modulus); + if (cmp >= 0) { + /* The key is found or not in the list. */ + break; + } + } + + if (cmp) { + /* At this point, the key is not in the list. 
el should point to + ** the list element before which this key should be inserted. + */ + rsabp = PORT_ZNew(RSABlindingParams); + if (!rsabp) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto cleanup; + } + + rv = init_blinding_params(rsabp, key, n, modLen); + if (rv != SECSuccess) { + PORT_ZFree(rsabp, sizeof(RSABlindingParams)); + goto cleanup; + } + + /* Insert the new element into the list + ** If inserting in the middle of the list, el points to the link + ** to insert before. Otherwise, the link needs to be appended to + ** the end of the list, which is the same as inserting before the + ** head (since el would have looped back to the head). + */ + PR_INSERT_BEFORE(&rsabp->link, el); + } + + /* We've found (or created) the RSAblindingParams struct for this key. + * Now, search its list of ready blinding params for a usable one. + */ + while (0 != (bp = rsabp->bp)) { +#ifdef UNSAFE_FUZZER_MODE + /* Found a match and there are still remaining uses left */ + /* Return the parameters */ + CHECK_MPI_OK(mp_copy(&bp->f, f)); + CHECK_MPI_OK(mp_copy(&bp->g, g)); + + PZ_Unlock(blindingParamsList.lock); + return SECSuccess; +#else + if (--(bp->counter) > 0) { + /* Found a match and there are still remaining uses left */ + /* Return the parameters */ + CHECK_MPI_OK(mp_copy(&bp->f, f)); + CHECK_MPI_OK(mp_copy(&bp->g, g)); + + PZ_Unlock(blindingParamsList.lock); + return SECSuccess; + } + /* exhausted this one, give its values to caller, and + * then retire it. 
+ */ + mp_exch(&bp->f, f); + mp_exch(&bp->g, g); + mp_clear(&bp->f); + mp_clear(&bp->g); + bp->counter = 0; + /* Move to free list */ + rsabp->bp = bp->next; + bp->next = rsabp->free; + rsabp->free = bp; + /* In case there're threads waiting for new blinding + * value - notify 1 thread the value is ready + */ + if (blindingParamsList.waitCount > 0) { + PR_NotifyCondVar(blindingParamsList.cVar); + blindingParamsList.waitCount--; + } + PZ_Unlock(blindingParamsList.lock); + return SECSuccess; +#endif + } + /* We did not find a usable set of blinding params. Can we make one? */ + /* Find a free bp struct. */ + if ((bp = rsabp->free) != NULL) { + /* unlink this bp */ + rsabp->free = bp->next; + bp->next = NULL; + bpUnlinked = bp; /* In case we fail */ + + PZ_Unlock(blindingParamsList.lock); + holdingLock = PR_FALSE; + /* generate blinding parameter values for the current thread */ + CHECK_SEC_OK(generate_blinding_params(key, f, g, n, modLen)); + + /* put the blinding parameter values into cache */ + CHECK_MPI_OK(mp_init(&bp->f)); + CHECK_MPI_OK(mp_init(&bp->g)); + CHECK_MPI_OK(mp_copy(f, &bp->f)); + CHECK_MPI_OK(mp_copy(g, &bp->g)); + + /* Put this at head of queue of usable params. */ + PZ_Lock(blindingParamsList.lock); + holdingLock = PR_TRUE; + (void)holdingLock; + /* initialize RSABlindingParamsStr */ + bp->counter = RSA_BLINDING_PARAMS_MAX_REUSE; + bp->next = rsabp->bp; + rsabp->bp = bp; + bpUnlinked = NULL; + /* In case there're threads waiting for new blinding value + * just notify them the value is ready + */ + if (blindingParamsList.waitCount > 0) { + PR_NotifyAllCondVar(blindingParamsList.cVar); + blindingParamsList.waitCount = 0; + } + PZ_Unlock(blindingParamsList.lock); + return SECSuccess; + } + /* Here, there are no usable blinding parameters available, + * and no free bp blocks, presumably because they're all + * actively having parameters generated for them. + * So, we need to wait here and not eat up CPU until some + * change happens. 
+ */ + blindingParamsList.waitCount++; + PR_WaitCondVar(blindingParamsList.cVar, PR_INTERVAL_NO_TIMEOUT); + PZ_Unlock(blindingParamsList.lock); + holdingLock = PR_FALSE; + (void)holdingLock; + } while (1); + +cleanup: + /* It is possible to reach this after the lock is already released. */ + if (bpUnlinked) { + if (!holdingLock) { + PZ_Lock(blindingParamsList.lock); + holdingLock = PR_TRUE; + } + bp = bpUnlinked; + mp_clear(&bp->f); + mp_clear(&bp->g); + bp->counter = 0; + /* Must put the unlinked bp back on the free list */ + bp->next = rsabp->free; + rsabp->free = bp; + } + if (holdingLock) { + PZ_Unlock(blindingParamsList.lock); + } + if (err) { + MP_TO_SEC_ERROR(err); + } + return SECFailure; +} + +/* +** Perform a raw private-key operation +** Length of input and output buffers are equal to key's modulus len. +*/ +static SECStatus +rsa_PrivateKeyOp(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input, + PRBool check) +{ + unsigned int modLen; + unsigned int offset; + SECStatus rv = SECSuccess; + mp_err err; + mp_int n, c, m; + mp_int f, g; + if (!key || !output || !input) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* check input out of range (needs to be in range [0..n-1]) */ + modLen = rsa_modulusLen(&key->modulus); + if (modLen == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + offset = (key->modulus.data[0] == 0) ? 
1 : 0; /* may be leading 0 */ + if (memcmp(input, key->modulus.data + offset, modLen) >= 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + MP_DIGITS(&n) = 0; + MP_DIGITS(&c) = 0; + MP_DIGITS(&m) = 0; + MP_DIGITS(&f) = 0; + MP_DIGITS(&g) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&c)); + CHECK_MPI_OK(mp_init(&m)); + CHECK_MPI_OK(mp_init(&f)); + CHECK_MPI_OK(mp_init(&g)); + SECITEM_TO_MPINT(key->modulus, &n); + OCTETS_TO_MPINT(input, &c, modLen); + /* If blinding, compute pre-image of ciphertext by multiplying by + ** blinding factor + */ + if (nssRSAUseBlinding) { + CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g)); + /* c' = c*f mod n */ + CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c)); + } + /* Do the private key operation m = c**d mod n */ + if (key->prime1.len == 0 || + key->prime2.len == 0 || + key->exponent1.len == 0 || + key->exponent2.len == 0 || + key->coefficient.len == 0) { + CHECK_SEC_OK(rsa_PrivateKeyOpNoCRT(key, &m, &c, &n, modLen)); + } else if (check) { + CHECK_SEC_OK(rsa_PrivateKeyOpCRTCheckedPubKey(key, &m, &c)); + } else { + CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c)); + } + /* If blinding, compute post-image of plaintext by multiplying by + ** blinding factor + */ + if (nssRSAUseBlinding) { + /* m = m'*g mod n */ + CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m)); + } + err = mp_to_fixlen_octets(&m, output, modLen); + if (err >= 0) + err = MP_OKAY; +cleanup: + mp_clear(&n); + mp_clear(&c); + mp_clear(&m); + mp_clear(&f); + mp_clear(&g); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +SECStatus +RSA_PrivateKeyOp(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + return rsa_PrivateKeyOp(key, output, input, PR_FALSE); +} + +SECStatus +RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + return rsa_PrivateKeyOp(key, output, input, PR_TRUE); +} + +SECStatus +RSA_PrivateKeyCheck(const RSAPrivateKey 
*key) +{ + mp_int p, q, n, psub1, qsub1, e, d, d_p, d_q, qInv, res; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&n) = 0; + MP_DIGITS(&psub1) = 0; + MP_DIGITS(&qsub1) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&d) = 0; + MP_DIGITS(&d_p) = 0; + MP_DIGITS(&d_q) = 0; + MP_DIGITS(&qInv) = 0; + MP_DIGITS(&res) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&psub1)); + CHECK_MPI_OK(mp_init(&qsub1)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&d)); + CHECK_MPI_OK(mp_init(&d_p)); + CHECK_MPI_OK(mp_init(&d_q)); + CHECK_MPI_OK(mp_init(&qInv)); + CHECK_MPI_OK(mp_init(&res)); + + if (!key->modulus.data || !key->prime1.data || !key->prime2.data || + !key->publicExponent.data || !key->privateExponent.data || + !key->exponent1.data || !key->exponent2.data || + !key->coefficient.data) { + /* call RSA_PopulatePrivateKey first, if the application wishes to + * recover these parameters */ + err = MP_BADARG; + goto cleanup; + } + + SECITEM_TO_MPINT(key->modulus, &n); + SECITEM_TO_MPINT(key->prime1, &p); + SECITEM_TO_MPINT(key->prime2, &q); + SECITEM_TO_MPINT(key->publicExponent, &e); + SECITEM_TO_MPINT(key->privateExponent, &d); + SECITEM_TO_MPINT(key->exponent1, &d_p); + SECITEM_TO_MPINT(key->exponent2, &d_q); + SECITEM_TO_MPINT(key->coefficient, &qInv); + /* p and q must be distinct. 
*/ + if (mp_cmp(&p, &q) == 0) { + rv = SECFailure; + goto cleanup; + } +#define VERIFY_MPI_EQUAL(m1, m2) \ + if (mp_cmp(m1, m2) != 0) { \ + rv = SECFailure; \ + goto cleanup; \ + } +#define VERIFY_MPI_EQUAL_1(m) \ + if (mp_cmp_d(m, 1) != 0) { \ + rv = SECFailure; \ + goto cleanup; \ + } + /* n == p * q */ + CHECK_MPI_OK(mp_mul(&p, &q, &res)); + VERIFY_MPI_EQUAL(&res, &n); + /* gcd(e, p-1) == 1 */ + CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1)); + CHECK_MPI_OK(mp_gcd(&e, &psub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* gcd(e, q-1) == 1 */ + CHECK_MPI_OK(mp_sub_d(&q, 1, &qsub1)); + CHECK_MPI_OK(mp_gcd(&e, &qsub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* d*e == 1 mod p-1 */ + CHECK_MPI_OK(mp_mulmod(&d, &e, &psub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* d*e == 1 mod q-1 */ + CHECK_MPI_OK(mp_mulmod(&d, &e, &qsub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* d_p == d mod p-1 */ + CHECK_MPI_OK(mp_mod(&d, &psub1, &res)); + VERIFY_MPI_EQUAL(&res, &d_p); + /* d_q == d mod q-1 */ + CHECK_MPI_OK(mp_mod(&d, &qsub1, &res)); + VERIFY_MPI_EQUAL(&res, &d_q); + /* q * q**-1 == 1 mod p */ + CHECK_MPI_OK(mp_mulmod(&q, &qInv, &p, &res)); + VERIFY_MPI_EQUAL_1(&res); + +cleanup: + mp_clear(&n); + mp_clear(&p); + mp_clear(&q); + mp_clear(&psub1); + mp_clear(&qsub1); + mp_clear(&e); + mp_clear(&d); + mp_clear(&d_p); + mp_clear(&d_q); + mp_clear(&qInv); + mp_clear(&res); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +SECStatus +RSA_Init(void) +{ + if (PR_CallOnce(&coBPInit, init_blinding_params_list) != PR_SUCCESS) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + return SECSuccess; +} + +/* cleanup at shutdown */ +void +RSA_Cleanup(void) +{ + blindingParams *bp = NULL; + if (!coBPInit.initialized) + return; + + while (!PR_CLIST_IS_EMPTY(&blindingParamsList.head)) { + RSABlindingParams *rsabp = + (RSABlindingParams *)PR_LIST_HEAD(&blindingParamsList.head); + PR_REMOVE_LINK(&rsabp->link); + /* clear parameters cache */ + while (rsabp->bp != NULL) 
{ + bp = rsabp->bp; + rsabp->bp = rsabp->bp->next; + mp_clear(&bp->f); + mp_clear(&bp->g); + } + SECITEM_ZfreeItem(&rsabp->modulus, PR_FALSE); + PORT_Free(rsabp); + } + + if (blindingParamsList.cVar) { + PR_DestroyCondVar(blindingParamsList.cVar); + blindingParamsList.cVar = NULL; + } + + if (blindingParamsList.lock) { + SKIP_AFTER_FORK(PZ_DestroyLock(blindingParamsList.lock)); + blindingParamsList.lock = NULL; + } + + coBPInit.initialized = 0; + coBPInit.inProgress = 0; + coBPInit.status = 0; +} + +/* + * need a central place for this function to free up all the memory that + * free_bl may have allocated along the way. Currently only RSA does this, + * so I've put it here for now. + */ +void +BL_Cleanup(void) +{ + RSA_Cleanup(); +} + +PRBool bl_parentForkedAfterC_Initialize; + +/* + * Set fork flag so it can be tested in SKIP_AFTER_FORK on relevant platforms. + */ +void +BL_SetForkState(PRBool forked) +{ + bl_parentForkedAfterC_Initialize = forked; +} diff --git a/security/nss/lib/freebl/rsapkcs.c b/security/nss/lib/freebl/rsapkcs.c new file mode 100644 index 0000000000..91b4c7c5fc --- /dev/null +++ b/security/nss/lib/freebl/rsapkcs.c @@ -0,0 +1,1705 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * RSA PKCS#1 v2.1 (RFC 3447) operations + */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secerr.h" + +#include "blapi.h" +#include "secitem.h" +#include "blapii.h" + +#define RSA_BLOCK_MIN_PAD_LEN 8 +#define RSA_BLOCK_FIRST_OCTET 0x00 +#define RSA_BLOCK_PRIVATE_PAD_OCTET 0xff +#define RSA_BLOCK_AFTER_PAD_OCTET 0x00 + +/* + * RSA block types + * + * The values of RSA_BlockPrivate and RSA_BlockPublic are fixed. + * The value of RSA_BlockRaw isn't fixed by definition, but we are keeping + * the value that NSS has been using in the past. 
+ */ +typedef enum { + RSA_BlockPrivate = 1, /* pad for a private-key operation */ + RSA_BlockPublic = 2, /* pad for a public-key operation */ + RSA_BlockRaw = 4 /* simply justify the block appropriately */ +} RSA_BlockType; + +/* Needed for RSA-PSS functions */ +static const unsigned char eightZeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +/* Constant time comparison of a single byte. + * Returns 1 iff a == b, otherwise returns 0. + * Note: For ranges of bytes, use constantTimeCompare. + */ +static unsigned char +constantTimeEQ8(unsigned char a, unsigned char b) +{ + unsigned char c = ~((a - b) | (b - a)); + c >>= 7; + return c; +} + +/* Constant time comparison of a range of bytes. + * Returns 1 iff len bytes of a are identical to len bytes of b, otherwise + * returns 0. + */ +static unsigned char +constantTimeCompare(const unsigned char *a, + const unsigned char *b, + unsigned int len) +{ + unsigned char tmp = 0; + unsigned int i; + for (i = 0; i < len; ++i, ++a, ++b) + tmp |= *a ^ *b; + return constantTimeEQ8(0x00, tmp); +} + +/* Constant time conditional. + * Returns a if c is 1, or b if c is 0. The result is undefined if c is + * not 0 or 1. 
+ */ +static unsigned int +constantTimeCondition(unsigned int c, + unsigned int a, + unsigned int b) +{ + return (~(c - 1) & a) | ((c - 1) & b); +} + +static unsigned int +rsa_modulusLen(SECItem *modulus) +{ + if (modulus->len == 0) { + return 0; + } + + unsigned char byteZero = modulus->data[0]; + unsigned int modLen = modulus->len - !byteZero; + return modLen; +} + +static unsigned int +rsa_modulusBits(SECItem *modulus) +{ + if (modulus->len == 0) { + return 0; + } + + unsigned char byteZero = modulus->data[0]; + unsigned int numBits = (modulus->len - 1) * 8; + + if (byteZero == 0 && modulus->len == 1) { + return 0; + } + + if (byteZero == 0) { + numBits -= 8; + byteZero = modulus->data[1]; + } + + while (byteZero > 0) { + numBits++; + byteZero >>= 1; + } + + return numBits; +} + +/* + * Format one block of data for public/private key encryption using + * the rules defined in PKCS #1. + */ +static unsigned char * +rsa_FormatOneBlock(unsigned modulusLen, + RSA_BlockType blockType, + SECItem *data) +{ + unsigned char *block; + unsigned char *bp; + unsigned int padLen; + unsigned int i, j; + SECStatus rv; + + block = (unsigned char *)PORT_Alloc(modulusLen); + if (block == NULL) + return NULL; + + bp = block; + + /* + * All RSA blocks start with two octets: + * 0x00 || BlockType + */ + *bp++ = RSA_BLOCK_FIRST_OCTET; + *bp++ = (unsigned char)blockType; + + switch (blockType) { + + /* + * Blocks intended for private-key operation. + */ + case RSA_BlockPrivate: /* preferred method */ + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * 1 1 padLen 1 data->len + * padLen must be at least RSA_BLOCK_MIN_PAD_LEN (8) bytes. + * Pad is either all 0x00 or all 0xff bytes, depending on blockType. 
+ */ + padLen = modulusLen - data->len - 3; + PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN); + if (padLen < RSA_BLOCK_MIN_PAD_LEN) { + PORT_ZFree(block, modulusLen); + return NULL; + } + PORT_Memset(bp, RSA_BLOCK_PRIVATE_PAD_OCTET, padLen); + bp += padLen; + *bp++ = RSA_BLOCK_AFTER_PAD_OCTET; + PORT_Memcpy(bp, data->data, data->len); + break; + + /* + * Blocks intended for public-key operation. + */ + case RSA_BlockPublic: + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * 1 1 padLen 1 data->len + * Pad is 8 or more non-zero random bytes. + * + * Build the block left to right. + * Fill the entire block from Pad to the end with random bytes. + * Use the bytes after Pad as a supply of extra random bytes from + * which to find replacements for the zero bytes in Pad. + * If we need more than that, refill the bytes after Pad with + * new random bytes as necessary. + */ + + padLen = modulusLen - (data->len + 3); + PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN); + if (padLen < RSA_BLOCK_MIN_PAD_LEN) { + PORT_ZFree(block, modulusLen); + return NULL; + } + j = modulusLen - 2; + rv = RNG_GenerateGlobalRandomBytes(bp, j); + if (rv == SECSuccess) { + for (i = 0; i < padLen;) { + unsigned char repl; + /* Pad with non-zero random data. 
*/ + if (bp[i] != RSA_BLOCK_AFTER_PAD_OCTET) { + ++i; + continue; + } + if (j <= padLen) { + rv = RNG_GenerateGlobalRandomBytes(bp + padLen, + modulusLen - (2 + padLen)); + if (rv != SECSuccess) + break; + j = modulusLen - 2; + } + do { + repl = bp[--j]; + } while (repl == RSA_BLOCK_AFTER_PAD_OCTET && j > padLen); + if (repl != RSA_BLOCK_AFTER_PAD_OCTET) { + bp[i++] = repl; + } + } + } + if (rv != SECSuccess) { + PORT_ZFree(block, modulusLen); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return NULL; + } + bp += padLen; + *bp++ = RSA_BLOCK_AFTER_PAD_OCTET; + PORT_Memcpy(bp, data->data, data->len); + break; + + default: + PORT_Assert(0); + PORT_ZFree(block, modulusLen); + return NULL; + } + + return block; +} + +static SECStatus +rsa_FormatBlock(SECItem *result, + unsigned modulusLen, + RSA_BlockType blockType, + SECItem *data) +{ + switch (blockType) { + case RSA_BlockPrivate: + case RSA_BlockPublic: + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * + * The "3" below is the first octet + the second octet + the 0x00 + * octet that always comes just before the ActualData. + */ + if (data->len > (modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN))) { + return SECFailure; + } + result->data = rsa_FormatOneBlock(modulusLen, blockType, data); + if (result->data == NULL) { + result->len = 0; + return SECFailure; + } + result->len = modulusLen; + + break; + + case RSA_BlockRaw: + /* + * Pad || ActualData + * Pad is zeros. The application is responsible for recovering + * the actual data. + */ + if (data->len > modulusLen) { + return SECFailure; + } + result->data = (unsigned char *)PORT_ZAlloc(modulusLen); + result->len = modulusLen; + PORT_Memcpy(result->data + (modulusLen - data->len), + data->data, data->len); + break; + + default: + PORT_Assert(0); + result->data = NULL; + result->len = 0; + return SECFailure; + } + + return SECSuccess; +} + +/* + * Mask generation function MGF1 as defined in PKCS #1 v2.1 / RFC 3447. 
+ */ +static SECStatus +MGF1(HASH_HashType hashAlg, + unsigned char *mask, + unsigned int maskLen, + const unsigned char *mgfSeed, + unsigned int mgfSeedLen) +{ + unsigned int digestLen; + PRUint32 counter; + PRUint32 rounds; + unsigned char *tempHash; + unsigned char *temp; + const SECHashObject *hash; + void *hashContext; + unsigned char C[4]; + SECStatus rv = SECSuccess; + + hash = HASH_GetRawHashObject(hashAlg); + if (hash == NULL) { + return SECFailure; + } + + hashContext = (*hash->create)(); + rounds = (maskLen + hash->length - 1) / hash->length; + for (counter = 0; counter < rounds; counter++) { + C[0] = (unsigned char)((counter >> 24) & 0xff); + C[1] = (unsigned char)((counter >> 16) & 0xff); + C[2] = (unsigned char)((counter >> 8) & 0xff); + C[3] = (unsigned char)(counter & 0xff); + + /* This could be optimized when the clone functions in + * rawhash.c are implemented. */ + (*hash->begin)(hashContext); + (*hash->update)(hashContext, mgfSeed, mgfSeedLen); + (*hash->update)(hashContext, C, sizeof C); + + tempHash = mask + counter * hash->length; + if (counter != (rounds - 1)) { + (*hash->end)(hashContext, tempHash, &digestLen, hash->length); + } else { /* we're in the last round and need to cut the hash */ + temp = (unsigned char *)PORT_Alloc(hash->length); + if (!temp) { + rv = SECFailure; + goto done; + } + (*hash->end)(hashContext, temp, &digestLen, hash->length); + PORT_Memcpy(tempHash, temp, maskLen - counter * hash->length); + PORT_Free(temp); + } + } + +done: + (*hash->destroy)(hashContext, PR_TRUE); + return rv; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_SignRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *data, + unsigned int dataLen) +{ + SECStatus rv = SECSuccess; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + SECItem formatted; + SECItem unformatted; + + if (maxOutputLen < modulusLen) + return SECFailure; + + unformatted.len = dataLen; + 
unformatted.data = (unsigned char *)data; + formatted.data = NULL; + rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockRaw, &unformatted); + if (rv != SECSuccess) + goto done; + + rv = RSA_PrivateKeyOpDoubleChecked(key, output, formatted.data); + *outputLen = modulusLen; + +done: + if (formatted.data != NULL) + PORT_ZFree(formatted.data, modulusLen); + return rv; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_CheckSignRaw(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *buffer; + + if (sigLen != modulusLen) + goto failure; + if (hashLen > modulusLen) + goto failure; + + buffer = (unsigned char *)PORT_Alloc(modulusLen + 1); + if (!buffer) + goto failure; + + rv = RSA_PublicKeyOp(key, buffer, sig); + if (rv != SECSuccess) + goto loser; + + /* + * make sure we get the same results + */ + /* XXX(rsleevi): Constant time */ + /* NOTE: should we verify the leading zeros? 
*/ + if (PORT_Memcmp(buffer + (modulusLen - hashLen), hash, hashLen) != 0) + goto loser; + + PORT_Free(buffer); + return SECSuccess; + +loser: + PORT_Free(buffer); +failure: + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_CheckSignRecoverRaw(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + + if (sigLen != modulusLen) + goto failure; + if (maxDataLen < modulusLen) + goto failure; + + rv = RSA_PublicKeyOp(key, data, sig); + if (rv != SECSuccess) + goto failure; + + *dataLen = modulusLen; + return SECSuccess; + +failure: + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_EncryptRaw(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + SECItem formatted; + SECItem unformatted; + + formatted.data = NULL; + if (maxOutputLen < modulusLen) + goto failure; + + unformatted.len = inputLen; + unformatted.data = (unsigned char *)input; + formatted.data = NULL; + rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockRaw, &unformatted); + if (rv != SECSuccess) + goto failure; + + rv = RSA_PublicKeyOp(key, output, formatted.data); + if (rv != SECSuccess) + goto failure; + + PORT_ZFree(formatted.data, modulusLen); + *outputLen = modulusLen; + return SECSuccess; + +failure: + if (formatted.data != NULL) + PORT_ZFree(formatted.data, modulusLen); + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_DecryptRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + + if (modulusLen > 
maxOutputLen) + goto failure; + if (inputLen != modulusLen) + goto failure; + + rv = RSA_PrivateKeyOp(key, output, input); + if (rv != SECSuccess) + goto failure; + + *outputLen = modulusLen; + return SECSuccess; + +failure: + return SECFailure; +} + +/* + * Decodes an EME-OAEP encoded block, validating the encoding in constant + * time. + * Described in RFC 3447, section 7.1.2. + * input contains the encoded block, after decryption. + * label is the optional value L that was associated with the message. + * On success, the original message and message length will be stored in + * output and outputLen. + */ +static SECStatus +eme_oaep_decode(unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen) +{ + const SECHashObject *hash; + void *hashContext; + SECStatus rv = SECFailure; + unsigned char labelHash[HASH_LENGTH_MAX]; + unsigned int i; + unsigned int maskLen; + unsigned int paddingOffset; + unsigned char *mask = NULL; + unsigned char *tmpOutput = NULL; + unsigned char isGood; + unsigned char foundPaddingEnd; + + hash = HASH_GetRawHashObject(hashAlg); + + /* 1.c */ + if (inputLen < (hash->length * 2) + 2) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + /* Step 3.a - Generate lHash */ + hashContext = (*hash->create)(); + if (hashContext == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hashContext); + if (labelLen > 0) + (*hash->update)(hashContext, label, labelLen); + (*hash->end)(hashContext, labelHash, &i, sizeof(labelHash)); + (*hash->destroy)(hashContext, PR_TRUE); + + tmpOutput = (unsigned char *)PORT_Alloc(inputLen); + if (tmpOutput == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto done; + } + + maskLen = inputLen - hash->length - 1; + mask = (unsigned char *)PORT_Alloc(maskLen); + if (mask == NULL) { + 
PORT_SetError(SEC_ERROR_NO_MEMORY); + goto done; + } + + PORT_Memcpy(tmpOutput, input, inputLen); + + /* 3.c - Generate seedMask */ + MGF1(maskHashAlg, mask, hash->length, &tmpOutput[1 + hash->length], + inputLen - hash->length - 1); + /* 3.d - Unmask seed */ + for (i = 0; i < hash->length; ++i) + tmpOutput[1 + i] ^= mask[i]; + + /* 3.e - Generate dbMask */ + MGF1(maskHashAlg, mask, maskLen, &tmpOutput[1], hash->length); + /* 3.f - Unmask DB */ + for (i = 0; i < maskLen; ++i) + tmpOutput[1 + hash->length + i] ^= mask[i]; + + /* 3.g - Compare Y, lHash, and PS in constant time + * Warning: This code is timing dependent and must not disclose which of + * these were invalid. + */ + paddingOffset = 0; + isGood = 1; + foundPaddingEnd = 0; + + /* Compare Y */ + isGood &= constantTimeEQ8(0x00, tmpOutput[0]); + + /* Compare lHash and lHash' */ + isGood &= constantTimeCompare(&labelHash[0], + &tmpOutput[1 + hash->length], + hash->length); + + /* Compare that the padding is zero or more zero octets, followed by a + * 0x01 octet */ + for (i = 1 + (hash->length * 2); i < inputLen; ++i) { + unsigned char isZero = constantTimeEQ8(0x00, tmpOutput[i]); + unsigned char isOne = constantTimeEQ8(0x01, tmpOutput[i]); + /* non-constant time equivalent: + * if (tmpOutput[i] == 0x01 && !foundPaddingEnd) + * paddingOffset = i; + */ + paddingOffset = constantTimeCondition(isOne & ~foundPaddingEnd, i, + paddingOffset); + /* non-constant time equivalent: + * if (tmpOutput[i] == 0x01) + * foundPaddingEnd = true; + * + * Note: This may yield false positives, as it will be set whenever + * a 0x01 byte is encountered. If there was bad padding (eg: + * 0x03 0x02 0x01), foundPaddingEnd will still be set to true, and + * paddingOffset will still be set to 2. 
+ */ + foundPaddingEnd = constantTimeCondition(isOne, 1, foundPaddingEnd); + /* non-constant time equivalent: + * if (tmpOutput[i] != 0x00 && tmpOutput[i] != 0x01 && + * !foundPaddingEnd) { + * isGood = false; + * } + * + * Note: This may yield false positives, as a message (and padding) + * that is entirely zeros will result in isGood still being true. Thus + * it's necessary to check foundPaddingEnd is positive below. + */ + isGood = constantTimeCondition(~foundPaddingEnd & ~isZero, 0, isGood); + } + + /* While both isGood and foundPaddingEnd may have false positives, they + * cannot BOTH have false positives. If both are not true, then an invalid + * message was received. Note, this comparison must still be done in constant + * time so as not to leak either condition. + */ + if (!(isGood & foundPaddingEnd)) { + PORT_SetError(SEC_ERROR_BAD_DATA); + goto done; + } + + /* End timing dependent code */ + + ++paddingOffset; /* Skip the 0x01 following the end of PS */ + + *outputLen = inputLen - paddingOffset; + if (*outputLen > maxOutputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + goto done; + } + + if (*outputLen) + PORT_Memcpy(output, &tmpOutput[paddingOffset], *outputLen); + rv = SECSuccess; + +done: + if (mask) + PORT_ZFree(mask, maskLen); + if (tmpOutput) + PORT_ZFree(tmpOutput, inputLen); + return rv; +} + +/* + * Generate an EME-OAEP encoded block for encryption + * Described in RFC 3447, section 7.1.1 + * We use input instead of M for the message to be encrypted + * label is the optional value L to be associated with the message. 
+ */ +static SECStatus +eme_oaep_encode(unsigned char *em, + unsigned int emLen, + const unsigned char *input, + unsigned int inputLen, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen) +{ + const SECHashObject *hash; + void *hashContext; + SECStatus rv; + unsigned char *mask; + unsigned int reservedLen; + unsigned int dbMaskLen; + unsigned int i; + + hash = HASH_GetRawHashObject(hashAlg); + PORT_Assert(seed == NULL || seedLen == hash->length); + + /* Step 1.b */ + reservedLen = (2 * hash->length) + 2; + if (emLen < reservedLen || inputLen > (emLen - reservedLen)) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + /* + * From RFC 3447, Section 7.1 + * +----------+---------+-------+ + * DB = | lHash | PS | M | + * +----------+---------+-------+ + * | + * +----------+ V + * | seed |--> MGF ---> xor + * +----------+ | + * | | + * +--+ V | + * |00| xor <----- MGF <-----| + * +--+ | | + * | | | + * V V V + * +--+----------+----------------------------+ + * EM = |00|maskedSeed| maskedDB | + * +--+----------+----------------------------+ + * + * We use mask to hold the result of the MGF functions, and all other + * values are generated in their final resting place. + */ + *em = 0x00; + + /* Step 2.a - Generate lHash */ + hashContext = (*hash->create)(); + if (hashContext == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hashContext); + if (labelLen > 0) + (*hash->update)(hashContext, label, labelLen); + (*hash->end)(hashContext, &em[1 + hash->length], &i, hash->length); + (*hash->destroy)(hashContext, PR_TRUE); + + /* Step 2.b - Generate PS */ + if (emLen - reservedLen - inputLen > 0) { + PORT_Memset(em + 1 + (hash->length * 2), 0x00, + emLen - reservedLen - inputLen); + } + + /* Step 2.c. 
- Generate DB + * DB = lHash || PS || 0x01 || M + * Note that PS and lHash have already been placed into em at their + * appropriate offsets. This just copies M into place + */ + em[emLen - inputLen - 1] = 0x01; + if (inputLen) + PORT_Memcpy(em + emLen - inputLen, input, inputLen); + + if (seed == NULL) { + /* Step 2.d - Generate seed */ + rv = RNG_GenerateGlobalRandomBytes(em + 1, hash->length); + if (rv != SECSuccess) { + return rv; + } + } else { + /* For Known Answer Tests, copy the supplied seed. */ + PORT_Memcpy(em + 1, seed, seedLen); + } + + /* Step 2.e - Generate dbMask*/ + dbMaskLen = emLen - hash->length - 1; + mask = (unsigned char *)PORT_Alloc(dbMaskLen); + if (mask == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + MGF1(maskHashAlg, mask, dbMaskLen, em + 1, hash->length); + /* Step 2.f - Compute maskedDB*/ + for (i = 0; i < dbMaskLen; ++i) + em[1 + hash->length + i] ^= mask[i]; + + /* Step 2.g - Generate seedMask */ + MGF1(maskHashAlg, mask, hash->length, &em[1 + hash->length], dbMaskLen); + /* Step 2.h - Compute maskedSeed */ + for (i = 0; i < hash->length; ++i) + em[1 + i] ^= mask[i]; + + PORT_ZFree(mask, dbMaskLen); + return SECSuccess; +} + +SECStatus +RSA_EncryptOAEP(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECFailure; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *oaepEncoded = NULL; + + if (maxOutputLen < modulusLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + if ((labelLen == 0 && label != NULL) || + (labelLen > 0 && label == NULL)) { + 
PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + oaepEncoded = (unsigned char *)PORT_Alloc(modulusLen); + if (oaepEncoded == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + rv = eme_oaep_encode(oaepEncoded, modulusLen, input, inputLen, + hashAlg, maskHashAlg, label, labelLen, seed, seedLen); + if (rv != SECSuccess) + goto done; + + rv = RSA_PublicKeyOp(key, output, oaepEncoded); + if (rv != SECSuccess) + goto done; + *outputLen = modulusLen; + +done: + PORT_Free(oaepEncoded); + return rv; +} + +SECStatus +RSA_DecryptOAEP(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECFailure; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *oaepEncoded = NULL; + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + if (inputLen != modulusLen) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if ((labelLen == 0 && label != NULL) || + (labelLen > 0 && label == NULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + oaepEncoded = (unsigned char *)PORT_Alloc(modulusLen); + if (oaepEncoded == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + + rv = RSA_PrivateKeyOpDoubleChecked(key, oaepEncoded, input); + if (rv != SECSuccess) { + goto done; + } + rv = eme_oaep_decode(output, outputLen, maxOutputLen, oaepEncoded, + modulusLen, hashAlg, maskHashAlg, label, + labelLen); + +done: + if (oaepEncoded) + PORT_ZFree(oaepEncoded, modulusLen); + return rv; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_EncryptBlock(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const 
unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + SECItem formatted; + SECItem unformatted; + + formatted.data = NULL; + if (maxOutputLen < modulusLen) + goto failure; + + unformatted.len = inputLen; + unformatted.data = (unsigned char *)input; + formatted.data = NULL; + rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockPublic, + &unformatted); + if (rv != SECSuccess) + goto failure; + + rv = RSA_PublicKeyOp(key, output, formatted.data); + if (rv != SECSuccess) + goto failure; + + PORT_ZFree(formatted.data, modulusLen); + *outputLen = modulusLen; + return SECSuccess; + +failure: + if (formatted.data != NULL) + PORT_ZFree(formatted.data, modulusLen); + return SECFailure; +} + +static HMACContext * +rsa_GetHMACContext(const SECHashObject *hash, RSAPrivateKey *key, + const unsigned char *input, unsigned int inputLen) +{ + unsigned char keyHash[HASH_LENGTH_MAX]; + void *hashContext; + HMACContext *hmac = NULL; + unsigned int privKeyLen = key->privateExponent.len; + unsigned int keyLen; + SECStatus rv; + + /* first get the key hash (should store in the key structure) */ + PORT_Memset(keyHash, 0, sizeof(keyHash)); + hashContext = (*hash->create)(); + if (hashContext == NULL) { + return NULL; + } + (*hash->begin)(hashContext); + if (privKeyLen < inputLen) { + int padLen = inputLen - privKeyLen; + while (padLen > sizeof(keyHash)) { + (*hash->update)(hashContext, keyHash, sizeof(keyHash)); + padLen -= sizeof(keyHash); + } + (*hash->update)(hashContext, keyHash, padLen); + } + (*hash->update)(hashContext, key->privateExponent.data, privKeyLen); + (*hash->end)(hashContext, keyHash, &keyLen, sizeof(keyHash)); + (*hash->destroy)(hashContext, PR_TRUE); + + /* now create the hmac key */ + hmac = HMAC_Create(hash, keyHash, keyLen, PR_TRUE); + if (hmac == NULL) { + PORT_Memset(keyHash, 0, sizeof(keyHash)); + return NULL; + } + HMAC_Begin(hmac); + HMAC_Update(hmac, input, inputLen); + rv = 
HMAC_Finish(hmac, keyHash, &keyLen, sizeof(keyHash)); + if (rv != SECSuccess) { + PORT_Memset(keyHash, 0, sizeof(keyHash)); + HMAC_Destroy(hmac, PR_TRUE); + return NULL; + } + /* Finally set the new key into the hash context. We + * reuse the original context allocated above so we don't + * need to allocate and free another one */ + rv = HMAC_ReInit(hmac, hash, keyHash, keyLen, PR_TRUE); + PORT_Memset(keyHash, 0, sizeof(keyHash)); + if (rv != SECSuccess) { + HMAC_Destroy(hmac, PR_TRUE); + return NULL; + } + + return hmac; +} + +static SECStatus +rsa_HMACPrf(HMACContext *hmac, const char *label, int labelLen, + int hashLength, unsigned char *output, int length) +{ + unsigned char iterator[2] = { 0, 0 }; + unsigned char encodedLen[2] = { 0, 0 }; + unsigned char hmacLast[HASH_LENGTH_MAX]; + unsigned int left = length; + unsigned int hashReturn; + SECStatus rv = SECSuccess; + + /* encodedLen is in bits, length is in bytes, thus the shifts + * do an implied multiply by 8 */ + encodedLen[0] = (length >> 5) & 0xff; + encodedLen[1] = (length << 3) & 0xff; + + while (left > hashLength) { + HMAC_Begin(hmac); + HMAC_Update(hmac, iterator, 2); + HMAC_Update(hmac, (const unsigned char *)label, labelLen); + HMAC_Update(hmac, encodedLen, 2); + rv = HMAC_Finish(hmac, output, &hashReturn, hashLength); + if (rv != SECSuccess) { + return rv; + } + iterator[1]++; + if (iterator[1] == 0) + iterator[0]++; + left -= hashLength; + output += hashLength; + } + if (left) { + HMAC_Begin(hmac); + HMAC_Update(hmac, iterator, 2); + HMAC_Update(hmac, (const unsigned char *)label, labelLen); + HMAC_Update(hmac, encodedLen, 2); + rv = HMAC_Finish(hmac, hmacLast, &hashReturn, sizeof(hmacLast)); + if (rv != SECSuccess) { + return rv; + } + PORT_Memcpy(output, hmacLast, left); + PORT_Memset(hmacLast, 0, sizeof(hmacLast)); + } + return rv; +} + +/* This function takes a 16-bit input number and + * creates the smallest mask which covers + * the whole number. 
Examples: + * 0x81 -> 0xff + * 0x1af -> 0x1ff + * 0x4d1 -> 0x7ff + */ +static int +makeMask16(int len) +{ + // or the high bit in each bit location + len |= (len >> 1); + len |= (len >> 2); + len |= (len >> 4); + len |= (len >> 8); + return len; +} + +#define STRING_AND_LENGTH(s) s, sizeof(s) - 1 +static int +rsa_GetErrorLength(HMACContext *hmac, int hashLen, int maxLegalLen) +{ + unsigned char out[128 * 2]; + unsigned char *outp; + int outLength = 0; + int lengthMask; + SECStatus rv; + + lengthMask = makeMask16(maxLegalLen); + rv = rsa_HMACPrf(hmac, STRING_AND_LENGTH("length"), hashLen, + out, sizeof(out)); + if (rv != SECSuccess) { + return -1; + } + for (outp = out; outp < out + sizeof(out); outp += 2) { + int candidate = outp[0] << 8 | outp[1]; + candidate = candidate & lengthMask; + outLength = PORT_CT_SEL(PORT_CT_LT(candidate, maxLegalLen), + candidate, outLength); + } + PORT_Memset(out, 0, sizeof(out)); + return outLength; +} + +/* + * This function can only fail in environmental cases: Programming errors + * and out of memory situations. It can't fail if the keys are valid and + * the inputs are the proper size. If the actual RSA decryption fails, a + * fake value and a fake length, both of which have already been generated + * based on the key and input, are returned. + * Applications are expected to detect decryption failures based on the fact + * that the decrypted value (usually a key) doesn't validate. The prevents + * Blecheinbaucher style attacks against the key. 
*/ +SECStatus +RSA_DecryptBlock(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + PRUint32 fail; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int i; + unsigned char *buffer = NULL; + unsigned char *errorBuffer = NULL; + unsigned char *bp = NULL; + unsigned char *ep = NULL; + unsigned int outLen = modulusLen; + unsigned int maxLegalLen = modulusLen - 10; + unsigned int errorLength; + const SECHashObject *hashObj; + HMACContext *hmac = NULL; + + /* failures in the top section indicate failures in the environment + * (memory) or the library. OK to return errors in these cases because + * it doesn't provide any oracle information to attackers. */ + if (inputLen != modulusLen || modulusLen < 10) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Allocate enough space to decrypt */ + buffer = PORT_ZAlloc(modulusLen); + if (!buffer) { + goto loser; + } + errorBuffer = PORT_ZAlloc(modulusLen); + if (!errorBuffer) { + goto loser; + } + hashObj = HASH_GetRawHashObject(HASH_AlgSHA256); + if (hashObj == NULL) { + goto loser; + } + + /* calculate the values to return in the error case rather than + * the actual returned values. This data is the same for the + * same input and private key. */ + hmac = rsa_GetHMACContext(hashObj, key, input, inputLen); + if (hmac == NULL) { + goto loser; + } + errorLength = rsa_GetErrorLength(hmac, hashObj->length, maxLegalLen); + if (((int)errorLength) < 0) { + goto loser; + } + /* we always have to generate a full moduluslen error string. 
Otherwise + * we create a timing dependency on errorLength, which could be used to + * determine the difference between errorLength and outputLen and tell + * us that there was a pkcs1 decryption failure */ + rv = rsa_HMACPrf(hmac, STRING_AND_LENGTH("message"), + hashObj->length, errorBuffer, modulusLen); + if (rv != SECSuccess) { + goto loser; + } + + HMAC_Destroy(hmac, PR_TRUE); + hmac = NULL; + + /* From here on out, we will always return success. If there is + * an error, we will return deterministic output based on the key + * and the input data. */ + rv = RSA_PrivateKeyOp(key, buffer, input); + + fail = PORT_CT_NE(rv, SECSuccess); + fail |= PORT_CT_NE(buffer[0], RSA_BLOCK_FIRST_OCTET) | PORT_CT_NE(buffer[1], RSA_BlockPublic); + + /* There have to be at least 8 bytes of padding. */ + for (i = 2; i < 10; i++) { + fail |= PORT_CT_EQ(buffer[i], RSA_BLOCK_AFTER_PAD_OCTET); + } + + for (i = 10; i < modulusLen; i++) { + unsigned int newLen = modulusLen - i - 1; + PRUint32 condition = PORT_CT_EQ(buffer[i], RSA_BLOCK_AFTER_PAD_OCTET) & PORT_CT_EQ(outLen, modulusLen); + outLen = PORT_CT_SEL(condition, newLen, outLen); + } + // this can only happen if a zero wasn't found above + fail |= PORT_CT_GE(outLen, modulusLen); + + outLen = PORT_CT_SEL(fail, errorLength, outLen); + + /* index into the correct buffer. Do it before we truncate outLen if the + * application was asking for less data than we can return */ + bp = buffer + modulusLen - outLen; + ep = errorBuffer + modulusLen - outLen; + + /* at this point, outLen returns no information about decryption failures, + * no need to hide its value. maxOutputLen is how much data the + * application is expecting, which is also not sensitive. */ + if (outLen > maxOutputLen) { + outLen = maxOutputLen; + } + + /* we can't use PORT_Memcpy because caching could create a time dependency + * on the status of fail. 
*/ + for (i = 0; i < outLen; i++) { + output[i] = PORT_CT_SEL(fail, ep[i], bp[i]); + } + + *outputLen = outLen; + + PORT_Free(buffer); + PORT_Free(errorBuffer); + + return SECSuccess; + +loser: + if (hmac) { + HMAC_Destroy(hmac, PR_TRUE); + } + PORT_Free(buffer); + PORT_Free(errorBuffer); + + return SECFailure; +} + +/* + * Encode a RSA-PSS signature. + * Described in RFC 3447, section 9.1.1. + * We use mHash instead of M as input. + * emBits from the RFC is just modBits - 1, see section 8.1.1. + * We only support MGF1 as the MGF. + */ +static SECStatus +emsa_pss_encode(unsigned char *em, + unsigned int emLen, + unsigned int emBits, + const unsigned char *mHash, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLen) +{ + const SECHashObject *hash; + void *hash_context; + unsigned char *dbMask; + unsigned int dbMaskLen; + unsigned int i; + SECStatus rv; + + hash = HASH_GetRawHashObject(hashAlg); + dbMaskLen = emLen - hash->length - 1; + + /* Step 3 */ + if (emLen < hash->length + saltLen + 2) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + /* Step 4 */ + if (salt == NULL) { + rv = RNG_GenerateGlobalRandomBytes(&em[dbMaskLen - saltLen], saltLen); + if (rv != SECSuccess) { + return rv; + } + } else { + PORT_Memcpy(&em[dbMaskLen - saltLen], salt, saltLen); + } + + /* Step 5 + 6 */ + /* Compute H and store it at its final location &em[dbMaskLen]. 
*/ + hash_context = (*hash->create)(); + if (hash_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hash_context); + (*hash->update)(hash_context, eightZeros, 8); + (*hash->update)(hash_context, mHash, hash->length); + (*hash->update)(hash_context, &em[dbMaskLen - saltLen], saltLen); + (*hash->end)(hash_context, &em[dbMaskLen], &i, hash->length); + (*hash->destroy)(hash_context, PR_TRUE); + + /* Step 7 + 8 */ + PORT_Memset(em, 0, dbMaskLen - saltLen - 1); + em[dbMaskLen - saltLen - 1] = 0x01; + + /* Step 9 */ + dbMask = (unsigned char *)PORT_Alloc(dbMaskLen); + if (dbMask == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + MGF1(maskHashAlg, dbMask, dbMaskLen, &em[dbMaskLen], hash->length); + + /* Step 10 */ + for (i = 0; i < dbMaskLen; i++) + em[i] ^= dbMask[i]; + PORT_Free(dbMask); + + /* Step 11 */ + em[0] &= 0xff >> (8 * emLen - emBits); + + /* Step 12 */ + em[emLen - 1] = 0xbc; + + return SECSuccess; +} + +/* + * Verify a RSA-PSS signature. + * Described in RFC 3447, section 9.1.2. + * We use mHash instead of M as input. + * emBits from the RFC is just modBits - 1, see section 8.1.2. + * We only support MGF1 as the MGF. 
+ */ +static SECStatus +emsa_pss_verify(const unsigned char *mHash, + const unsigned char *em, + unsigned int emLen, + unsigned int emBits, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen) +{ + const SECHashObject *hash; + void *hash_context; + unsigned char *db; + unsigned char *H_; /* H' from the RFC */ + unsigned int i; + unsigned int dbMaskLen; + unsigned int zeroBits; + SECStatus rv; + + hash = HASH_GetRawHashObject(hashAlg); + dbMaskLen = emLen - hash->length - 1; + + /* Step 3 + 4 */ + if ((emLen < (hash->length + saltLen + 2)) || + (em[emLen - 1] != 0xbc)) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + /* Step 6 */ + zeroBits = 8 * emLen - emBits; + if (em[0] >> (8 - zeroBits)) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + /* Step 7 */ + db = (unsigned char *)PORT_Alloc(dbMaskLen); + if (db == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + /* &em[dbMaskLen] points to H, used as mgfSeed */ + MGF1(maskHashAlg, db, dbMaskLen, &em[dbMaskLen], hash->length); + + /* Step 8 */ + for (i = 0; i < dbMaskLen; i++) { + db[i] ^= em[i]; + } + + /* Step 9 */ + db[0] &= 0xff >> zeroBits; + + /* Step 10 */ + for (i = 0; i < (dbMaskLen - saltLen - 1); i++) { + if (db[i] != 0) { + PORT_Free(db); + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + } + if (db[dbMaskLen - saltLen - 1] != 0x01) { + PORT_Free(db); + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + /* Step 12 + 13 */ + H_ = (unsigned char *)PORT_Alloc(hash->length); + if (H_ == NULL) { + PORT_Free(db); + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + hash_context = (*hash->create)(); + if (hash_context == NULL) { + PORT_Free(db); + PORT_Free(H_); + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hash_context); + (*hash->update)(hash_context, eightZeros, 8); + (*hash->update)(hash_context, mHash, hash->length); + 
(*hash->update)(hash_context, &db[dbMaskLen - saltLen], saltLen); + (*hash->end)(hash_context, H_, &i, hash->length); + (*hash->destroy)(hash_context, PR_TRUE); + + PORT_Free(db); + + /* Step 14 */ + if (PORT_Memcmp(H_, &em[dbMaskLen], hash->length) != 0) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + rv = SECFailure; + } else { + rv = SECSuccess; + } + + PORT_Free(H_); + return rv; +} + +SECStatus +RSA_SignPSS(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLength, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECSuccess; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int modulusBits = rsa_modulusBits(&key->modulus); + unsigned int emLen = modulusLen; + unsigned char *pssEncoded, *em; + + if (maxOutputLen < modulusLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + pssEncoded = em = (unsigned char *)PORT_Alloc(modulusLen); + if (pssEncoded == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + + /* len(em) == ceil((modulusBits - 1) / 8). */ + if (modulusBits % 8 == 1) { + em[0] = 0; + emLen--; + em++; + } + rv = emsa_pss_encode(em, emLen, modulusBits - 1, input, hashAlg, + maskHashAlg, salt, saltLength); + if (rv != SECSuccess) + goto done; + + // This sets error codes upon failure. 
+ rv = RSA_PrivateKeyOpDoubleChecked(key, output, pssEncoded); + *outputLen = modulusLen; + +done: + PORT_Free(pssEncoded); + return rv; +} + +SECStatus +RSA_CheckSignPSS(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLength, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int modulusBits = rsa_modulusBits(&key->modulus); + unsigned int emLen = modulusLen; + unsigned char *buffer, *em; + + if (sigLen != modulusLen) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + buffer = em = (unsigned char *)PORT_Alloc(modulusLen); + if (!buffer) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + + rv = RSA_PublicKeyOp(key, buffer, sig); + if (rv != SECSuccess) { + PORT_Free(buffer); + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + /* len(em) == ceil((modulusBits - 1) / 8). 
*/ + if (modulusBits % 8 == 1) { + emLen--; + em++; + } + rv = emsa_pss_verify(hash, em, emLen, modulusBits - 1, hashAlg, + maskHashAlg, saltLength); + + PORT_Free(buffer); + return rv; +} + +SECStatus +RSA_Sign(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECFailure; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + SECItem formatted = { siBuffer, NULL, 0 }; + SECItem unformatted = { siBuffer, (unsigned char *)input, inputLen }; + + if (maxOutputLen < modulusLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + goto done; + } + + rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockPrivate, + &unformatted); + if (rv != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + goto done; + } + + // This sets error codes upon failure. + rv = RSA_PrivateKeyOpDoubleChecked(key, output, formatted.data); + *outputLen = modulusLen; + +done: + if (formatted.data != NULL) { + PORT_ZFree(formatted.data, modulusLen); + } + return rv; +} + +SECStatus +RSA_CheckSign(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen) +{ + SECStatus rv = SECFailure; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int i; + unsigned char *buffer = NULL; + + if (sigLen != modulusLen) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * + * The "3" below is the first octet + the second octet + the 0x00 + * octet that always comes just before the ActualData. 
+ */ + if (modulusLen < (3 + RSA_BLOCK_MIN_PAD_LEN) || dataLen > modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN)) { /* reject moduli too short for any block, so the unsigned subtraction cannot wrap */ + PORT_SetError(SEC_ERROR_BAD_DATA); + goto done; + } + + buffer = (unsigned char *)PORT_Alloc(modulusLen + 1); + if (!buffer) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto done; + } + + if (RSA_PublicKeyOp(key, buffer, sig) != SECSuccess) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + + /* + * check the padding that was used + */ + if (buffer[0] != RSA_BLOCK_FIRST_OCTET || + buffer[1] != (unsigned char)RSA_BlockPrivate) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + for (i = 2; i < modulusLen - dataLen - 1; i++) { + if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + } + if (buffer[i] != RSA_BLOCK_AFTER_PAD_OCTET) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + + /* + * make sure we get the same results + */ + if (PORT_Memcmp(buffer + modulusLen - dataLen, data, dataLen) == 0) { + rv = SECSuccess; + } + +done: + if (buffer) { + PORT_Free(buffer); + } + return rv; +} + +SECStatus +RSA_CheckSignRecover(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen) +{ + SECStatus rv = SECFailure; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int i; + unsigned char *buffer = NULL; + unsigned int padLen; + + if (sigLen != modulusLen) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + + buffer = (unsigned char *)PORT_Alloc(modulusLen + 1); + if (!buffer) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto done; + } + + if (RSA_PublicKeyOp(key, buffer, sig) != SECSuccess) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + + *outputLen = 0; + + /* + * check the padding that was used + */ + if (buffer[0] != RSA_BLOCK_FIRST_OCTET || + buffer[1] != (unsigned char)RSA_BlockPrivate) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + for (i = 2; i < modulusLen; i++) { +
if (buffer[i] == RSA_BLOCK_AFTER_PAD_OCTET) { + *outputLen = modulusLen - i - 1; + break; + } + if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + } + padLen = i - 2; + if (padLen < RSA_BLOCK_MIN_PAD_LEN) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + if (*outputLen == 0) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto done; + } + if (*outputLen > maxOutputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + goto done; + } + + PORT_Memcpy(output, buffer + modulusLen - *outputLen, *outputLen); + rv = SECSuccess; + +done: + if (buffer) { + PORT_Free(buffer); + } + return rv; +} diff --git a/security/nss/lib/freebl/scripts/LICENSE b/security/nss/lib/freebl/scripts/LICENSE new file mode 100644 index 0000000000..a9335c22f3 --- /dev/null +++ b/security/nss/lib/freebl/scripts/LICENSE @@ -0,0 +1,36 @@ +Copyright (c) 2006, CRYPTOGAMS by +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain copyright notices, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the CRYPTOGAMS nor the names of its + copyright holder and contributors may be used to endorse or + promote products derived from this software without specific + prior written permission. + +ALTERNATIVELY, provided that this notice is retained in full, this +product may be distributed under the terms of the GNU General Public +License (GPL), in which case the provisions of the GPL apply INSTEAD OF +those given above. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/security/nss/lib/freebl/scripts/gen.sh b/security/nss/lib/freebl/scripts/gen.sh new file mode 100755 index 0000000000..ea415cc949 --- /dev/null +++ b/security/nss/lib/freebl/scripts/gen.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Run the scripts in this folder, generating the assembly, +# + +perl sha512p8-ppc.pl linux64le sha512-p8.s + +# Add the license mention +cat > hdr << "EOF" +# Copyright (c) 2006, CRYPTOGAMS by +# All rights reserved. +# See the full LICENSE under scripts/. + +EOF + +cat hdr sha512-p8.s > ../sha512-p8.s + +# Cleanup +rm hdr sha512-p8.s diff --git a/security/nss/lib/freebl/scripts/ppc-xlate.pl b/security/nss/lib/freebl/scripts/ppc-xlate.pl new file mode 100644 index 0000000000..a0fcf6a31f --- /dev/null +++ b/security/nss/lib/freebl/scripts/ppc-xlate.pl @@ -0,0 +1,352 @@ +#!/usr/bin/env perl + +# PowerPC assembler distiller by \@dot-asm. 
+ +################################################################ +# Recognized "flavour"-s are: +# +# linux{32|64}[le] GNU assembler and ELF symbol decorations, +# with little-endian option +# linux64v2 GNU assembler and big-endian instantiation +# of latest ELF specification +# aix{32|64} AIX assembler and symbol decorations +# osx{32|64} Mac OS X assembler and symbol decorations + +my $flavour = shift; +my $output = shift; +open STDOUT,">$output" or die "can't open $output: $!"; # "or", not "||": "||" bound to the filename string, so die never ran + +my %GLOBALS; +my %TYPES; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $type = sub { + my ($dir,$name,$type) = @_; + + $TYPES{$name} = $type; + if ($flavour =~ /linux/) { + $name =~ s|^\.||; + ".type $name,$type"; + } else { + ""; + } +}; +my $globl = sub { + my $junk = shift; + my $name = shift; + my $global = \$GLOBALS{$name}; + my $type = \$TYPES{$name}; + my $ret; + + $name =~ s|^\.||; + + SWITCH: for ($flavour) { + /aix/ && do { if (!$$type) { + $$type = "\@function"; + } + if ($$type =~ /function/) { + $name = ".$name"; + } + last; + }; + /osx/ && do { $name = "_$name"; + last; + }; + /linux.*(32|64(le|v2))/ + && do { $ret .= ".globl $name"; + if (!$$type) { + $ret .= "\n.type $name,\@function"; + $$type = "\@function"; + } + last; + }; + /linux.*64/ && do { $ret .= ".globl $name"; + if (!$$type) { + $ret .= "\n.type $name,\@function"; + $$type = "\@function"; + } + if ($$type =~ /function/) { + $ret .= "\n.section \".opd\",\"aw\""; + $ret .= "\n.align 3"; + $ret .= "\n$name:"; + $ret .= "\n.quad .$name,.TOC.\@tocbase,0"; + $ret .= "\n.previous"; + $name = ".$name"; + } + last; + }; + } + + $ret = ".globl $name" if (!$ret); + $$global = $name; + $ret; +}; +my $text = sub { + my $ret = ($flavour =~ /aix/) ?
".csect\t.text[PR],7" : ".text"; + $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64(le|v2)/); + $ret; +}; +my $machine = sub { + my $junk = shift; + my $arch = shift; + if ($flavour =~ /osx/) + { $arch =~ s/\"//g; + $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); + } + ".machine $arch"; +}; +my $size = sub { + if ($flavour =~ /linux/) + { shift; + my $name = shift; + my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name; + my $ret = ".size $$real,.-$$real"; + $name =~ s|^\.||; + if ($$real ne $name) { + $ret .= "\n.size $name,.-$$real"; + } + $ret; + } + else + { ""; } +}; +my $asciz = sub { + shift; + my $line = join(",",@_); + if ($line =~ /^"(.*)"$/) + { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } + else + { ""; } +}; +my $quad = sub { + shift; + my @ret; + my ($hi,$lo); + for (@_) { + if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) + { $hi=$1?"0x$1":"0"; $lo="0x$2"; } + elsif (/^([0-9]+)$/o) + { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl + else + { $hi=undef; $lo=$_; } + + if (defined($hi)) + { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } + else + { push(@ret,".quad $lo"); } + } + join("\n",@ret); +}; + +################################################################ +# simplified mnemonics not handled by at least one assembler +################################################################ +my $cmplw = sub { + my $f = shift; + my $cr = 0; $cr = shift if ($#_>1); + # Some out-of-date 32-bit GNU assembler just can't handle cmplw... + ($flavour =~ /linux.*32/) ? + " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : + " cmplw ".join(',',$cr,@_); +}; +my $bdnz = sub { + my $f = shift; + my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint + " bc $bo,0,".shift; +} if ($flavour!~/linux/); +my $bltlr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? 
# GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : + " bclr $bo,0"; +}; +my $bnelr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +my $beqlr = sub { + my $f = shift; + my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two +# arguments is 64, with "operand out of range" error. +my $extrdi = sub { + my ($f,$ra,$rs,$n,$b) = @_; + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; +}; +my $vmr = sub { + my ($f,$vx,$vy) = @_; + " vor $vx,$vy,$vy"; +}; + +# Some ABIs specify vrsave, special-purpose register #256, as reserved +# for system use. 
+my $no_vrsave = ($flavour =~ /aix|linux64(le|v2)/); +my $mtspr = sub { + my ($f,$idx,$ra) = @_; + if ($idx == 256 && $no_vrsave) { + " or $ra,$ra,$ra"; + } else { + " mtspr $idx,$ra"; + } +}; +my $mfspr = sub { + my ($f,$rd,$idx) = @_; + if ($idx == 256 && $no_vrsave) { + " li $rd,-1"; + } else { + " mfspr $rd,$idx"; + } +}; + +# PowerISA 2.06 stuff +sub vsxmem_op { + my ($f, $vrt, $ra, $rb, $op) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x +my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x +my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx +my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx +my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x +my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x +my $lvx_splt = sub { vsxmem_op(@_, 332); }; # lxvdsx +# VSX instruction[s] masqueraded as made-up AltiVec/VMX +my $vpermdi = sub { # xxpermdi + my ($f, $vrt, $vra, $vrb, $dm) = @_; + $dm = oct($dm) if ($dm =~ /^0/); + " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($dm<<8)|(10<<3)|7; +}; + +# PowerISA 2.07 stuff +sub vcrypto_op { + my ($f, $vrt, $vra, $vrb, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +sub vfour { + my ($f, $vrt, $vra, $vrb, $vrc, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($vrc<<6)|$op; +}; +my $vcipher = sub { vcrypto_op(@_, 1288); }; +my $vcipherlast = sub { vcrypto_op(@_, 1289); }; +my $vncipher = sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb = sub { vcrypto_op(@_, 1032); }; +my $vpmsumd = sub { vcrypto_op(@_, 1224); }; +my 
$vpmsumh = sub { vcrypto_op(@_, 1096); }; my $vpmsubh = $vpmsumh; # XO 1096 is vpmsumh; old misspelt name kept as an alias +my $vpmsumw = sub { vcrypto_op(@_, 1160); }; +# These are not really crypto, but vcrypto_op template works +my $vaddudm = sub { vcrypto_op(@_, 192); }; +my $vadduqm = sub { vcrypto_op(@_, 256); }; +my $vmuleuw = sub { vcrypto_op(@_, 648); }; +my $vmulouw = sub { vcrypto_op(@_, 136); }; +my $vrld = sub { vcrypto_op(@_, 196); }; +my $vsld = sub { vcrypto_op(@_, 1476); }; +my $vsrd = sub { vcrypto_op(@_, 1732); }; +my $vsubudm = sub { vcrypto_op(@_, 1216); }; +my $vaddcuq = sub { vcrypto_op(@_, 320); }; +my $vaddeuqm = sub { vfour(@_,60); }; +my $vaddecuq = sub { vfour(@_,61); }; +my $vmrgew = sub { vfour(@_,0,1932); }; +my $vmrgow = sub { vfour(@_,0,1676); }; + +my $mtsle = sub { + my ($f, $arg) = @_; + " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; + +# VSX instructions masqueraded as AltiVec/VMX +my $mtvrd = sub { + my ($f, $vrt, $ra) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(179<<1)|1; +}; +my $mtvrwz = sub { + my ($f, $vrt, $ra) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(243<<1)|1; +}; +my $lvwzx_u = sub { vsxmem_op(@_, 12); }; # lxsiwzx +my $stvwx_u = sub { vsxmem_op(@_, 140); }; # stxsiwx + +# PowerISA 3.0 stuff +my $maddhdu = sub { vfour(@_,49); }; +my $maddld = sub { vfour(@_,51); }; +my $darn = sub { + my ($f, $rt, $l) = @_; + " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); +}; +my $iseleq = sub { + my ($f, $rt, $ra, $rb) = @_; + " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|(2<<6)|30; +}; +# VSX instruction[s] masqueraded as made-up AltiVec/VMX +my $vspltib = sub { # xxspltib + my ($f, $vrt, $imm8) = @_; + $imm8 = oct($imm8) if ($imm8 =~ /^0/); + $imm8 &= 0xff; + " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($imm8<<11)|(360<<1)|1; +}; + +# PowerISA 3.0B stuff +my $addex = sub { + my ($f, $rt, $ra, $rb, $cy) = @_; # only cy==0 is specified in 3.0B + " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($cy<<9)|(170<<1);
+}; +my $vmsumudm = sub { vfour(@_,35); }; + +while($line=<>) { + + $line =~ s|[#!;].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning... + $line =~ s|\s+$||; # ... and at the end + + { + $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel + $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); + } + + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; + if ($label) { + my $xlated = ($GLOBALS{$label} or $label); + print "$xlated:"; + if ($flavour =~ /linux.*64(le|v2)/) { + if ($TYPES{$label} =~ /function/) { + printf "\n.localentry %s,0\n",$xlated; + } + } + } + } + + { + $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; + my $c = $1; $c = "\t" if ($c eq ""); + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); + $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(/,\s*/,$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } + + print $line if ($line); + print "\n"; +} + +close STDOUT; diff --git a/security/nss/lib/freebl/scripts/sha512p8-ppc.pl b/security/nss/lib/freebl/scripts/sha512p8-ppc.pl new file mode 100644 index 0000000000..3bef98be7b --- /dev/null +++ b/security/nss/lib/freebl/scripts/sha512p8-ppc.pl @@ -0,0 +1,413 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for use in the OpenSSL +# project. The module is dual licensed under OpenSSL and CRYPTOGAMS +# licenses depending on where you obtain it. For further details see +# https://github.com/dot-asm/cryptogams/. +# ==================================================================== + +# SHA256/512 for PowerISA v2.07. +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. 
+# Relative comparison is therefore more informative. This module is +# ~60% faster than integer-only sha512-ppc.pl. To anchor to something +# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than +# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than +# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting +# result is the degree of computational resource utilization. POWER8 +# is a "massively multi-threaded chip", and the difference between +# single- and maximum multi-process benchmark results shows that +# utilization is a whopping 94%. For sha512-ppc.pl we get [not +# unimpressive] 84% and for sha1-ppc.pl - 73%. 100% means that the +# multi-process result equals the single-process one, given that all +# threads end up on the same physical core. +# +###################################################################### +# Believed-to-be-accurate results in cycles per processed byte [on +# little-endian system]. Numbers in square brackets are for 64-bit +# build of sha512-ppc.pl, presented for reference.
+# +# POWER8 POWER9 +# SHA256 9.7 [15.8] 11.2 [12.5] +# SHA512 6.1 [10.3] 7.0 [7.9] + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$LENDIAN=($flavour=~/le/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!"; # "or", not "||": "||" bound to the command string, so die never ran + +if ($output =~ /512/) { + $bits=512; + $SZ=8; + $sz="d"; + $rounds=80; +} else { + $bits=256; + $SZ=4; + $sz="w"; + $rounds=64; +} + +$func="sha${bits}_block_p8"; +$LOCALS=8*$SIZE_T+8*16; +$FRAME=$LOCALS+9*16+6*$SIZE_T; + +$sp ="r1"; +$toc="r2"; +$ctx="r3"; +$inp="r4"; +$num="r5"; +$Tbl="r6"; +$idx="r7"; +$lrsave="r8"; +$offload="r11"; +$vrsave="r12"; +@I = ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70) = (0,map("r$_",(10,26..31))); + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); +@X=map("v$_",(8..19,24..27)); +($Ki,$Func,$Sigma,$lemask)=map("v$_",(28..31)); + +sub ROUND { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)%16; +my $k=($i+2)%8; + +$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); + lvx_u @X[$i+1],0,$inp ; load X[i] in advance + addi $inp,$inp,16 +___ +$code.=<<___ if ($i<16 && ($i%(16/$SZ))); + vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ +___ +$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); + vperm @X[$i],@X[$i],@X[$i],$lemask +___ +$code.=<<___ if ($i>=15); + vshasigma${sz} $Sigma,@X[($j+1)%16],0,0 + vaddu${sz}m @X[$j],@X[$j],$Sigma + vshasigma${sz} $Sigma,@X[($j+14)%16],0,15 + vaddu${sz}m @X[$j],@X[$j],$Sigma + vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16] +___ +$code.=<<___; + vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] + vsel $Func,$g,$f,$e ; Ch(e,f,g) + vaddu${sz}m $g,$g,$Ki ; future h+=K[i] +
vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) + vshasigma${sz} $Sigma,$e,1,15 ; Sigma1(e) + vaddu${sz}m $h,$h,$Sigma ; h+=Sigma1(e) + vxor $Func,$a,$b + vsel $Func,$b,$c,$Func ; Maj(a,b,c) + vaddu${sz}m $d,$d,$h ; d+=h + vshasigma${sz} $Sigma,$a,1,0 ; Sigma0(a) + vaddu${sz}m $Sigma,$Sigma,$Func ; Sigma0(a)+Maj(a,b,c) + vaddu${sz}m $h,$h,$Sigma ; h+=Sigma0(a)+Maj(a,b,c) + lvx $Ki,@I[$k],$idx ; load next K[i] +___ +$code.=<<___ if ($k == 7); + addi $idx,$idx,0x80 +___ +} + +$code=<<___; +.machine "any" +.text + +.globl $func +.align 6 +$func: + $STU $sp,-$FRAME($sp) + mflr $lrsave + li r10,`$LOCALS+15` + li r11,`$LOCALS+31` + stvx v24,r10,$sp # ABI says so + addi r10,r10,32 + mfspr $vrsave,256 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r11,-4096+255 # 0xfffff0ff + stw $vrsave,`$FRAME-6*$SIZE_T-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME-6*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME-5*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME-4*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME-3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME-2*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME-1*$SIZE_T`($sp) + li $x70,0x70 + $PUSH $lrsave,`$FRAME+$LRSAVE`($sp) + mtspr 256,r11 + + bl LPICmeup + addi $offload,$sp,`8*$SIZE_T+15` +___ +$code.=<<___ if ($LENDIAN); + li $idx,8 + lvsl $lemask,0,$idx + vspltisb $Ki,0x0f + vxor $lemask,$lemask,$Ki +___ +$code.=<<___ if ($SZ==4); + lvx_4w $A,$x00,$ctx + lvx_4w $E,$x10,$ctx + vsldoi $B,$A,$A,4 # unpack + vsldoi $C,$A,$A,8 + vsldoi $D,$A,$A,12 + vsldoi $F,$E,$E,4 + vsldoi $G,$E,$E,8 + vsldoi $H,$E,$E,12 +___ +$code.=<<___ if ($SZ==8); + lvx_u $A,$x00,$ctx + lvx_u $C,$x10,$ctx + lvx_u $E,$x20,$ctx + vsldoi $B,$A,$A,8 # unpack + lvx_u $G,$x30,$ctx + vsldoi $D,$C,$C,8 + vsldoi $F,$E,$E,8 + vsldoi $H,$G,$G,8 +___ +$code.=<<___; + li 
r0,`($rounds-16)/16` # inner loop counter + b Loop +.align 5 +Loop: + lvx $Ki,$x00,$Tbl + lvx_u @X[0],0,$inp + addi $inp,$inp,16 + mr $idx,$Tbl # copy $Tbl + stvx $A,$x00,$offload # offload $A-$H + stvx $B,$x10,$offload + stvx $C,$x20,$offload + stvx $D,$x30,$offload + stvx $E,$x40,$offload + stvx $F,$x50,$offload + stvx $G,$x60,$offload + stvx $H,$x70,$offload + vaddu${sz}m $H,$H,$Ki # h+K[i] + lvx $Ki,$x10,$Tbl +___ +for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mtctr r0 + b L16_xx +.align 5 +L16_xx: +___ +for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bdnz L16_xx + + lvx @X[2],$x00,$offload + subic. $num,$num,1 + lvx @X[3],$x10,$offload + vaddu${sz}m $A,$A,@X[2] + lvx @X[4],$x20,$offload + vaddu${sz}m $B,$B,@X[3] + lvx @X[5],$x30,$offload + vaddu${sz}m $C,$C,@X[4] + lvx @X[6],$x40,$offload + vaddu${sz}m $D,$D,@X[5] + lvx @X[7],$x50,$offload + vaddu${sz}m $E,$E,@X[6] + lvx @X[8],$x60,$offload + vaddu${sz}m $F,$F,@X[7] + lvx @X[9],$x70,$offload + vaddu${sz}m $G,$G,@X[8] + vaddu${sz}m $H,$H,@X[9] + bne Loop +___ +$code.=<<___ if ($SZ==4); + lvx @X[0],$x20,$idx + vperm $A,$A,$B,$Ki # pack the answer + lvx @X[1],$x30,$idx + vperm $E,$E,$F,$Ki + vperm $A,$A,$C,@X[0] + vperm $E,$E,$G,@X[0] + vperm $A,$A,$D,@X[1] + vperm $E,$E,$H,@X[1] + stvx_4w $A,$x00,$ctx + stvx_4w $E,$x10,$ctx +___ +$code.=<<___ if ($SZ==8); + vperm $A,$A,$B,$Ki # pack the answer + vperm $C,$C,$D,$Ki + vperm $E,$E,$F,$Ki + vperm $G,$G,$H,$Ki + stvx_u $A,$x00,$ctx + stvx_u $C,$x10,$ctx + stvx_u $E,$x20,$ctx + stvx_u $G,$x30,$ctx +___ +$code.=<<___; + addi $offload,$sp,`$LOCALS+15` + mtlr $lrsave + mtspr 256,$vrsave + lvx v24,$x00,$offload # ABI says so + lvx v25,$x10,$offload + lvx v26,$x20,$offload + lvx v27,$x30,$offload + lvx v28,$x40,$offload + lvx v29,$x50,$offload + lvx v30,$x60,$offload + lvx v31,$x70,$offload + $POP r26,`$FRAME-6*$SIZE_T`($sp) + $POP r27,`$FRAME-5*$SIZE_T`($sp) + $POP r28,`$FRAME-4*$SIZE_T`($sp) + $POP 
r29,`$FRAME-3*$SIZE_T`($sp) + $POP r30,`$FRAME-2*$SIZE_T`($sp) + $POP r31,`$FRAME-1*$SIZE_T`($sp) + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,6,3,0 + .long 0 +.size $func,.-$func +___ + +# Ugly hack here, because PPC assembler syntax seem to vary too +# much from platforms to platform... +$code.=<<___; +.align 6 +LPICmeup: + mflr r0 + bcl 20,31,\$+4 + mflr $Tbl ; vvvvvv "distance" between . and 1st data entry + addi $Tbl,$Tbl,`64-8` + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `64-9*4` +___ + +if ($SZ==8) { + local *table = sub { + foreach(@_) { $code.=".quad $_,$_\n"; } + }; + table( + "0x428a2f98d728ae22","0x7137449123ef65cd", + "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", + "0x3956c25bf348b538","0x59f111f1b605d019", + "0x923f82a4af194f9b","0xab1c5ed5da6d8118", + "0xd807aa98a3030242","0x12835b0145706fbe", + "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", + "0x72be5d74f27b896f","0x80deb1fe3b1696b1", + "0x9bdc06a725c71235","0xc19bf174cf692694", + "0xe49b69c19ef14ad2","0xefbe4786384f25e3", + "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", + "0x2de92c6f592b0275","0x4a7484aa6ea6e483", + "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", + "0x983e5152ee66dfab","0xa831c66d2db43210", + "0xb00327c898fb213f","0xbf597fc7beef0ee4", + "0xc6e00bf33da88fc2","0xd5a79147930aa725", + "0x06ca6351e003826f","0x142929670a0e6e70", + "0x27b70a8546d22ffc","0x2e1b21385c26c926", + "0x4d2c6dfc5ac42aed","0x53380d139d95b3df", + "0x650a73548baf63de","0x766a0abb3c77b2a8", + "0x81c2c92e47edaee6","0x92722c851482353b", + "0xa2bfe8a14cf10364","0xa81a664bbc423001", + "0xc24b8b70d0f89791","0xc76c51a30654be30", + "0xd192e819d6ef5218","0xd69906245565a910", + "0xf40e35855771202a","0x106aa07032bbd1b8", + "0x19a4c116b8d2d0c8","0x1e376c085141ab53", + "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", + "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", + "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", + "0x748f82ee5defb2fc","0x78a5636f43172f60", + "0x84c87814a1f0ab72","0x8cc702081a6439ec", + 
"0x90befffa23631e28","0xa4506cebde82bde9", + "0xbef9a3f7b2c67915","0xc67178f2e372532b", + "0xca273eceea26619c","0xd186b8c721c0c207", + "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", + "0x06f067aa72176fba","0x0a637dc5a2c898a6", + "0x113f9804bef90dae","0x1b710b35131c471b", + "0x28db77f523047d84","0x32caab7b40c72493", + "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", + "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", + "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); +$code.=<<___ if (!$LENDIAN); +.quad 0x0001020304050607,0x1011121314151617 +___ +$code.=<<___ if ($LENDIAN); # quad-swapped +.quad 0x1011121314151617,0x0001020304050607 +___ +} else { + local *table = sub { + foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } + }; + table( + "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", + "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", + "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", + "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", + "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", + "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", + "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", + "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", + "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", + "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", + "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", + "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", + "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", + "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", + "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", + "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); +$code.=<<___ if (!$LENDIAN); +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 +___ +$code.=<<___ if ($LENDIAN); # word-swapped +.long 0x10111213,0x10111213,0x10111213,0x00010203 +.long 0x10111213,0x10111213,0x04050607,0x00010203 +.long 0x10111213,0x08090a0b,0x04050607,0x00010203 +___ +} 
+$code.=<<___; +.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/security/nss/lib/freebl/secmpi.c b/security/nss/lib/freebl/secmpi.c new file mode 100644 index 0000000000..7d6ee4405b --- /dev/null +++ b/security/nss/lib/freebl/secmpi.c @@ -0,0 +1,28 @@ +#include "blapi.h" + +#include "mpi.h" +#include "mpprime.h" + +mp_err +mpp_random_secure(mp_int *a) +{ + SECStatus rv; + rv = RNG_GenerateGlobalRandomBytes((unsigned char *)MP_DIGITS(a), MP_USED(a) * sizeof(mp_digit)); + if (rv != SECSuccess) { + return MP_UNDEF; + } + MP_SIGN(a) = MP_ZPOS; + return MP_OKAY; +} + +mp_err +mpp_pprime_secure(mp_int *a, int nt) +{ + return mpp_pprime_ext_random(a, nt, &mpp_random_secure); +} + +mp_err +mpp_make_prime_secure(mp_int *start, mp_size nBits, mp_size strong) +{ + return mpp_make_prime_ext_random(start, nBits, strong, &mpp_random_secure); +} diff --git a/security/nss/lib/freebl/secmpi.h b/security/nss/lib/freebl/secmpi.h new file mode 100644 index 0000000000..53f9a53c10 --- /dev/null +++ b/security/nss/lib/freebl/secmpi.h @@ -0,0 +1,63 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi.h" + +#define CHECK_SEC_OK(func) \ + if (SECSuccess != (rv = func)) \ + goto cleanup + +#define CHECK_MPI_OK(func) \ + if (MP_OKAY > (err = func)) \ + goto cleanup + +#define OCTETS_TO_MPINT(oc, mp, len) \ + CHECK_MPI_OK(mp_read_unsigned_octets((mp), oc, len)) + +#define SECITEM_TO_MPINT(it, mp) \ + CHECK_MPI_OK(mp_read_unsigned_octets((mp), (it).data, (it).len)) + +#define MPINT_TO_SECITEM(mp, it, arena) \ + do { \ + int mpintLen = mp_unsigned_octet_size(mp); \ + if (mpintLen <= 0) { \ + err = MP_RANGE; \ + goto cleanup; \ + } \ + SECITEM_AllocItem(arena, (it), mpintLen); \ + if ((it)->data == NULL) { \ + err = MP_MEM; \ + goto cleanup; \ + } \ + err = mp_to_unsigned_octets(mp, (it)->data, (it)->len); \ + if (err < 0) \ + goto cleanup; \ + else \ + err = MP_OKAY; \ + } while (0) + +#define MP_TO_SEC_ERROR(err) \ + switch (err) { \ + case MP_MEM: \ + PORT_SetError(SEC_ERROR_NO_MEMORY); \ + break; \ + case MP_RANGE: \ + PORT_SetError(SEC_ERROR_BAD_DATA); \ + break; \ + case MP_BADARG: \ + PORT_SetError(SEC_ERROR_INVALID_ARGS); \ + break; \ + default: \ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); \ + break; \ + } + +/* Fill the `used` digits of an mp_int with random bits */ +mp_err mpp_random_secure(mp_int *a); + +/* Pseudo-primality testing using `mpp_random_secure` to choose Miller-Rabin base */ +mp_err mpp_pprime_secure(mp_int *a, int nt); + +/* Variant of `mpp_make_prime` using `mpp_random_secure` to choose Miller-Rabin base */ +mp_err mpp_make_prime_secure(mp_int *start, mp_size nBits, mp_size strong); diff --git a/security/nss/lib/freebl/secrng.h b/security/nss/lib/freebl/secrng.h new file mode 100644 index 0000000000..19eae48331 --- /dev/null +++ b/security/nss/lib/freebl/secrng.h @@ -0,0 +1,65 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#ifndef _SECRNG_H_
+#define _SECRNG_H_
+/*
+ * secrng.h - public data structures and prototypes for the secure random
+ *            number generator
+ */
+
+/******************************************/
+/*
+** Random number generation. A cryptographically strong random number
+** generator.
+*/
+
+#include "blapi.h"
+
+/* Number of bytes to read from the system random number generator. */
+#define SYSTEM_RNG_SEED_COUNT 1024
+
+SEC_BEGIN_PROTOS
+
+/*
+** The functions below are provided by the security library, but each is
+** implemented differently for the UNIX, Win, and OS/2 versions.
+*/
+
+/*
+** Get the "noisiest" information available on the system.
+**   buf       receives the noise bytes
+**   maxbytes  upper bound on the number of bytes copied into buf
+** The amount of data returned depends on the system implementation.
+** It will not exceed maxbytes, but may be (much) less.
+** Returns the number of noise bytes copied into buf, or zero on error.
+*/
+extern size_t RNG_GetNoise(void *buf, size_t maxbytes);
+
+/*
+** RNG_SystemInfoForRNG should be called before any use of SSL. It
+** gathers system-specific information to help seed the state of the
+** global random number generator.
+*/
+extern void RNG_SystemInfoForRNG(void);
+
+/*
+** Use the contents (and stat) of the file named by filename to help seed
+** the global random number generator.
+*/
+extern void RNG_FileForRNG(const char *filename);
+
+/*
+** Get maxbytes bytes of random data from the system random number
+** generator and copy them into buf.
+** Returns the number of bytes copied into buf: maxbytes on success,
+** or zero on error.
+** Errors:
+**   PR_NOT_IMPLEMENTED_ERROR   There is no system RNG on the platform.
+**   SEC_ERROR_NEED_RANDOM      The system RNG failed.
+*/
+extern size_t RNG_SystemRNG(void *buf, size_t maxbytes);
+
+SEC_END_PROTOS
+
+#endif /* _SECRNG_H_ */
diff --git a/security/nss/lib/freebl/sha-fast-amd64-sun.s b/security/nss/lib/freebl/sha-fast-amd64-sun.s
new file mode 100644
index 0000000000..6430469a4e
--- /dev/null
+++ b/security/nss/lib/freebl/sha-fast-amd64-sun.s
@@ -0,0 +1,2151 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+        .file   "sha_fast.c"
+        .text
+        .align 16
+/ SHA1_Begin(%rdi = context)
+/ Initialises the context: the 64-bit slot at 64(%rdi) is cleared and the
+/ five 64-bit slots at 72..104(%rdi) receive, in order,
+/   1732584193 (0x67452301), 4023233417 (0xEFCDAB89),
+/   2562383102 (0x98BADCFE),  271733878 (0x10325476),
+/   3285377520 (0xC3D2E1F0)
+/ which are the SHA-1 initial hash values.  Constants that do not fit a
+/ sign-extended 32-bit immediate are first loaded with movl, which
+/ zero-extends them into the full 64-bit register.
+.globl SHA1_Begin
+        .type   SHA1_Begin, @function
+SHA1_Begin:
+.LFB4:
+        movl    $4023233417, %ecx
+        movl    $2562383102, %edx
+        movl    $3285377520, %eax
+        movq    $0, 64(%rdi)
+        movq    $1732584193, 72(%rdi)
+        movq    %rcx, 80(%rdi)
+        movq    %rdx, 88(%rdi)
+        movq    $271733878, 96(%rdi)
+        movq    %rax, 104(%rdi)
+        ret
+.LFE4:
+        .size   SHA1_Begin, .-SHA1_Begin
+        .align 16
+/ shaCompress(%rdi, %rsi) -- file-local (no .globl) SHA-1 block function.
+/   %rsi  points at the 64 input bytes.  Each 32-bit word is loaded,
+/         byte-swapped and stored zero-extended in a 64-bit schedule
+/         slot; word i lives at -48+8*i(%rdi), i.e. -48(%rdi)..72(%rdi).
+/   %rdi  is the base of that schedule area.  The five chaining values
+/         are read from the 64-bit slots at -88..-56(%rdi) and the
+/         updated values are written back there before returning.
+/         SHA1_Update passes context+160, so these are the slots that
+/         SHA1_Begin fills at 72..104(context).
+/ The additive round constants that appear in the body are
+/   1518500249 (0x5A827999), 1859775393 (0x6ED9EBA1),
+/   2400959708 (0x8F1BBCDC), 3395469782 (0xCA62C1D6).
+/ %r15, %r14, %r13, %r12 and %rbx are pushed on entry and popped on exit.
+        .type   shaCompress, @function
+shaCompress:
+.LFB7:
+        pushq   %r15
+.LCFI0:
+        pushq   %r14
+.LCFI1:
+        pushq   %r13
+.LCFI2:
+        pushq   %r12
+.LCFI3:
+/ Load the five working values: a=%r12, b=%r10, c=%r13, d=%r8, e=%rcx.
+        movq    -88(%rdi), %r12
+        movq    -80(%rdi), %r10
+        movq    -72(%rdi), %r13
+        movq    -64(%rdi), %r8
+        pushq   %rbx
+.LCFI4:
+        movq    -56(%rdi), %rcx
+/ First round: %rdx = rol(a,5) + (((c ^ d) & b) ^ d) + e + K + W[0],
+/ interleaved with the loads for the following input words.
+        movl    (%rsi), %eax
+        movl    %r12d, %edx
+        movq    %r13, %r9
+        roll    $5, %edx
+        movl    4(%rsi), %ebx
+        xorq    %r8, %r9
+/APP
+        bswap   %eax
+/NO_APP
+        andq    %r10, %r9
+        mov     %eax, %r15d
+        roll    $30, %r10d
+        movq    %r15, -48(%rdi)
+        xorq    %r8, %r9
+        movq    -48(%rdi), %r14
+        addq    %r9, %rdx
+        movq    %r10, %rax
+        movl    %r12d, %r15d
+        addq    %rcx, %rdx
+        xorq    %r13, %rax
+        roll    $30, %r15d
+        leaq    1518500249(%rdx,%r14), %rdx
+        andq    %r12, %rax
+        movq    %r15, %r12
+/APP
+        bswap   %ebx
+/NO_APP
+/ Second round, same shape, using the word stored at -40(%rdi).
+        movl    %edx, %ecx
+        mov     %ebx, %r11d
+        xorq    %r13, %rax
+        movq    %r11, -40(%rdi)
+        roll    $5, %ecx
+        movq    -40(%rdi), %r9
+        addq    %rax, %rcx
+        xorq    %r10, %r12
+        movl    8(%rsi), %r14d
+        addq    %r8, %rcx
+        andq    %rdx, %r12
+        movl    %edx, %r11d
+        leaq    1518500249(%rcx,%r9), %rcx
+        xorq    %r10, %r12
+        roll    $30, %r11d
+/APP
+        bswap   %r14d
+/NO_APP + movl %ecx, %r8d + mov %r14d, %ebx + movl 12(%rsi), %r9d + movq %rbx, -32(%rdi) + roll $5, %r8d + movq -32(%rdi), %rax + addq %r12, %r8 + movq %r11, %r12 + movl %ecx, %ebx + addq %r13, %r8 + xorq %r15, %r12 + roll $30, %ebx + leaq 1518500249(%r8,%rax), %r8 + andq %rcx, %r12 + movl 16(%rsi), %eax +/APP + bswap %r9d +/NO_APP + movl %r8d, %edx + mov %r9d, %r14d + xorq %r15, %r12 + movq %r14, -24(%rdi) + roll $5, %edx + movq -24(%rdi), %r13 + addq %r12, %rdx + movq %rbx, %r12 + movl %r8d, %r14d + addq %r10, %rdx + leaq 1518500249(%rdx,%r13), %rdx + movl 20(%rsi), %r13d +/APP + bswap %eax +/NO_APP + movl %edx, %ecx + mov %eax, %r9d + roll $5, %ecx + xorq %r11, %r12 + movq %r9, -16(%rdi) + andq %r8, %r12 + movq -16(%rdi), %r10 + roll $30, %r14d + xorq %r11, %r12 + movq %r14, %rax + movl %edx, %r9d + addq %r12, %rcx + xorq %rbx, %rax + roll $30, %r9d + addq %r15, %rcx + andq %rdx, %rax + leaq 1518500249(%rcx,%r10), %rcx + xorq %rbx, %rax + movl 24(%rsi), %r10d +/APP + bswap %r13d +/NO_APP + movl %ecx, %r8d + mov %r13d, %r15d + movq %r15, -8(%rdi) + roll $5, %r8d + movq -8(%rdi), %r12 + addq %rax, %r8 + movl %ecx, %r15d + addq %r11, %r8 + movq %r9, %r11 + roll $30, %r15d + leaq 1518500249(%r8,%r12), %r8 + xorq %r14, %r11 + movl 28(%rsi), %r12d +/APP + bswap %r10d +/NO_APP + andq %rcx, %r11 + mov %r10d, %r13d + movl %r8d, %edx + movq %r13, (%rdi) + xorq %r14, %r11 + movq (%rdi), %rax + roll $5, %edx + movq %r15, %r10 + movl %r8d, %r13d + addq %r11, %rdx + xorq %r9, %r10 + roll $30, %r13d + addq %rbx, %rdx + andq %r8, %r10 + leaq 1518500249(%rdx,%rax), %rdx + xorq %r9, %r10 + movl 32(%rsi), %eax +/APP + bswap %r12d +/NO_APP + movl %edx, %ecx + mov %r12d, %ebx + movq %rbx, 8(%rdi) + roll $5, %ecx + movq 8(%rdi), %r11 + addq %r10, %rcx + movq %r13, %r10 + movl %edx, %ebx + addq %r14, %rcx + leaq 1518500249(%rcx,%r11), %rcx +/APP + bswap %eax +/NO_APP + movl %ecx, %r8d + mov %eax, %r12d + roll $5, %r8d + xorq %r15, %r10 + movq %r12, 16(%rdi) + andq %rdx, %r10 + movq 
16(%rdi), %r14 + roll $30, %ebx + xorq %r15, %r10 + movq %rbx, %rax + movl 36(%rsi), %r11d + addq %r10, %r8 + xorq %r13, %rax + movl %ecx, %r12d + addq %r9, %r8 + andq %rcx, %rax + roll $30, %r12d + leaq 1518500249(%r8,%r14), %r8 + xorq %r13, %rax + movl 40(%rsi), %r14d +/APP + bswap %r11d +/NO_APP + movl %r8d, %edx + mov %r11d, %r9d + movq %r12, %r11 + movq %r9, 24(%rdi) + roll $5, %edx + movq 24(%rdi), %r10 + addq %rax, %rdx + xorq %rbx, %r11 + movl %r8d, %r9d + addq %r15, %rdx + andq %r8, %r11 + roll $30, %r9d + leaq 1518500249(%rdx,%r10), %rdx + xorq %rbx, %r11 + movl 44(%rsi), %r10d +/APP + bswap %r14d +/NO_APP + movl %edx, %ecx + mov %r14d, %r15d + movq %r15, 32(%rdi) + roll $5, %ecx + movq 32(%rdi), %rax + addq %r11, %rcx + movq %r9, %r11 + movl %edx, %r15d + addq %r13, %rcx + xorq %r12, %r11 + roll $30, %r15d + leaq 1518500249(%rcx,%rax), %rcx + andq %rdx, %r11 + movl 48(%rsi), %eax +/APP + bswap %r10d +/NO_APP + movl %ecx, %r8d + mov %r10d, %r14d + xorq %r12, %r11 + movq %r14, 40(%rdi) + roll $5, %r8d + movq 40(%rdi), %r13 + addq %r11, %r8 + movq %r15, %r10 + movl %ecx, %r14d + addq %rbx, %r8 + xorq %r9, %r10 + leaq 1518500249(%r8,%r13), %r8 + movl 52(%rsi), %r13d +/APP + bswap %eax +/NO_APP + movl %r8d, %edx + mov %eax, %ebx + roll $5, %edx + andq %rcx, %r10 + movq %rbx, 48(%rdi) + xorq %r9, %r10 + movq 48(%rdi), %r11 + roll $30, %r14d + addq %r10, %rdx + movq %r14, %rax + movl %r8d, %ebx + addq %r12, %rdx + xorq %r15, %rax + roll $30, %ebx + leaq 1518500249(%rdx,%r11), %rdx + andq %r8, %rax + movl 56(%rsi), %r11d +/APP + bswap %r13d +/NO_APP + movl %edx, %ecx + mov %r13d, %r12d + xorq %r15, %rax + movq %r12, 56(%rdi) + roll $5, %ecx + movq 56(%rdi), %r10 + addq %rax, %rcx + movl %edx, %r12d + addq %r9, %rcx + movq %rbx, %r9 + roll $30, %r12d + leaq 1518500249(%rcx,%r10), %rcx + xorq %r14, %r9 + movl 60(%rsi), %r10d +/APP + bswap %r11d +/NO_APP + andq %rdx, %r9 + mov %r11d, %r13d + movl %ecx, %r8d + movq %r13, 64(%rdi) + xorq %r14, %r9 + movq 64(%rdi), 
%rax + roll $5, %r8d + movq %r12, %r11 + movl %ecx, %r13d + addq %r9, %r8 + xorq %rbx, %r11 + roll $30, %r13d + addq %r15, %r8 + andq %rcx, %r11 + leaq 1518500249(%r8,%rax), %r8 + xorq %rbx, %r11 +/APP + bswap %r10d +/NO_APP + movl %r8d, %esi + mov %r10d, %r15d + movq %r15, 72(%rdi) + roll $5, %esi + movq 72(%rdi), %r9 + movq 56(%rdi), %r10 + movq 16(%rdi), %rcx + addq %r11, %rsi + movq -32(%rdi), %rdx + addq %r14, %rsi + movq -48(%rdi), %rax + leaq 1518500249(%rsi,%r9), %r14 + movq %r13, %r11 + movl %r8d, %r15d + xorq %rcx, %r10 + xorq %rdx, %r10 + movl %r14d, %ecx + xorl %eax, %r10d + roll %r10d + roll $5, %ecx + xorq %r12, %r11 + andq %r8, %r11 + movq %r10, -48(%rdi) + movq -48(%rdi), %r9 + xorq %r12, %r11 + roll $30, %r15d + movl %r14d, %r10d + addq %r11, %rcx + movq 64(%rdi), %r11 + movq 24(%rdi), %rdx + addq %rbx, %rcx + movq -24(%rdi), %rbx + movq -40(%rdi), %rax + leaq 1518500249(%rcx,%r9), %rcx + movq %r15, %r8 + roll $30, %r10d + xorq %rdx, %r11 + xorq %r13, %r8 + xorq %rbx, %r11 + andq %r14, %r8 + movl %ecx, %r9d + xorl %eax, %r11d + xorq %r13, %r8 + roll $5, %r9d + roll %r11d + addq %r8, %r9 + movq %r10, %rax + movq %r11, -40(%rdi) + movq -40(%rdi), %rsi + addq %r12, %r9 + movq 72(%rdi), %rbx + movq 32(%rdi), %rdx + xorq %r15, %rax + movq -16(%rdi), %r14 + movq -32(%rdi), %r12 + andq %rcx, %rax + leaq 1518500249(%r9,%rsi), %r9 + xorq %r15, %rax + movl %ecx, %r11d + xorq %rdx, %rbx + roll $30, %r11d + xorq %r14, %rbx + movl %r9d, %esi + xorl %r12d, %ebx + roll $5, %esi + roll %ebx + addq %rax, %rsi + movq %rbx, -32(%rdi) + movq -32(%rdi), %r8 + addq %r13, %rsi + movq -48(%rdi), %r12 + movq 40(%rdi), %rdx + movq %r11, %r13 + movq -8(%rdi), %r14 + movq -24(%rdi), %rcx + movl %r9d, %ebx + leaq 1518500249(%rsi,%r8), %rsi + xorq %rdx, %r12 + xorq %r14, %r12 + movl %esi, %r8d + xorl %ecx, %r12d + roll %r12d + roll $5, %r8d + xorq %r10, %r13 + andq %r9, %r13 + movq %r12, -24(%rdi) + movq -24(%rdi), %rax + xorq %r10, %r13 + roll $30, %ebx + movl %esi, %r12d + 
addq %r13, %r8 + xorq %rbx, %rsi + roll $30, %r12d + addq %r15, %r8 + movq -40(%rdi), %r15 + movq 48(%rdi), %rdx + movq (%rdi), %r14 + movq -16(%rdi), %r9 + leaq 1518500249(%r8,%rax), %r13 + xorq %r11, %rsi + xorq %rdx, %r15 + movl %r13d, %ecx + xorq %r14, %r15 + roll $5, %ecx + xorl %r9d, %r15d + addq %rsi, %rcx + roll %r15d + addq %r10, %rcx + movq %r15, -16(%rdi) + movq -16(%rdi), %rsi + movl %r13d, %r15d + movq -32(%rdi), %r14 + movq 56(%rdi), %rax + xorq %r12, %r13 + movq 8(%rdi), %rdx + movq -8(%rdi), %r10 + xorq %rbx, %r13 + leaq 1859775393(%rcx,%rsi), %r9 + roll $30, %r15d + xorq %rax, %r14 + xorq %rdx, %r14 + movl %r9d, %esi + xorl %r10d, %r14d + roll $5, %esi + roll %r14d + addq %r13, %rsi + movq %r14, -8(%rdi) + movq -8(%rdi), %r8 + addq %r11, %rsi + movq -24(%rdi), %r13 + movq 64(%rdi), %rax + movl %r9d, %r14d + movq 16(%rdi), %rdx + movq (%rdi), %r11 + xorq %r15, %r9 + leaq 1859775393(%rsi,%r8), %r10 + xorq %rax, %r13 + xorq %rdx, %r13 + movl %r10d, %r8d + xorl %r11d, %r13d + roll $5, %r8d + roll %r13d + xorq %r12, %r9 + roll $30, %r14d + addq %r9, %r8 + movq %r13, (%rdi) + movq (%rdi), %rcx + addq %rbx, %r8 + movq -16(%rdi), %rbx + movq 72(%rdi), %rax + movq 24(%rdi), %rdx + movq 8(%rdi), %r9 + movl %r10d, %r13d + leaq 1859775393(%r8,%rcx), %r11 + xorq %r14, %r10 + roll $30, %r13d + xorq %rax, %rbx + xorq %r15, %r10 + xorq %rdx, %rbx + movl %r11d, %ecx + xorl %r9d, %ebx + roll $5, %ecx + roll %ebx + addq %r10, %rcx + movq %rbx, 8(%rdi) + movq 8(%rdi), %rsi + addq %r12, %rcx + movq -8(%rdi), %r12 + movq -48(%rdi), %rax + movl %r11d, %ebx + movq 32(%rdi), %rdx + movq 16(%rdi), %r9 + xorq %r13, %r11 + leaq 1859775393(%rcx,%rsi), %r10 + xorq %r14, %r11 + roll $30, %ebx + xorq %rax, %r12 + xorq %rdx, %r12 + movl %r10d, %esi + xorl %r9d, %r12d + roll $5, %esi + roll %r12d + addq %r11, %rsi + movq %r12, 16(%rdi) + addq %r15, %rsi + movq 16(%rdi), %r8 + movq (%rdi), %r15 + movq -40(%rdi), %rax + movl %r10d, %r12d + movq 40(%rdi), %rdx + movq 24(%rdi), %r9 + 
xorq %rbx, %r10 + leaq 1859775393(%rsi,%r8), %r11 + xorq %r13, %r10 + xorq %rax, %r15 + xorq %rdx, %r15 + movl %r11d, %r8d + xorl %r9d, %r15d + roll $5, %r8d + roll %r15d + addq %r10, %r8 + movq %r15, 24(%rdi) + movq 24(%rdi), %rcx + addq %r14, %r8 + movq 8(%rdi), %r14 + movq -32(%rdi), %rax + roll $30, %r12d + movq 48(%rdi), %rdx + movq 32(%rdi), %r10 + movl %r11d, %r15d + leaq 1859775393(%r8,%rcx), %r9 + xorq %r12, %r11 + roll $30, %r15d + xorq %rax, %r14 + xorq %rbx, %r11 + xorq %rdx, %r14 + movl %r9d, %ecx + xorl %r10d, %r14d + roll $5, %ecx + roll %r14d + addq %r11, %rcx + movq %r14, 32(%rdi) + addq %r13, %rcx + movq 32(%rdi), %rsi + movq 16(%rdi), %r13 + movq -24(%rdi), %rax + movl %r9d, %r14d + movq 56(%rdi), %rdx + movq 40(%rdi), %r11 + xorq %r15, %r9 + leaq 1859775393(%rcx,%rsi), %r10 + xorq %r12, %r9 + roll $30, %r14d + xorq %rax, %r13 + xorq %rdx, %r13 + movl %r10d, %esi + xorl %r11d, %r13d + roll $5, %esi + roll %r13d + addq %r9, %rsi + movq %r13, 40(%rdi) + movq 40(%rdi), %r8 + addq %rbx, %rsi + movq 24(%rdi), %rbx + movq -16(%rdi), %rax + movl %r10d, %r13d + movq 64(%rdi), %rdx + movq 48(%rdi), %r9 + xorq %r14, %r10 + leaq 1859775393(%rsi,%r8), %r11 + xorq %r15, %r10 + roll $30, %r13d + xorq %rax, %rbx + xorq %rdx, %rbx + movl %r11d, %r8d + xorl %r9d, %ebx + roll $5, %r8d + roll %ebx + addq %r10, %r8 + movq %rbx, 48(%rdi) + addq %r12, %r8 + movq 48(%rdi), %rcx + movq 32(%rdi), %r12 + movq -8(%rdi), %rax + movl %r11d, %ebx + movq 72(%rdi), %rdx + movq 56(%rdi), %r9 + leaq 1859775393(%r8,%rcx), %r10 + xorq %rax, %r12 + xorq %rdx, %r12 + movl %r10d, %ecx + xorl %r9d, %r12d + xorq %r13, %r11 + roll $5, %ecx + xorq %r14, %r11 + roll %r12d + roll $30, %ebx + addq %r11, %rcx + movq %r12, 56(%rdi) + movq 56(%rdi), %rsi + addq %r15, %rcx + movq 40(%rdi), %r15 + movq (%rdi), %rax + movq -48(%rdi), %rdx + movq 64(%rdi), %r9 + movl %r10d, %r12d + leaq 1859775393(%rcx,%rsi), %r11 + xorq %rbx, %r10 + roll $30, %r12d + xorq %rax, %r15 + xorq %r13, %r10 + xorq %rdx, 
%r15 + movl %r11d, %esi + xorl %r9d, %r15d + roll $5, %esi + roll %r15d + addq %r10, %rsi + movq %r15, 64(%rdi) + movq 64(%rdi), %r8 + addq %r14, %rsi + movq 48(%rdi), %r14 + movq 8(%rdi), %rax + movl %r11d, %r15d + movq -40(%rdi), %rdx + movq 72(%rdi), %r10 + xorq %r12, %r11 + leaq 1859775393(%rsi,%r8), %r9 + xorq %rbx, %r11 + roll $30, %r15d + xorq %rax, %r14 + xorq %rdx, %r14 + movl %r9d, %r8d + xorl %r10d, %r14d + roll $5, %r8d + roll %r14d + addq %r11, %r8 + movq %r14, 72(%rdi) + addq %r13, %r8 + movq 72(%rdi), %rcx + movq 56(%rdi), %r13 + movq 16(%rdi), %rax + movl %r9d, %r14d + movq -32(%rdi), %rdx + movq -48(%rdi), %r11 + leaq 1859775393(%r8,%rcx), %r10 + xorq %rax, %r13 + xorq %rdx, %r13 + movl %r10d, %ecx + xorl %r11d, %r13d + roll $5, %ecx + roll %r13d + xorq %r15, %r9 + roll $30, %r14d + xorq %r12, %r9 + movq %r13, -48(%rdi) + movq -48(%rdi), %rsi + addq %r9, %rcx + movl %r10d, %r13d + xorq %r14, %r10 + addq %rbx, %rcx + movq 64(%rdi), %rbx + movq 24(%rdi), %rax + movq -24(%rdi), %rdx + leaq 1859775393(%rcx,%rsi), %r11 + movq -40(%rdi), %r9 + xorq %r15, %r10 + roll $30, %r13d + xorq %rax, %rbx + movl %r11d, %esi + xorq %rdx, %rbx + roll $5, %esi + xorl %r9d, %ebx + addq %r10, %rsi + roll %ebx + addq %r12, %rsi + movq %rbx, -40(%rdi) + movq -40(%rdi), %r8 + movl %r11d, %ebx + movq 72(%rdi), %r12 + movq 32(%rdi), %rax + xorq %r13, %r11 + movq -16(%rdi), %rdx + movq -32(%rdi), %r9 + xorq %r14, %r11 + leaq 1859775393(%rsi,%r8), %r10 + roll $30, %ebx + xorq %rax, %r12 + xorq %rdx, %r12 + movl %r10d, %r8d + xorl %r9d, %r12d + roll $5, %r8d + roll %r12d + addq %r11, %r8 + movq %r12, -32(%rdi) + movq -32(%rdi), %rcx + addq %r15, %r8 + movq -48(%rdi), %r15 + movq 40(%rdi), %rax + movl %r10d, %r12d + movq -8(%rdi), %rdx + movq -24(%rdi), %r9 + xorq %rbx, %r10 + leaq 1859775393(%r8,%rcx), %r11 + xorq %r13, %r10 + xorq %rax, %r15 + xorq %rdx, %r15 + movl %r11d, %ecx + xorl %r9d, %r15d + roll $5, %ecx + roll %r15d + addq %r10, %rcx + addq %r14, %rcx + movq %r15, 
-24(%rdi) + movq -24(%rdi), %rsi + movq -40(%rdi), %r14 + movq 48(%rdi), %rax + roll $30, %r12d + movq (%rdi), %rdx + movq -16(%rdi), %r10 + movl %r11d, %r15d + leaq 1859775393(%rcx,%rsi), %r9 + xorq %r12, %r11 + roll $30, %r15d + xorq %rax, %r14 + xorq %rbx, %r11 + xorq %rdx, %r14 + movl %r9d, %esi + xorl %r10d, %r14d + roll $5, %esi + roll %r14d + addq %r11, %rsi + movq %r14, -16(%rdi) + movq -16(%rdi), %r8 + addq %r13, %rsi + movq -32(%rdi), %r11 + movq 56(%rdi), %rax + movl %r9d, %r14d + movq 8(%rdi), %rdx + movq -8(%rdi), %r10 + xorq %r15, %r9 + leaq 1859775393(%rsi,%r8), %r13 + xorq %r12, %r9 + roll $30, %r14d + xorq %rax, %r11 + xorq %rdx, %r11 + movl %r13d, %r8d + xorl %r10d, %r11d + roll $5, %r8d + movl %r13d, %r10d + roll %r11d + addq %r9, %r8 + xorq %r14, %r13 + movq %r11, -8(%rdi) + addq %rbx, %r8 + movq -8(%rdi), %rbx + movq -24(%rdi), %r9 + movq 64(%rdi), %rax + xorq %r15, %r13 + movq 16(%rdi), %rdx + movq (%rdi), %rcx + leaq 1859775393(%r8,%rbx), %r11 + xorq %rax, %r9 + xorq %rdx, %r9 + movl %r11d, %ebx + xorl %ecx, %r9d + roll $5, %ebx + roll %r9d + addq %r13, %rbx + movq %r9, (%rdi) + movq (%rdi), %rsi + addq %r12, %rbx + movq -16(%rdi), %r12 + movq 72(%rdi), %r13 + movl %r11d, %r9d + leaq 1859775393(%rbx,%rsi), %rcx + movl %r10d, %ebx + movq 24(%rdi), %r10 + movq 8(%rdi), %rax + xorq %r13, %r12 + roll $30, %ebx + movl %ecx, %esi + xorq %r10, %r12 + xorq %rbx, %r11 + roll $5, %esi + xorl %eax, %r12d + xorq %r14, %r11 + roll $30, %r9d + roll %r12d + addq %r11, %rsi + movq %rcx, %rax + movq %r12, 8(%rdi) + movq 8(%rdi), %rdx + addq %r15, %rsi + movq -8(%rdi), %r11 + movq -48(%rdi), %r13 + movl %ecx, %r12d + movq 32(%rdi), %r10 + movq 16(%rdi), %r8 + orq %r9, %rcx + leaq 1859775393(%rsi,%rdx), %rsi + andq %rbx, %rcx + andq %r9, %rax + xorq %r13, %r11 + orq %rcx, %rax + roll $30, %r12d + xorq %r10, %r11 + movq %rsi, %r10 + xorl %r8d, %r11d + movl %esi, %r8d + andq %r12, %r10 + roll %r11d + roll $5, %r8d + movq %r11, 16(%rdi) + addq %rax, %r8 + movq 
16(%rdi), %r15 + movq (%rdi), %r13 + movq -40(%rdi), %rdx + addq %r14, %r8 + movq 40(%rdi), %r14 + movq 24(%rdi), %rcx + movl %esi, %r11d + addq %r15, %r8 + movl $2400959708, %r15d + orq %r12, %rsi + xorq %rdx, %r13 + addq %r15, %r8 + andq %r9, %rsi + xorq %r14, %r13 + orq %rsi, %r10 + xorl %ecx, %r13d + movl %r8d, %ecx + roll %r13d + roll $5, %ecx + movq %r13, 24(%rdi) + addq %r10, %rcx + movq 24(%rdi), %rax + movq 8(%rdi), %r14 + movq -32(%rdi), %rdx + addq %rbx, %rcx + movq 48(%rdi), %rbx + movq 32(%rdi), %rsi + roll $30, %r11d + addq %rax, %rcx + movl %r8d, %r13d + movq %r8, %r10 + xorq %rdx, %r14 + addq %r15, %rcx + orq %r11, %r8 + xorq %rbx, %r14 + andq %r12, %r8 + andq %r11, %r10 + xorl %esi, %r14d + movl %ecx, %esi + orq %r8, %r10 + roll $5, %esi + roll %r14d + roll $30, %r13d + addq %r10, %rsi + movq %r14, 32(%rdi) + movq 32(%rdi), %rax + addq %r9, %rsi + movq 16(%rdi), %r9 + movq -24(%rdi), %rdx + movq 56(%rdi), %rbx + movq 40(%rdi), %r8 + movl %ecx, %r14d + addq %rax, %rsi + movq %rcx, %r10 + orq %r13, %rcx + xorq %rdx, %r9 + addq %r15, %rsi + andq %r11, %rcx + xorq %rbx, %r9 + andq %r13, %r10 + roll $30, %r14d + xorl %r8d, %r9d + movl %esi, %r8d + orq %rcx, %r10 + roll %r9d + roll $5, %r8d + movq %r9, 40(%rdi) + addq %r10, %r8 + movq 40(%rdi), %rax + movq 24(%rdi), %r10 + movq -16(%rdi), %rdx + addq %r12, %r8 + movq 64(%rdi), %rbx + movq 48(%rdi), %rcx + movl %esi, %r9d + addq %rax, %r8 + movq %rsi, %r12 + xorq %rdx, %r10 + addq %r15, %r8 + xorq %rbx, %r10 + orq %r14, %rsi + andq %r14, %r12 + andq %r13, %rsi + xorl %ecx, %r10d + movl %r8d, %ecx + orq %rsi, %r12 + roll %r10d + roll $5, %ecx + movq %r10, 48(%rdi) + addq %r12, %rcx + movq 48(%rdi), %rax + movq 32(%rdi), %r12 + movq -8(%rdi), %rdx + addq %r11, %rcx + movq 72(%rdi), %rbx + movq 56(%rdi), %rsi + roll $30, %r9d + addq %rax, %rcx + movl %r8d, %r10d + movq %r8, %r11 + xorq %rdx, %r12 + addq %r15, %rcx + orq %r9, %r8 + xorq %rbx, %r12 + andq %r14, %r8 + andq %r9, %r11 + xorl %esi, %r12d + movl 
%ecx, %esi + orq %r8, %r11 + roll %r12d + roll $5, %esi + roll $30, %r10d + movq %r12, 56(%rdi) + addq %r11, %rsi + movq 56(%rdi), %rax + movq 40(%rdi), %r11 + movq (%rdi), %rdx + addq %r13, %rsi + movq -48(%rdi), %rbx + movq 64(%rdi), %r8 + movq %rcx, %r13 + addq %rax, %rsi + andq %r10, %r13 + movl %ecx, %r12d + xorq %rdx, %r11 + addq %r15, %rsi + xorq %rbx, %r11 + xorl %r8d, %r11d + movl %esi, %r8d + roll %r11d + roll $5, %r8d + orq %r10, %rcx + andq %r9, %rcx + movq %r11, 64(%rdi) + movq 64(%rdi), %rax + orq %rcx, %r13 + roll $30, %r12d + movl %esi, %r11d + addq %r13, %r8 + movq 48(%rdi), %r13 + movq 8(%rdi), %rdx + movq -40(%rdi), %rbx + addq %r14, %r8 + movq 72(%rdi), %rcx + addq %rax, %r8 + movq %rsi, %r14 + orq %r12, %rsi + xorq %rdx, %r13 + addq %r15, %r8 + andq %r10, %rsi + xorq %rbx, %r13 + andq %r12, %r14 + roll $30, %r11d + xorl %ecx, %r13d + movl %r8d, %ecx + orq %rsi, %r14 + roll %r13d + roll $5, %ecx + movq %r13, 72(%rdi) + addq %r14, %rcx + movq 72(%rdi), %rax + movq 56(%rdi), %r14 + movq 16(%rdi), %rdx + addq %r9, %rcx + movq -32(%rdi), %rbx + movq -48(%rdi), %rsi + movl %r8d, %r13d + addq %rax, %rcx + movq %r8, %r9 + orq %r11, %r8 + xorq %rdx, %r14 + addq %r15, %rcx + andq %r12, %r8 + xorq %rbx, %r14 + andq %r11, %r9 + xorl %esi, %r14d + movl %ecx, %esi + orq %r8, %r9 + roll $5, %esi + roll %r14d + addq %r9, %rsi + movq %r14, -48(%rdi) + movq -48(%rdi), %rax + addq %r10, %rsi + movq 64(%rdi), %r10 + movq 24(%rdi), %rdx + movq -24(%rdi), %rbx + movq -40(%rdi), %r8 + movl %ecx, %r14d + addq %rax, %rsi + roll $30, %r13d + movq %rcx, %r9 + xorq %rdx, %r10 + addq %r15, %rsi + orq %r13, %rcx + xorq %rbx, %r10 + andq %r11, %rcx + andq %r13, %r9 + xorl %r8d, %r10d + movl %esi, %r8d + orq %rcx, %r9 + roll $5, %r8d + roll %r10d + roll $30, %r14d + addq %r9, %r8 + movq %r10, -40(%rdi) + movq -40(%rdi), %rax + addq %r12, %r8 + movq 72(%rdi), %r12 + movq 32(%rdi), %rdx + movq -16(%rdi), %rbx + movq -32(%rdi), %rcx + movl %esi, %r10d + addq %rax, %r8 + movq 
%rsi, %r9 + orq %r14, %rsi + xorq %rdx, %r12 + addq %r15, %r8 + andq %r13, %rsi + xorq %rbx, %r12 + andq %r14, %r9 + roll $30, %r10d + xorl %ecx, %r12d + movl %r8d, %ecx + orq %rsi, %r9 + roll $5, %ecx + roll %r12d + addq %r9, %rcx + movq %r12, -32(%rdi) + movq -32(%rdi), %rax + addq %r11, %rcx + movq -48(%rdi), %r11 + movq 40(%rdi), %rdx + movq -8(%rdi), %rbx + movq -24(%rdi), %rsi + movl %r8d, %r12d + addq %rax, %rcx + movq %r8, %r9 + xorq %rdx, %r11 + addq %r15, %rcx + xorq %rbx, %r11 + xorl %esi, %r11d + orq %r10, %r8 + andq %r10, %r9 + andq %r14, %r8 + movl %ecx, %esi + roll %r11d + orq %r8, %r9 + roll $5, %esi + movq %r11, -24(%rdi) + addq %r9, %rsi + movq -24(%rdi), %rax + roll $30, %r12d + addq %r13, %rsi + movq -40(%rdi), %r13 + movq 48(%rdi), %rdx + movq (%rdi), %rbx + movq -16(%rdi), %r8 + movl %ecx, %r11d + addq %rax, %rsi + movq %rcx, %r9 + orq %r12, %rcx + xorq %rdx, %r13 + addq %r15, %rsi + andq %r10, %rcx + xorq %rbx, %r13 + andq %r12, %r9 + roll $30, %r11d + xorl %r8d, %r13d + movl %esi, %r8d + orq %rcx, %r9 + roll %r13d + roll $5, %r8d + movq %r13, -16(%rdi) + addq %r9, %r8 + movq -16(%rdi), %rax + movq -32(%rdi), %r9 + movq 56(%rdi), %rdx + addq %r14, %r8 + movq 8(%rdi), %rcx + movq -8(%rdi), %rbx + movl %esi, %r13d + addq %rax, %r8 + movq %rsi, %r14 + orq %r11, %rsi + xorq %rdx, %r9 + addq %r15, %r8 + andq %r11, %r14 + xorq %rcx, %r9 + xorl %ebx, %r9d + movl %r8d, %ebx + roll %r9d + roll $5, %ebx + andq %r12, %rsi + orq %rsi, %r14 + movq %r9, -8(%rdi) + movq -8(%rdi), %rax + addq %r14, %rbx + movq -24(%rdi), %r14 + movq 64(%rdi), %rdx + movq 16(%rdi), %rcx + addq %r10, %rbx + movq (%rdi), %rsi + roll $30, %r13d + addq %rax, %rbx + movl %r8d, %r9d + xorq %rdx, %r14 + addq %r15, %rbx + movq %r8, %r10 + xorq %rcx, %r14 + orq %r13, %r8 + andq %r13, %r10 + andq %r11, %r8 + xorl %esi, %r14d + movl %ebx, %esi + orq %r8, %r10 + roll $5, %esi + roll %r14d + addq %r10, %rsi + movq %r14, (%rdi) + movq (%rdi), %rax + addq %r12, %rsi + movq -16(%rdi), %r12 + 
movq 72(%rdi), %rdx + movq 24(%rdi), %rcx + movq 8(%rdi), %r8 + roll $30, %r9d + addq %rax, %rsi + movl %ebx, %r14d + movq %rbx, %r10 + xorq %rdx, %r12 + addq %r15, %rsi + orq %r9, %rbx + xorq %rcx, %r12 + andq %r13, %rbx + andq %r9, %r10 + xorl %r8d, %r12d + movl %esi, %r8d + orq %rbx, %r10 + roll %r12d + roll $5, %r8d + movq %r12, 8(%rdi) + movq 8(%rdi), %rax + addq %r10, %r8 + movq -8(%rdi), %rbx + movq -48(%rdi), %rdx + addq %r11, %r8 + movq 32(%rdi), %r11 + movq 16(%rdi), %rcx + movl %esi, %r12d + addq %rax, %r8 + movq %rsi, %r10 + addq %r15, %r8 + xorq %rdx, %rbx + roll $30, %r14d + xorq %r11, %rbx + orq %r14, %rsi + andq %r14, %r10 + xorl %ecx, %ebx + andq %r9, %rsi + movl %r8d, %ecx + roll %ebx + orq %rsi, %r10 + roll $5, %ecx + movq %rbx, 16(%rdi) + movq 16(%rdi), %rsi + addq %r10, %rcx + movq (%rdi), %r11 + movq -40(%rdi), %rax + addq %r13, %rcx + movq 40(%rdi), %rdx + movq 24(%rdi), %r13 + roll $30, %r12d + addq %rsi, %rcx + movl %r8d, %ebx + movq %r8, %r10 + xorq %rax, %r11 + addq %r15, %rcx + orq %r12, %r8 + xorq %rdx, %r11 + andq %r14, %r8 + andq %r12, %r10 + xorl %r13d, %r11d + movl %ecx, %r13d + orq %r8, %r10 + roll %r11d + roll $5, %r13d + roll $30, %ebx + movq %r11, 24(%rdi) + addq %r10, %r13 + movq 24(%rdi), %rsi + movq 8(%rdi), %r10 + movq -32(%rdi), %rax + addq %r9, %r13 + movq 48(%rdi), %rdx + movq 32(%rdi), %r8 + movl %ecx, %r11d + addq %rsi, %r13 + movq %rcx, %r9 + xorq %rax, %r10 + addq %r15, %r13 + xorq %rdx, %r10 + xorl %r8d, %r10d + movl %r13d, %r8d + roll %r10d + orq %rbx, %rcx + andq %rbx, %r9 + movq %r10, 32(%rdi) + andq %r12, %rcx + movl %r13d, %r10d + orq %rcx, %r9 + roll $5, %r10d + movq 32(%rdi), %rsi + addq %r9, %r10 + roll $30, %r11d + movq %r13, %rcx + addq %r14, %r10 + movq 16(%rdi), %r14 + movq -24(%rdi), %rax + movq 56(%rdi), %rdx + movq 40(%rdi), %r9 + addq %rsi, %r10 + addq %r15, %r10 + orq %r11, %r13 + andq %r11, %rcx + xorq %rax, %r14 + andq %rbx, %r13 + xorq %rdx, %r14 + orq %r13, %rcx + xorl %r9d, %r14d + movl %r10d, 
%r9d + roll %r14d + roll $5, %r9d + movq %r14, 40(%rdi) + movq 40(%rdi), %rsi + addq %rcx, %r9 + movq 24(%rdi), %r13 + addq %r12, %r9 + movq -16(%rdi), %r12 + movq 64(%rdi), %rax + movl %r10d, %r14d + addq %rsi, %r9 + movl %r8d, %esi + addq %r15, %r9 + movq 48(%rdi), %r15 + xorq %r12, %r13 + roll $30, %esi + xorq %rax, %r13 + xorq %rsi, %r10 + xorl %r15d, %r13d + movl %r9d, %r15d + xorq %r11, %r10 + roll $5, %r15d + roll %r13d + addq %r10, %r15 + movq %r13, 48(%rdi) + movq 48(%rdi), %r10 + addq %rbx, %r15 + movq 32(%rdi), %rbx + movq -8(%rdi), %r8 + movq 72(%rdi), %rdx + movq 56(%rdi), %rcx + roll $30, %r14d + addq %r10, %r15 + movl $3395469782, %r10d + movl %r9d, %r13d + xorq %r8, %rbx + addq %r10, %r15 + xorq %r14, %r9 + xorq %rdx, %rbx + xorq %rsi, %r9 + roll $30, %r13d + xorl %ecx, %ebx + movl %r15d, %ecx + roll %ebx + roll $5, %ecx + movq %rbx, 56(%rdi) + addq %r9, %rcx + movq 56(%rdi), %r12 + movq 40(%rdi), %r9 + movq (%rdi), %rax + addq %r11, %rcx + movq -48(%rdi), %r8 + movq 64(%rdi), %r11 + movl %r15d, %ebx + addq %r12, %rcx + xorq %r13, %r15 + roll $30, %ebx + xorq %rax, %r9 + addq %r10, %rcx + xorq %r14, %r15 + xorq %r8, %r9 + xorl %r11d, %r9d + movl %ecx, %r11d + roll %r9d + roll $5, %r11d + movq %r9, 64(%rdi) + addq %r15, %r11 + movq 64(%rdi), %rdx + movq 48(%rdi), %r15 + movq 8(%rdi), %r12 + addq %rsi, %r11 + movq -40(%rdi), %rax + movq 72(%rdi), %r8 + movl %ecx, %r9d + addq %rdx, %r11 + xorq %r12, %r15 + addq %r10, %r11 + xorq %rax, %r15 + xorl %r8d, %r15d + movl %r11d, %r8d + roll %r15d + roll $5, %r8d + xorq %rbx, %rcx + xorq %r13, %rcx + movq %r15, 72(%rdi) + movq 72(%rdi), %rsi + addq %rcx, %r8 + movq 56(%rdi), %r12 + movq 16(%rdi), %rcx + movq -32(%rdi), %rdx + addq %r14, %r8 + movq -48(%rdi), %r14 + addq %rsi, %r8 + roll $30, %r9d + movl %r11d, %r15d + xorq %rcx, %r12 + addq %r10, %r8 + xorq %r9, %r11 + xorq %rdx, %r12 + xorq %rbx, %r11 + roll $30, %r15d + xorl %r14d, %r12d + movl %r8d, %r14d + roll $5, %r14d + roll %r12d + addq %r11, %r14 + 
movq %r12, -48(%rdi) + movq -48(%rdi), %rax + addq %r13, %r14 + movq 64(%rdi), %r13 + movq 24(%rdi), %rsi + movq -24(%rdi), %rcx + movq -40(%rdi), %r11 + movl %r8d, %r12d + addq %rax, %r14 + xorq %r15, %r8 + roll $30, %r12d + xorq %rsi, %r13 + addq %r10, %r14 + xorq %r9, %r8 + xorq %rcx, %r13 + xorl %r11d, %r13d + movl %r14d, %r11d + roll $5, %r11d + roll %r13d + addq %r8, %r11 + movq %r13, -40(%rdi) + movq -40(%rdi), %rdx + addq %rbx, %r11 + movq 72(%rdi), %rbx + movq 32(%rdi), %rax + movq -16(%rdi), %rsi + movq -32(%rdi), %r8 + movl %r14d, %r13d + addq %rdx, %r11 + xorq %rax, %rbx + addq %r10, %r11 + xorq %rsi, %rbx + xorl %r8d, %ebx + xorq %r12, %r14 + movl %r11d, %r8d + xorq %r15, %r14 + roll %ebx + roll $5, %r8d + movq %rbx, -32(%rdi) + addq %r14, %r8 + movq -32(%rdi), %rcx + movq -48(%rdi), %r14 + movq 40(%rdi), %rdx + addq %r9, %r8 + movq -8(%rdi), %rax + movq -24(%rdi), %r9 + roll $30, %r13d + addq %rcx, %r8 + movl %r11d, %ebx + xorq %r13, %r11 + xorq %rdx, %r14 + addq %r10, %r8 + xorq %r12, %r11 + xorq %rax, %r14 + roll $30, %ebx + xorl %r9d, %r14d + movl %r8d, %r9d + roll $5, %r9d + roll %r14d + addq %r11, %r9 + movq %r14, -24(%rdi) + movq -24(%rdi), %rsi + addq %r15, %r9 + movq -40(%rdi), %r15 + movq 48(%rdi), %rcx + movq (%rdi), %rdx + movq -16(%rdi), %r11 + movl %r8d, %r14d + addq %rsi, %r9 + xorq %rbx, %r8 + xorq %rcx, %r15 + addq %r10, %r9 + xorq %r13, %r8 + xorq %rdx, %r15 + xorl %r11d, %r15d + movl %r9d, %r11d + roll %r15d + roll $5, %r11d + movq %r15, -16(%rdi) + addq %r8, %r11 + movq -16(%rdi), %rax + addq %r12, %r11 + movq -32(%rdi), %r12 + movq 56(%rdi), %rsi + movq 8(%rdi), %rcx + movq -8(%rdi), %r8 + movl %r9d, %r15d + addq %rax, %r11 + addq %r10, %r11 + roll $30, %r14d + xorq %rsi, %r12 + xorq %rcx, %r12 + xorq %r14, %r9 + roll $30, %r15d + xorl %r8d, %r12d + movl %r11d, %r8d + xorq %rbx, %r9 + roll $5, %r8d + roll %r12d + addq %r9, %r8 + movq %r12, -8(%rdi) + movq -8(%rdi), %rdx + addq %r13, %r8 + movq -24(%rdi), %r13 + movq 64(%rdi), %rax 
+ movq 16(%rdi), %rsi + movq (%rdi), %rcx + movl %r11d, %r12d + addq %rdx, %r8 + xorq %r15, %r11 + roll $30, %r12d + xorq %rax, %r13 + addq %r10, %r8 + xorq %r14, %r11 + xorq %rsi, %r13 + xorl %ecx, %r13d + movl %r8d, %ecx + roll $5, %ecx + roll %r13d + addq %r11, %rcx + movq %r13, (%rdi) + movq (%rdi), %r9 + addq %rbx, %rcx + movq -16(%rdi), %rbx + movq 72(%rdi), %rdx + movq 24(%rdi), %rax + movq 8(%rdi), %rsi + movl %r8d, %r13d + addq %r9, %rcx + xorq %r12, %r8 + xorq %rdx, %rbx + addq %r10, %rcx + xorq %r15, %r8 + xorq %rax, %rbx + xorl %esi, %ebx + movl %ecx, %esi + roll $5, %esi + roll %ebx + addq %r8, %rsi + movq %rbx, 8(%rdi) + movq 8(%rdi), %r11 + addq %r14, %rsi + movq -8(%rdi), %r14 + movq -48(%rdi), %r9 + movq 32(%rdi), %rdx + movq 16(%rdi), %r8 + roll $30, %r13d + addq %r11, %rsi + movl %ecx, %ebx + xorq %r13, %rcx + xorq %r9, %r14 + addq %r10, %rsi + xorq %r12, %rcx + xorq %rdx, %r14 + roll $30, %ebx + xorl %r8d, %r14d + movl %esi, %r8d + roll $5, %r8d + roll %r14d + addq %rcx, %r8 + movq %r14, 16(%rdi) + movq 16(%rdi), %rax + addq %r15, %r8 + movq (%rdi), %r15 + movq -40(%rdi), %r11 + movq 40(%rdi), %r9 + movq 24(%rdi), %rcx + movl %esi, %r14d + addq %rax, %r8 + xorq %rbx, %rsi + roll $30, %r14d + xorq %r11, %r15 + addq %r10, %r8 + xorq %r13, %rsi + xorq %r9, %r15 + xorl %ecx, %r15d + movl %r8d, %ecx + roll %r15d + roll $5, %ecx + movq %r15, 24(%rdi) + addq %rsi, %rcx + movq 24(%rdi), %rdx + movq 8(%rdi), %r11 + movq -32(%rdi), %rax + addq %r12, %rcx + movq 48(%rdi), %r12 + movq 32(%rdi), %rsi + movl %r8d, %r15d + addq %rdx, %rcx + xorq %rax, %r11 + addq %r10, %rcx + xorq %r12, %r11 + xorl %esi, %r11d + movl %ecx, %esi + roll %r11d + movq %r11, 32(%rdi) + movl %ecx, %r11d + movq 32(%rdi), %r9 + roll $5, %r11d + xorq %r14, %r8 + movq 16(%rdi), %r12 + xorq %rbx, %r8 + movq -24(%rdi), %rdx + movq 56(%rdi), %rax + addq %r8, %r11 + movq 40(%rdi), %r8 + roll $30, %r15d + addq %r13, %r11 + xorq %r15, %rcx + addq %r9, %r11 + xorq %rdx, %r12 + xorq %r14, %rcx 
+ addq %r10, %r11 + xorq %rax, %r12 + xorl %r8d, %r12d + movl %r11d, %r8d + roll $5, %r8d + roll %r12d + addq %rcx, %r8 + movq %r12, 40(%rdi) + movq 40(%rdi), %r13 + addq %rbx, %r8 + movq 24(%rdi), %rbx + movq -16(%rdi), %r9 + movq 64(%rdi), %rdx + movq 48(%rdi), %rcx + movl %r11d, %r12d + addq %r13, %r8 + movl %esi, %r13d + roll $30, %r12d + xorq %r9, %rbx + addq %r10, %r8 + roll $30, %r13d + xorq %rdx, %rbx + xorq %r13, %r11 + xorl %ecx, %ebx + movl %r8d, %ecx + xorq %r15, %r11 + roll %ebx + roll $5, %ecx + movq %rbx, 48(%rdi) + addq %r11, %rcx + movq 48(%rdi), %rax + movq 32(%rdi), %r11 + movq -8(%rdi), %rsi + addq %r14, %rcx + movq 72(%rdi), %r9 + movq 56(%rdi), %r14 + movl %r8d, %ebx + addq %rax, %rcx + xorq %rsi, %r11 + addq %r10, %rcx + xorq %r9, %r11 + xorl %r14d, %r11d + xorq %r12, %r8 + movl %ecx, %r14d + xorq %r13, %r8 + roll %r11d + roll $5, %r14d + movq %r11, 56(%rdi) + addq %r8, %r14 + movq 56(%rdi), %rdx + movq 40(%rdi), %r8 + movq (%rdi), %rax + addq %r15, %r14 + movq -48(%rdi), %r15 + movq 64(%rdi), %rsi + roll $30, %ebx + addq %rdx, %r14 + movl %ecx, %r11d + xorq %rbx, %rcx + xorq %rax, %r8 + addq %r10, %r14 + xorq %r12, %rcx + xorq %r15, %r8 + roll $30, %r11d + xorl %esi, %r8d + movl %r14d, %esi + roll %r8d + roll $5, %esi + movq %r8, 64(%rdi) + movq 64(%rdi), %r9 + addq %rcx, %rsi + movq 48(%rdi), %r15 + movq 8(%rdi), %rcx + addq %r13, %rsi + movq -40(%rdi), %rdx + movq 72(%rdi), %rax + movl %r14d, %r8d + addq %r9, %rsi + xorq %r11, %r14 + addq %r10, %rsi + xorq %rcx, %r15 + xorq %rbx, %r14 + xorq %rdx, %r15 + movl %esi, %r13d + xorl %eax, %r15d + roll $5, %r13d + roll %r15d + addq %r14, %r13 + movq %r15, 72(%rdi) + addq %r12, %r13 + movq 72(%rdi), %r12 + addq %r12, %r13 + addq %r10, %r13 + movq -88(%rdi), %r10 + roll $30, %r8d + addq %r13, %r10 + movq %r10, -88(%rdi) + movq -80(%rdi), %r9 + addq %rsi, %r9 + movq %r9, -80(%rdi) + movq -72(%rdi), %rcx + addq %r8, %rcx + movq %rcx, -72(%rdi) + movq -64(%rdi), %rdx + addq %r11, %rdx + movq %rdx, 
-64(%rdi) + movq -56(%rdi), %rax + addq %rbx, %rax + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + movq %rax, -56(%rdi) + ret +.LFE7: + .size shaCompress, .-shaCompress + .align 16 +.globl SHA1_Update + .type SHA1_Update, @function +SHA1_Update: +.LFB5: + pushq %rbp +.LCFI5: + movq %rsp, %rbp +.LCFI6: + movq %r13, -24(%rbp) +.LCFI7: + movq %r14, -16(%rbp) +.LCFI8: + movl %edx, %r13d + movq %r15, -8(%rbp) +.LCFI9: + movq %rbx, -40(%rbp) +.LCFI10: + movq %rdi, %r15 + movq %r12, -32(%rbp) +.LCFI11: + subq $48, %rsp +.LCFI12: + testl %edx, %edx + movq %rsi, %r14 + je .L243 + movq 64(%rdi), %rdx + mov %r13d, %ecx + leaq (%rdx,%rcx), %rax + movq %rax, 64(%rdi) + movl %edx, %eax + andl $63, %eax + movl %eax, -44(%rbp) + jne .L256 +.L245: + cmpl $63, %r13d + jbe .L253 + leaq 160(%r15), %rbx + .align 16 +.L250: + movq %r14, %rsi + subl $64, %r13d + movq %rbx, %rdi + call shaCompress + addq $64, %r14 + cmpl $63, %r13d + ja .L250 +.L253: + testl %r13d, %r13d + je .L243 + mov %r13d, %edx + movq %r14, %rsi + movq %r15, %rdi + movq -40(%rbp), %rbx + movq -32(%rbp), %r12 + movq -24(%rbp), %r13 + movq -16(%rbp), %r14 + movq -8(%rbp), %r15 + leave + jmp memcpy@PLT + .align 16 +.L243: + movq -40(%rbp), %rbx + movq -32(%rbp), %r12 + movq -24(%rbp), %r13 + movq -16(%rbp), %r14 + movq -8(%rbp), %r15 + leave + ret +.L256: + movl $64, %ebx + mov %eax, %edi + subl %eax, %ebx + cmpl %ebx, %r13d + cmovb %r13d, %ebx + addq %r15, %rdi + mov %ebx, %r12d + subl %ebx, %r13d + movq %r12, %rdx + addq %r12, %r14 + call memcpy@PLT + addl -44(%rbp), %ebx + andl $63, %ebx + jne .L245 + leaq 160(%r15), %rdi + movq %r15, %rsi + call shaCompress + jmp .L245 +.LFE5: + .size SHA1_Update, .-SHA1_Update + .section .rodata + .align 32 + .type bulk_pad.0, @object + .size bulk_pad.0, 64 +bulk_pad.0: + .byte -128 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 
0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .text + .align 16 +.globl SHA1_End + .type SHA1_End, @function +SHA1_End: +.LFB6: + pushq %rbp +.LCFI13: + movq %rsp, %rbp +.LCFI14: + movq %r12, -24(%rbp) +.LCFI15: + movq %r13, -16(%rbp) +.LCFI16: + movq %rsi, %r13 + movq %r14, -8(%rbp) +.LCFI17: + movq %rbx, -32(%rbp) +.LCFI18: + subq $32, %rsp +.LCFI19: + movq 64(%rdi), %rbx + movq %rdx, %r14 + movl $119, %edx + leaq bulk_pad.0(%rip), %rsi + movq %rdi, %r12 + movl %ebx, %r8d + salq $3, %rbx + andl $63, %r8d + subl %r8d, %edx + andl $63, %edx + incl %edx + call SHA1_Update@PLT + movq %rbx, %rdi + movq %r12, %rsi + shrq $32, %rdi +/APP + bswap %edi +/NO_APP + movl %edi, 56(%r12) + leaq 160(%r12), %rdi +/APP + bswap %ebx +/NO_APP + movl %ebx, 60(%r12) + call shaCompress + movl 72(%r12), %esi + movl 80(%r12), %ebx + movl 88(%r12), %ecx + movl 96(%r12), %edx + movl 104(%r12), %eax + movq 8(%rsp), %r12 +/APP + bswap %ebx + bswap %esi +/NO_APP + movl %ebx, 4(%r13) + movl %esi, (%r13) +/APP + bswap %ecx + bswap %edx +/NO_APP + movl %ecx, 8(%r13) + movl %edx, 12(%r13) +/APP + bswap %eax +/NO_APP + movq (%rsp), %rbx + movl %eax, 16(%r13) + cmpq $0, %r14 + je .L133 + movl $20, (%r14) +.L133: + movq 16(%rsp), %r13 + movq 24(%rsp), %r14 + leave + ret +.LFE6: + .size SHA1_End, .-SHA1_End + .align 16 +.globl SHA1_NewContext + .type SHA1_NewContext, @function +SHA1_NewContext: +.LFB8: + movl $248, %edi + jmp PORT_Alloc_Util@PLT +.LFE8: + .size SHA1_NewContext, .-SHA1_NewContext + .align 16 +.globl SHA1_DestroyContext + .type SHA1_DestroyContext, @function +SHA1_DestroyContext: +.LFB9: + 
pushq %rbp +.LCFI20: + movl $248, %edx + movq %rsp, %rbp +.LCFI21: + movq %rbx, -16(%rbp) +.LCFI22: + movq %r12, -8(%rbp) +.LCFI23: + movl %esi, %ebx + subq $16, %rsp +.LCFI24: + xorl %esi, %esi + movq %rdi, %r12 + call memset@PLT + testl %ebx, %ebx + jne .L268 + movq (%rsp), %rbx + movq 8(%rsp), %r12 + leave + ret + .align 16 +.L268: + movq %r12, %rdi + movq (%rsp), %rbx + movq 8(%rsp), %r12 + leave + jmp PORT_Free_Util@PLT +.LFE9: + .size SHA1_DestroyContext, .-SHA1_DestroyContext + .align 16 +.globl SHA1_HashBuf + .type SHA1_HashBuf, @function +SHA1_HashBuf: +.LFB10: + pushq %rbp +.LCFI25: + movq %rsp, %rbp +.LCFI26: + movq %rbx, -32(%rbp) +.LCFI27: + leaq -288(%rbp), %rbx + movq %r12, -24(%rbp) +.LCFI28: + movq %r13, -16(%rbp) +.LCFI29: + movq %r14, -8(%rbp) +.LCFI30: + movq %rsi, %r13 + subq $304, %rsp +.LCFI31: + movq %rdi, %r14 + movl %edx, %r12d + movq %rbx, %rdi + call SHA1_Begin@PLT + movl %r12d, %edx + movq %r13, %rsi + movq %rbx, %rdi + call SHA1_Update@PLT + leaq -292(%rbp), %rdx + movq %r14, %rsi + movq %rbx, %rdi + movl $20, %ecx + call SHA1_End@PLT + movq -32(%rbp), %rbx + movq -24(%rbp), %r12 + xorl %eax, %eax + movq -16(%rbp), %r13 + movq -8(%rbp), %r14 + leave + ret +.LFE10: + .size SHA1_HashBuf, .-SHA1_HashBuf + .align 16 +.globl SHA1_Hash + .type SHA1_Hash, @function +SHA1_Hash: +.LFB11: + pushq %rbp +.LCFI32: + movq %rsp, %rbp +.LCFI33: + movq %rbx, -16(%rbp) +.LCFI34: + movq %r12, -8(%rbp) +.LCFI35: + movq %rsi, %rbx + subq $16, %rsp +.LCFI36: + movq %rdi, %r12 + movq %rsi, %rdi + call strlen@PLT + movq %rbx, %rsi + movq %r12, %rdi + movq (%rsp), %rbx + movq 8(%rsp), %r12 + leave + movl %eax, %edx + jmp SHA1_HashBuf@PLT +.LFE11: + .size SHA1_Hash, .-SHA1_Hash + .align 16 +.globl SHA1_FlattenSize + .type SHA1_FlattenSize, @function +SHA1_FlattenSize: +.LFB12: + movl $248, %eax + ret +.LFE12: + .size SHA1_FlattenSize, .-SHA1_FlattenSize + .align 16 +.globl SHA1_Flatten + .type SHA1_Flatten, @function +SHA1_Flatten: +.LFB13: + pushq %rbp 
+.LCFI37: + movq %rsi, %rax + movl $248, %edx + movq %rdi, %rsi + movq %rax, %rdi + movq %rsp, %rbp +.LCFI38: + call memcpy@PLT + leave + xorl %eax, %eax + ret +.LFE13: + .size SHA1_Flatten, .-SHA1_Flatten + .align 16 +.globl SHA1_Resurrect + .type SHA1_Resurrect, @function +SHA1_Resurrect: +.LFB14: + pushq %rbp +.LCFI39: + movq %rsp, %rbp +.LCFI40: + movq %rbx, -16(%rbp) +.LCFI41: + movq %r12, -8(%rbp) +.LCFI42: + subq $16, %rsp +.LCFI43: + movq %rdi, %r12 + call SHA1_NewContext@PLT + movq %rax, %rbx + xorl %eax, %eax + testq %rbx, %rbx + je .L273 + movl $248, %edx + movq %r12, %rsi + movq %rbx, %rdi + call memcpy@PLT + movq %rbx, %rax +.L273: + movq (%rsp), %rbx + movq 8(%rsp), %r12 + leave + ret +.LFE14: + .size SHA1_Resurrect, .-SHA1_Resurrect + .align 16 +.globl SHA1_Clone + .type SHA1_Clone, @function +SHA1_Clone: +.LFB15: + movl $248, %edx + jmp memcpy@PLT +.LFE15: + .size SHA1_Clone, .-SHA1_Clone + .align 16 +.globl SHA1_TraceState + .type SHA1_TraceState, @function +SHA1_TraceState: +.LFB16: + movl $-5992, %edi + jmp PORT_SetError_Util@PLT +.LFE16: + .size SHA1_TraceState, .-SHA1_TraceState + .align 16 +.globl SHA1_EndRaw + .type SHA1_EndRaw, @function +SHA1_EndRaw: +.LFB50: + movq 72(%rdi), %rax +/APP + bswap %eax +/NO_APP + movl %eax, (%rsi) + movq 80(%rdi), %rax +/APP + bswap %eax +/NO_APP + movl %eax, 4(%rsi) + movq 88(%rdi), %rax +/APP + bswap %eax +/NO_APP + movl %eax, 8(%rsi) + movq 96(%rdi), %rax +/APP + bswap %eax +/NO_APP + movl %eax, 12(%rsi) + movq 104(%rdi), %rax +/APP + bswap %eax +/NO_APP + testq %rdx, %rdx + movl %eax, 16(%rsi) + je .L14 + movl $20, (%rdx) +.L14: + rep + ret +.LFE50: + .size SHA1_EndRaw, .-SHA1_EndRaw diff --git a/security/nss/lib/freebl/sha1-armv8.c b/security/nss/lib/freebl/sha1-armv8.c new file mode 100644 index 0000000000..63e4dad33e --- /dev/null +++ b/security/nss/lib/freebl/sha1-armv8.c @@ -0,0 +1,264 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef USE_HW_SHA1 + +#ifndef __ARM_FEATURE_CRYPTO +#error "Compiler option is invalid" +#endif + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include <arm_neon.h> +#include <memory.h> +#include "blapi.h" +#include "sha_fast.h" + +#if !defined(SHA_PUT_W_IN_STACK) +#define H2X 11 +#else +#define H2X 0 +#endif + +static void shaCompress(SHA_HW_t *X, const PRUint32 *datain); + +void +SHA1_Compress_Native(SHA1Context *ctx) +{ + shaCompress(&ctx->H[H2X], ctx->u.w); +} + +/* + * SHA: Add data to context. + */ +void +SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len) +{ + unsigned int lenB; + unsigned int togo; + + if (!len) { + return; + } + + /* accumulate the byte count. */ + lenB = (unsigned int)(ctx->size) & 63U; + + ctx->size += len; + + /* + * Read the data into W and process blocks as they get full + */ + if (lenB > 0) { + togo = 64U - lenB; + if (len < togo) { + togo = len; + } + memcpy(ctx->u.b + lenB, dataIn, togo); + len -= togo; + dataIn += togo; + lenB = (lenB + togo) & 63U; + if (!lenB) { + shaCompress(&ctx->H[H2X], ctx->u.w); + } + } + + while (len >= 64U) { + len -= 64U; + shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn); + dataIn += 64U; + } + + if (len) { + memcpy(ctx->u.b, dataIn, len); + } +} + +/* + * SHA: Compression function, unrolled.
+ */ +static void +shaCompress(SHA_HW_t *X, const PRUint32 *inbuf) +{ +#define XH(n) X[n - H2X] + + const uint32x4_t K0 = vdupq_n_u32(0x5a827999); + const uint32x4_t K1 = vdupq_n_u32(0x6ed9eba1); + const uint32x4_t K2 = vdupq_n_u32(0x8f1bbcdc); + const uint32x4_t K3 = vdupq_n_u32(0xca62c1d6); + + uint32x4_t abcd = vld1q_u32(&XH(0)); + PRUint32 e = XH(4); + + const uint32x4_t origABCD = abcd; + const PRUint32 origE = e; + + uint32x4_t w0 = vld1q_u32(inbuf); + uint32x4_t w1 = vld1q_u32(inbuf + 4); + uint32x4_t w2 = vld1q_u32(inbuf + 8); + uint32x4_t w3 = vld1q_u32(inbuf + 12); + + w0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w0))); + w1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w1))); + w2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w2))); + w3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w3))); + + uint32x4_t t0 = vaddq_u32(w0, K0); + uint32x4_t t1 = vaddq_u32(w1, K0); + + PRUint32 tmpE; + + /* + * Using the following ARM instructions to accelerate SHA1 + * + * sha1c for round 0 - 20 + * sha1p for round 20 - 40 + * sha1m for round 40 - 60 + * sha1p for round 60 - 80 + * sha1su0 and sha1su1 for message schedule + * sha1h for rotate left 30 + */ + + /* Round 0-3 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1cq_u32(abcd, e, t0); + t0 = vaddq_u32(w2, K0); + w0 = vsha1su0q_u32(w0, w1, w2); + + /* Round 4-7 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1cq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w3, K0); + w0 = vsha1su1q_u32(w0, w3); + w1 = vsha1su0q_u32(w1, w2, w3); + + /* Round 8-11 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1cq_u32(abcd, e, t0); + t0 = vaddq_u32(w0, K0); + w1 = vsha1su1q_u32(w1, w0); + w2 = vsha1su0q_u32(w2, w3, w0); + + /* Round 12-15 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1cq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w1, K1); + w2 = vsha1su1q_u32(w2, w1); + w3 = vsha1su0q_u32(w3, w0, w1); + + /* Round 16-19 */ + tmpE =
vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1cq_u32(abcd, e, t0); + t0 = vaddq_u32(w2, K1); + w3 = vsha1su1q_u32(w3, w2); + w0 = vsha1su0q_u32(w0, w1, w2); + + /* Round 20-23 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w3, K1); + w0 = vsha1su1q_u32(w0, w3); + w1 = vsha1su0q_u32(w1, w2, w3); + + /* Round 24-27 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, e, t0); + t0 = vaddq_u32(w0, K1); + w1 = vsha1su1q_u32(w1, w0); + w2 = vsha1su0q_u32(w2, w3, w0); + + /* Round 28-31 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w1, K1); + w2 = vsha1su1q_u32(w2, w1); + w3 = vsha1su0q_u32(w3, w0, w1); + + /* Round 32-35 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, e, t0); + t0 = vaddq_u32(w2, K2); + w3 = vsha1su1q_u32(w3, w2); + w0 = vsha1su0q_u32(w0, w1, w2); + + /* Round 36-39 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w3, K2); + w0 = vsha1su1q_u32(w0, w3); + w1 = vsha1su0q_u32(w1, w2, w3); + + /* Round 40-43 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1mq_u32(abcd, e, t0); + t0 = vaddq_u32(w0, K2); + w1 = vsha1su1q_u32(w1, w0); + w2 = vsha1su0q_u32(w2, w3, w0); + + /* Round 44-47 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1mq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w1, K2); + w2 = vsha1su1q_u32(w2, w1); + w3 = vsha1su0q_u32(w3, w0, w1); + + /* Round 48-51 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1mq_u32(abcd, e, t0); + t0 = vaddq_u32(w2, K2); + w3 = vsha1su1q_u32(w3, w2); + w0 = vsha1su0q_u32(w0, w1, w2); + + /* Round 52-55 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1mq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w3, K3); + w0 = vsha1su1q_u32(w0, w3); + w1 = vsha1su0q_u32(w1, w2, w3); + + /* Round 56-59 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1mq_u32(abcd, e, 
t0); + t0 = vaddq_u32(w0, K3); + w1 = vsha1su1q_u32(w1, w0); + w2 = vsha1su0q_u32(w2, w3, w0); + + /* Round 60-63 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w1, K3); + w2 = vsha1su1q_u32(w2, w1); + w3 = vsha1su0q_u32(w3, w0, w1); + + /* Round 64-67 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, e, t0); + t0 = vaddq_u32(w2, K3); + w3 = vsha1su1q_u32(w3, w2); + w0 = vsha1su0q_u32(w0, w1, w2); + + /* Round 68-71 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, tmpE, t1); + t1 = vaddq_u32(w3, K3); + w0 = vsha1su1q_u32(w0, w3); + + /* Round 72-75 */ + tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, e, t0); + + /* Round 76-79 */ + e = vsha1h_u32(vgetq_lane_u32(abcd, 0)); + abcd = vsha1pq_u32(abcd, tmpE, t1); + + e += origE; + abcd = vaddq_u32(origABCD, abcd); + + vst1q_u32(&XH(0), abcd); + XH(4) = e; +} + +#endif /* USE_HW_SHA1 */ diff --git a/security/nss/lib/freebl/sha256-armv8.c b/security/nss/lib/freebl/sha256-armv8.c new file mode 100644 index 0000000000..17fe126c4c --- /dev/null +++ b/security/nss/lib/freebl/sha256-armv8.c @@ -0,0 +1,203 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef USE_HW_SHA2 + +#ifndef __ARM_FEATURE_CRYPTO +#error "Compiler option is invalid" +#endif + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prcpucfg.h" +#include "prtypes.h" /* for PRUintXX */ +#include "prlong.h" +#include "blapi.h" +#include "sha256.h" + +#include <arm_neon.h> + +/* SHA-256 constants, K256.
*/ +static const PRUint32 __attribute__((aligned(16))) K256[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define ROUND(n, a, b, c, d) \ + { \ + uint32x4_t t = vaddq_u32(a, k##n); \ + uint32x4_t wt = w0; \ + w0 = vsha256hq_u32(w0, w1, t); \ + w1 = vsha256h2q_u32(w1, wt, t); \ + if (n < 12) { \ + a = vsha256su0q_u32(a, b); \ + a = vsha256su1q_u32(a, c, d); \ + } \ + } + +void +SHA256_Compress_Native(SHA256Context *ctx) +{ + const uint32x4_t k0 = vld1q_u32(K256); + const uint32x4_t k1 = vld1q_u32(K256 + 4); + const uint32x4_t k2 = vld1q_u32(K256 + 8); + const uint32x4_t k3 = vld1q_u32(K256 + 12); + const uint32x4_t k4 = vld1q_u32(K256 + 16); + const uint32x4_t k5 = vld1q_u32(K256 + 20); + const uint32x4_t k6 = vld1q_u32(K256 + 24); + const uint32x4_t k7 = vld1q_u32(K256 + 28); + const uint32x4_t k8 = vld1q_u32(K256 + 32); + const uint32x4_t k9 = vld1q_u32(K256 + 36); + const uint32x4_t k10 = vld1q_u32(K256 + 40); + const uint32x4_t k11 = vld1q_u32(K256 + 44); + const uint32x4_t k12 = vld1q_u32(K256 + 48); + const uint32x4_t k13 = vld1q_u32(K256 + 52); + const uint32x4_t k14 = vld1q_u32(K256 + 56); + const uint32x4_t k15 = vld1q_u32(K256 + 60); + + uint32x4_t h0 = vld1q_u32(ctx->h); + uint32x4_t h1 = 
vld1q_u32(ctx->h + 4); + + unsigned char *input = ctx->u.b; + + uint32x4_t a = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input))); + uint32x4_t b = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 16))); + uint32x4_t c = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 32))); + uint32x4_t d = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 48))); + + uint32x4_t w0 = h0; + uint32x4_t w1 = h1; + + ROUND(0, a, b, c, d) + ROUND(1, b, c, d, a) + ROUND(2, c, d, a, b) + ROUND(3, d, a, b, c) + ROUND(4, a, b, c, d) + ROUND(5, b, c, d, a) + ROUND(6, c, d, a, b) + ROUND(7, d, a, b, c) + ROUND(8, a, b, c, d) + ROUND(9, b, c, d, a) + ROUND(10, c, d, a, b) + ROUND(11, d, a, b, c) + ROUND(12, a, b, c, d) + ROUND(13, b, c, d, a) + ROUND(14, c, d, a, b) + ROUND(15, d, a, b, c) + + h0 = vaddq_u32(h0, w0); + h1 = vaddq_u32(h1, w1); + + vst1q_u32(ctx->h, h0); + vst1q_u32(ctx->h + 4, h1); +} + +void +SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + const uint32x4_t k0 = vld1q_u32(K256); + const uint32x4_t k1 = vld1q_u32(K256 + 4); + const uint32x4_t k2 = vld1q_u32(K256 + 8); + const uint32x4_t k3 = vld1q_u32(K256 + 12); + const uint32x4_t k4 = vld1q_u32(K256 + 16); + const uint32x4_t k5 = vld1q_u32(K256 + 20); + const uint32x4_t k6 = vld1q_u32(K256 + 24); + const uint32x4_t k7 = vld1q_u32(K256 + 28); + const uint32x4_t k8 = vld1q_u32(K256 + 32); + const uint32x4_t k9 = vld1q_u32(K256 + 36); + const uint32x4_t k10 = vld1q_u32(K256 + 40); + const uint32x4_t k11 = vld1q_u32(K256 + 44); + const uint32x4_t k12 = vld1q_u32(K256 + 48); + const uint32x4_t k13 = vld1q_u32(K256 + 52); + const uint32x4_t k14 = vld1q_u32(K256 + 56); + const uint32x4_t k15 = vld1q_u32(K256 + 60); + + unsigned int inBuf = ctx->sizeLo & 0x3f; + if (!inputLen) { + return; + } + + /* Add inputLen into the count of bytes processed, before processing */ + if ((ctx->sizeLo += inputLen) < inputLen) { + ctx->sizeHi++; + } + + /* if data already in buffer, attempt to fill rest
of buffer */ + if (inBuf) { + unsigned int todo = SHA256_BLOCK_LENGTH - inBuf; + if (inputLen < todo) { + todo = inputLen; + } + memcpy(ctx->u.b + inBuf, input, todo); + input += todo; + inputLen -= todo; + if (inBuf + todo == SHA256_BLOCK_LENGTH) { + SHA256_Compress_Native(ctx); + } + } + + uint32x4_t h0 = vld1q_u32(ctx->h); + uint32x4_t h1 = vld1q_u32(ctx->h + 4); + + /* if enough data to fill one or more whole buffers, process them. */ + while (inputLen >= SHA256_BLOCK_LENGTH) { + uint32x4_t a, b, c, d; + a = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input))); + b = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 16))); + c = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 32))); + d = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 48))); + input += SHA256_BLOCK_LENGTH; + inputLen -= SHA256_BLOCK_LENGTH; + + uint32x4_t w0 = h0; + uint32x4_t w1 = h1; + + ROUND(0, a, b, c, d) + ROUND(1, b, c, d, a) + ROUND(2, c, d, a, b) + ROUND(3, d, a, b, c) + ROUND(4, a, b, c, d) + ROUND(5, b, c, d, a) + ROUND(6, c, d, a, b) + ROUND(7, d, a, b, c) + ROUND(8, a, b, c, d) + ROUND(9, b, c, d, a) + ROUND(10, c, d, a, b) + ROUND(11, d, a, b, c) + ROUND(12, a, b, c, d) + ROUND(13, b, c, d, a) + ROUND(14, c, d, a, b) + ROUND(15, d, a, b, c) + + h0 = vaddq_u32(h0, w0); + h1 = vaddq_u32(h1, w1); + } + + vst1q_u32(ctx->h, h0); + vst1q_u32(ctx->h + 4, h1); + + /* if data left over, fill it into buffer */ + if (inputLen) { + memcpy(ctx->u.b, input, inputLen); + } +} + +#endif /* USE_HW_SHA2 */ diff --git a/security/nss/lib/freebl/sha256-x86.c b/security/nss/lib/freebl/sha256-x86.c new file mode 100644 index 0000000000..3aa30e9ccc --- /dev/null +++ b/security/nss/lib/freebl/sha256-x86.c @@ -0,0 +1,236 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef USE_HW_SHA2 + +#include <immintrin.h> + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapii.h" +#include "prcpucfg.h" +#include "prtypes.h" /* for PRUintXX */ +#include "prlong.h" +#include "blapi.h" +#include "sha256.h" + +/* SHA-256 constants, K256. */ +pre_align static const PRUint32 K256[64] post_align = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define ROUND(n, a, b, c, d) \ + { \ + __m128i t = _mm_add_epi32(a, k##n); \ + w1 = _mm_sha256rnds2_epu32(w1, w0, t); \ + t = _mm_shuffle_epi32(t, 0x0e); \ + w0 = _mm_sha256rnds2_epu32(w0, w1, t); \ + if (n < 12) { \ + a = _mm_sha256msg1_epu32(a, b); \ + a = _mm_add_epi32(a, _mm_alignr_epi8(d, c, 4)); \ + a = _mm_sha256msg2_epu32(a, d); \ + } \ + } + +void +SHA256_Compress_Native(SHA256Context *ctx) +{ + __m128i h0, h1, th; + __m128i a, b, c, d; + __m128i w0, w1; + const __m128i shuffle = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); + + const __m128i *K = (__m128i *)K256; + const __m128i k0 = _mm_load_si128(K); + const __m128i k1 = _mm_load_si128(K + 1); + const __m128i k2 = _mm_load_si128(K + 2); + const __m128i k3 = _mm_load_si128(K + 3); + const __m128i k4 = _mm_load_si128(K + 4); + const __m128i k5 = _mm_load_si128(K + 5);
+ const __m128i k6 = _mm_load_si128(K + 6); + const __m128i k7 = _mm_load_si128(K + 7); + const __m128i k8 = _mm_load_si128(K + 8); + const __m128i k9 = _mm_load_si128(K + 9); + const __m128i k10 = _mm_load_si128(K + 10); + const __m128i k11 = _mm_load_si128(K + 11); + const __m128i k12 = _mm_load_si128(K + 12); + const __m128i k13 = _mm_load_si128(K + 13); + const __m128i k14 = _mm_load_si128(K + 14); + const __m128i k15 = _mm_load_si128(K + 15); + + const __m128i *input = (__m128i *)ctx->u.b; + + h0 = _mm_loadu_si128((__m128i *)(ctx->h)); + h1 = _mm_loadu_si128((__m128i *)(ctx->h + 4)); + + /* H0123:4567 -> H0145:2367 */ + th = _mm_shuffle_epi32(h0, 0xb1); + h1 = _mm_shuffle_epi32(h1, 0x1b); + h0 = _mm_alignr_epi8(th, h1, 8); + h1 = _mm_blend_epi16(h1, th, 0xf0); + + a = _mm_shuffle_epi8(_mm_loadu_si128(input), shuffle); + b = _mm_shuffle_epi8(_mm_loadu_si128(input + 1), shuffle); + c = _mm_shuffle_epi8(_mm_loadu_si128(input + 2), shuffle); + d = _mm_shuffle_epi8(_mm_loadu_si128(input + 3), shuffle); + + w0 = h0; + w1 = h1; + + ROUND(0, a, b, c, d) + ROUND(1, b, c, d, a) + ROUND(2, c, d, a, b) + ROUND(3, d, a, b, c) + ROUND(4, a, b, c, d) + ROUND(5, b, c, d, a) + ROUND(6, c, d, a, b) + ROUND(7, d, a, b, c) + ROUND(8, a, b, c, d) + ROUND(9, b, c, d, a) + ROUND(10, c, d, a, b) + ROUND(11, d, a, b, c) + ROUND(12, a, b, c, d) + ROUND(13, b, c, d, a) + ROUND(14, c, d, a, b) + ROUND(15, d, a, b, c) + + h0 = _mm_add_epi32(h0, w0); + h1 = _mm_add_epi32(h1, w1); + + /* H0145:2367 -> H0123:4567 */ + th = _mm_shuffle_epi32(h0, 0x1b); + h1 = _mm_shuffle_epi32(h1, 0xb1); + h0 = _mm_blend_epi16(th, h1, 0xf0); + h1 = _mm_alignr_epi8(h1, th, 8); + + _mm_storeu_si128((__m128i *)ctx->h, h0); + _mm_storeu_si128((__m128i *)(ctx->h + 4), h1); +} + +void +SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + __m128i h0, h1, th; + const __m128i shuffle = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); + + const __m128i
*K = (__m128i *)K256; + const __m128i k0 = _mm_load_si128(K); + const __m128i k1 = _mm_load_si128(K + 1); + const __m128i k2 = _mm_load_si128(K + 2); + const __m128i k3 = _mm_load_si128(K + 3); + const __m128i k4 = _mm_load_si128(K + 4); + const __m128i k5 = _mm_load_si128(K + 5); + const __m128i k6 = _mm_load_si128(K + 6); + const __m128i k7 = _mm_load_si128(K + 7); + const __m128i k8 = _mm_load_si128(K + 8); + const __m128i k9 = _mm_load_si128(K + 9); + const __m128i k10 = _mm_load_si128(K + 10); + const __m128i k11 = _mm_load_si128(K + 11); + const __m128i k12 = _mm_load_si128(K + 12); + const __m128i k13 = _mm_load_si128(K + 13); + const __m128i k14 = _mm_load_si128(K + 14); + const __m128i k15 = _mm_load_si128(K + 15); + + unsigned int inBuf = ctx->sizeLo & 0x3f; + if (!inputLen) { + return; + } + + /* Add inputLen into the count of bytes processed, before processing */ + if ((ctx->sizeLo += inputLen) < inputLen) { + ctx->sizeHi++; + } + + /* if data already in buffer, attempt to fill rest of buffer */ + if (inBuf) { + unsigned int todo = SHA256_BLOCK_LENGTH - inBuf; + if (inputLen < todo) { + todo = inputLen; + } + memcpy(ctx->u.b + inBuf, input, todo); + input += todo; + inputLen -= todo; + if (inBuf + todo == SHA256_BLOCK_LENGTH) { + SHA256_Compress_Native(ctx); + } + } + + h0 = _mm_loadu_si128((__m128i *)(ctx->h)); + h1 = _mm_loadu_si128((__m128i *)(ctx->h + 4)); + + /* H0123:4567 -> H0145:2367 */ + th = _mm_shuffle_epi32(h0, 0xb1); + h1 = _mm_shuffle_epi32(h1, 0x1b); + h0 = _mm_alignr_epi8(th, h1, 8); + h1 = _mm_blend_epi16(h1, th, 0xf0); + + /* if enough data to fill one or more whole buffers, process them.
*/ + while (inputLen >= SHA256_BLOCK_LENGTH) { + __m128i a, b, c, d; + __m128i w0, w1; + a = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)input), shuffle); + b = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(input + 16)), shuffle); + c = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(input + 32)), shuffle); + d = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(input + 48)), shuffle); + input += SHA256_BLOCK_LENGTH; + inputLen -= SHA256_BLOCK_LENGTH; + + w0 = h0; + w1 = h1; + + ROUND(0, a, b, c, d) + ROUND(1, b, c, d, a) + ROUND(2, c, d, a, b) + ROUND(3, d, a, b, c) + ROUND(4, a, b, c, d) + ROUND(5, b, c, d, a) + ROUND(6, c, d, a, b) + ROUND(7, d, a, b, c) + ROUND(8, a, b, c, d) + ROUND(9, b, c, d, a) + ROUND(10, c, d, a, b) + ROUND(11, d, a, b, c) + ROUND(12, a, b, c, d) + ROUND(13, b, c, d, a) + ROUND(14, c, d, a, b) + ROUND(15, d, a, b, c) + + h0 = _mm_add_epi32(h0, w0); + h1 = _mm_add_epi32(h1, w1); + } + + /* H0145:2367 -> H0123:4567 */ + th = _mm_shuffle_epi32(h0, 0x1b); + h1 = _mm_shuffle_epi32(h1, 0xb1); + h0 = _mm_blend_epi16(th, h1, 0xf0); + h1 = _mm_alignr_epi8(h1, th, 8); + + _mm_storeu_si128((__m128i *)ctx->h, h0); + _mm_storeu_si128((__m128i *)(ctx->h + 4), h1); + + /* if data left over, fill it into buffer */ + if (inputLen) { + memcpy(ctx->u.b, input, inputLen); + } +} + +#endif /* USE_HW_SHA2 */ diff --git a/security/nss/lib/freebl/sha256.h b/security/nss/lib/freebl/sha256.h new file mode 100644 index 0000000000..645118b07e --- /dev/null +++ b/security/nss/lib/freebl/sha256.h @@ -0,0 +1,27 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef _SHA_256_H_ +#define _SHA_256_H_ + +#include "prtypes.h" + +struct SHA256ContextStr; + +typedef void (*sha256_compress_t)(struct SHA256ContextStr *); +typedef void (*sha256_update_t)(struct SHA256ContextStr *, const unsigned char *, + unsigned int); + +struct SHA256ContextStr { + union { + PRUint32 w[64]; /* message schedule, input buffer, plus 48 words */ + PRUint8 b[256]; + } u; + PRUint32 h[8]; /* 8 state variables */ + PRUint32 sizeHi, sizeLo; /* 64-bit count of hashed bytes. */ + sha256_compress_t compress; + sha256_update_t update; +}; + +#endif /* _SHA_256_H_ */ diff --git a/security/nss/lib/freebl/sha512-p8.s b/security/nss/lib/freebl/sha512-p8.s new file mode 100644 index 0000000000..d84ec04780 --- /dev/null +++ b/security/nss/lib/freebl/sha512-p8.s @@ -0,0 +1,851 @@ +# Copyright (c) 2006, CRYPTOGAMS by +# All rights reserved. +# See the full LICENSE under scripts/. + +.machine "any" +.abiversion 2 +.text + +.globl sha512_block_p8 +.type sha512_block_p8,@function +.align 6 +sha512_block_p8: +.localentry sha512_block_p8,0 + + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + li 12,-1 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + or 11,11,11 + + bl .LPICmeup + addi 11,1,79 + li 7,8 + lvsl 31,0,7 + vspltisb 28,0x0f + vxor 31,31,28 + .long 0x7C001E99 + .long 0x7C4A1E99 + .long 0x7C9A1E99 + vsldoi 1,0,0,8 + .long 0x7CDB1E99 + vsldoi 3,2,2,8 + vsldoi 5,4,4,8 + vsldoi 7,6,6,8 + li 0,4 + b .Loop +.align 5 +.Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 
5,29,11 + stvx 6,30,11 + stvx 7,31,11 + .long 0x10E7E0C0 + lvx 28,10,6 + vperm 8,8,8,31 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7D402699 + addi 4,4,16 + vsldoi 9,8,8,8 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + vperm 10,10,10,31 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,8 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + vperm 12,12,12,31 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7DC02699 + addi 4,4,16 + vsldoi 13,12,12,8 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + vperm 14,14,14,31 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,8 + 
.long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + vperm 16,16,16,31 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7E402699 + addi 4,4,16 + vsldoi 17,16,16,8 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + vperm 18,18,18,31 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,8 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + vperm 24,24,24,31 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7F402699 + addi 4,4,16 + vsldoi 25,24,24,8 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + vperm 26,26,26,31 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 
29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + vsldoi 27,26,26,8 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + mtctr 0 + b .L16_xx +.align 5 +.L16_xx: + .long 0x13CA06C2 + .long 0x1129F0C0 + .long 0x13DB7EC2 + .long 0x1129F0C0 + .long 0x112990C0 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13CB06C2 + .long 0x114AF0C0 + .long 0x13C87EC2 + .long 0x114AF0C0 + .long 0x114A98C0 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13CC06C2 + .long 0x116BF0C0 + .long 0x13C97EC2 + .long 0x116BF0C0 + .long 0x116BC0C0 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13CD06C2 + .long 0x118CF0C0 + .long 0x13CA7EC2 + .long 0x118CF0C0 + .long 0x118CC8C0 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13CE06C2 + .long 0x11ADF0C0 + .long 0x13CB7EC2 + .long 0x11ADF0C0 + .long 0x11ADD0C0 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + 
.long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13CF06C2 + .long 0x11CEF0C0 + .long 0x13CC7EC2 + .long 0x11CEF0C0 + .long 0x11CED8C0 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D006C2 + .long 0x11EFF0C0 + .long 0x13CD7EC2 + .long 0x11EFF0C0 + .long 0x11EF40C0 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13D106C2 + .long 0x1210F0C0 + .long 0x13CE7EC2 + .long 0x1210F0C0 + .long 0x121048C0 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x13D206C2 + .long 0x1231F0C0 + .long 0x13CF7EC2 + .long 0x1231F0C0 + .long 0x123150C0 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13D306C2 + .long 0x1252F0C0 + .long 0x13D07EC2 + .long 0x1252F0C0 + .long 0x125258C0 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13D806C2 + .long 0x1273F0C0 + .long 0x13D17EC2 + .long 0x1273F0C0 + .long 0x127360C0 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + 
vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13D906C2 + .long 0x1318F0C0 + .long 0x13D27EC2 + .long 0x1318F0C0 + .long 0x131868C0 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13DA06C2 + .long 0x1339F0C0 + .long 0x13D37EC2 + .long 0x1339F0C0 + .long 0x133970C0 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13DB06C2 + .long 0x135AF0C0 + .long 0x13D87EC2 + .long 0x135AF0C0 + .long 0x135A78C0 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C806C2 + .long 0x137BF0C0 + .long 0x13D97EC2 + .long 0x137BF0C0 + .long 0x137B80C0 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + bdnz .L16_xx + + lvx 10,0,11 + subic. 
5,5,1 + lvx 11,10,11 + .long 0x100050C0 + lvx 12,26,11 + .long 0x102158C0 + lvx 13,27,11 + .long 0x104260C0 + lvx 14,28,11 + .long 0x106368C0 + lvx 15,29,11 + .long 0x108470C0 + lvx 16,30,11 + .long 0x10A578C0 + lvx 17,31,11 + .long 0x10C680C0 + .long 0x10E788C0 + bne .Loop + vperm 0,0,1,28 + vperm 2,2,3,28 + vperm 4,4,5,28 + vperm 6,6,7,28 + .long 0x7C001F99 + .long 0x7C4A1F99 + .long 0x7C9A1F99 + .long 0x7CDB1F99 + addi 11,1,207 + mtlr 8 + or 12,12,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 +.size sha512_block_p8,.-sha512_block_p8 +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0xd728ae22,0x428a2f98 +.long 0xd728ae22,0x428a2f98 +.long 0x23ef65cd,0x71374491 +.long 0x23ef65cd,0x71374491 +.long 0xec4d3b2f,0xb5c0fbcf +.long 0xec4d3b2f,0xb5c0fbcf +.long 0x8189dbbc,0xe9b5dba5 +.long 0x8189dbbc,0xe9b5dba5 +.long 0xf348b538,0x3956c25b +.long 0xf348b538,0x3956c25b +.long 0xb605d019,0x59f111f1 +.long 0xb605d019,0x59f111f1 +.long 0xaf194f9b,0x923f82a4 +.long 0xaf194f9b,0x923f82a4 +.long 0xda6d8118,0xab1c5ed5 +.long 0xda6d8118,0xab1c5ed5 +.long 0xa3030242,0xd807aa98 +.long 0xa3030242,0xd807aa98 +.long 0x45706fbe,0x12835b01 +.long 0x45706fbe,0x12835b01 +.long 0x4ee4b28c,0x243185be +.long 0x4ee4b28c,0x243185be +.long 0xd5ffb4e2,0x550c7dc3 +.long 0xd5ffb4e2,0x550c7dc3 +.long 0xf27b896f,0x72be5d74 +.long 0xf27b896f,0x72be5d74 +.long 0x3b1696b1,0x80deb1fe +.long 0x3b1696b1,0x80deb1fe +.long 0x25c71235,0x9bdc06a7 +.long 0x25c71235,0x9bdc06a7 +.long 0xcf692694,0xc19bf174 +.long 0xcf692694,0xc19bf174 +.long 0x9ef14ad2,0xe49b69c1 +.long 0x9ef14ad2,0xe49b69c1 +.long 0x384f25e3,0xefbe4786 +.long 0x384f25e3,0xefbe4786 +.long 0x8b8cd5b5,0x0fc19dc6 +.long 
0x8b8cd5b5,0x0fc19dc6 +.long 0x77ac9c65,0x240ca1cc +.long 0x77ac9c65,0x240ca1cc +.long 0x592b0275,0x2de92c6f +.long 0x592b0275,0x2de92c6f +.long 0x6ea6e483,0x4a7484aa +.long 0x6ea6e483,0x4a7484aa +.long 0xbd41fbd4,0x5cb0a9dc +.long 0xbd41fbd4,0x5cb0a9dc +.long 0x831153b5,0x76f988da +.long 0x831153b5,0x76f988da +.long 0xee66dfab,0x983e5152 +.long 0xee66dfab,0x983e5152 +.long 0x2db43210,0xa831c66d +.long 0x2db43210,0xa831c66d +.long 0x98fb213f,0xb00327c8 +.long 0x98fb213f,0xb00327c8 +.long 0xbeef0ee4,0xbf597fc7 +.long 0xbeef0ee4,0xbf597fc7 +.long 0x3da88fc2,0xc6e00bf3 +.long 0x3da88fc2,0xc6e00bf3 +.long 0x930aa725,0xd5a79147 +.long 0x930aa725,0xd5a79147 +.long 0xe003826f,0x06ca6351 +.long 0xe003826f,0x06ca6351 +.long 0x0a0e6e70,0x14292967 +.long 0x0a0e6e70,0x14292967 +.long 0x46d22ffc,0x27b70a85 +.long 0x46d22ffc,0x27b70a85 +.long 0x5c26c926,0x2e1b2138 +.long 0x5c26c926,0x2e1b2138 +.long 0x5ac42aed,0x4d2c6dfc +.long 0x5ac42aed,0x4d2c6dfc +.long 0x9d95b3df,0x53380d13 +.long 0x9d95b3df,0x53380d13 +.long 0x8baf63de,0x650a7354 +.long 0x8baf63de,0x650a7354 +.long 0x3c77b2a8,0x766a0abb +.long 0x3c77b2a8,0x766a0abb +.long 0x47edaee6,0x81c2c92e +.long 0x47edaee6,0x81c2c92e +.long 0x1482353b,0x92722c85 +.long 0x1482353b,0x92722c85 +.long 0x4cf10364,0xa2bfe8a1 +.long 0x4cf10364,0xa2bfe8a1 +.long 0xbc423001,0xa81a664b +.long 0xbc423001,0xa81a664b +.long 0xd0f89791,0xc24b8b70 +.long 0xd0f89791,0xc24b8b70 +.long 0x0654be30,0xc76c51a3 +.long 0x0654be30,0xc76c51a3 +.long 0xd6ef5218,0xd192e819 +.long 0xd6ef5218,0xd192e819 +.long 0x5565a910,0xd6990624 +.long 0x5565a910,0xd6990624 +.long 0x5771202a,0xf40e3585 +.long 0x5771202a,0xf40e3585 +.long 0x32bbd1b8,0x106aa070 +.long 0x32bbd1b8,0x106aa070 +.long 0xb8d2d0c8,0x19a4c116 +.long 0xb8d2d0c8,0x19a4c116 +.long 0x5141ab53,0x1e376c08 +.long 0x5141ab53,0x1e376c08 +.long 0xdf8eeb99,0x2748774c +.long 0xdf8eeb99,0x2748774c +.long 0xe19b48a8,0x34b0bcb5 +.long 0xe19b48a8,0x34b0bcb5 +.long 0xc5c95a63,0x391c0cb3 +.long 0xc5c95a63,0x391c0cb3 
+.long 0xe3418acb,0x4ed8aa4a +.long 0xe3418acb,0x4ed8aa4a +.long 0x7763e373,0x5b9cca4f +.long 0x7763e373,0x5b9cca4f +.long 0xd6b2b8a3,0x682e6ff3 +.long 0xd6b2b8a3,0x682e6ff3 +.long 0x5defb2fc,0x748f82ee +.long 0x5defb2fc,0x748f82ee +.long 0x43172f60,0x78a5636f +.long 0x43172f60,0x78a5636f +.long 0xa1f0ab72,0x84c87814 +.long 0xa1f0ab72,0x84c87814 +.long 0x1a6439ec,0x8cc70208 +.long 0x1a6439ec,0x8cc70208 +.long 0x23631e28,0x90befffa +.long 0x23631e28,0x90befffa +.long 0xde82bde9,0xa4506ceb +.long 0xde82bde9,0xa4506ceb +.long 0xb2c67915,0xbef9a3f7 +.long 0xb2c67915,0xbef9a3f7 +.long 0xe372532b,0xc67178f2 +.long 0xe372532b,0xc67178f2 +.long 0xea26619c,0xca273ece +.long 0xea26619c,0xca273ece +.long 0x21c0c207,0xd186b8c7 +.long 0x21c0c207,0xd186b8c7 +.long 0xcde0eb1e,0xeada7dd6 +.long 0xcde0eb1e,0xeada7dd6 +.long 0xee6ed178,0xf57d4f7f +.long 0xee6ed178,0xf57d4f7f +.long 0x72176fba,0x06f067aa +.long 0x72176fba,0x06f067aa +.long 0xa2c898a6,0x0a637dc5 +.long 0xa2c898a6,0x0a637dc5 +.long 0xbef90dae,0x113f9804 +.long 0xbef90dae,0x113f9804 +.long 0x131c471b,0x1b710b35 +.long 0x131c471b,0x1b710b35 +.long 0x23047d84,0x28db77f5 +.long 0x23047d84,0x28db77f5 +.long 0x40c72493,0x32caab7b +.long 0x40c72493,0x32caab7b +.long 0x15c9bebc,0x3c9ebe0a +.long 0x15c9bebc,0x3c9ebe0a +.long 0x9c100d4c,0x431d67c4 +.long 0x9c100d4c,0x431d67c4 +.long 0xcb3e42b6,0x4cc5d4be +.long 0xcb3e42b6,0x4cc5d4be +.long 0xfc657e2a,0x597f299c +.long 0xfc657e2a,0x597f299c +.long 0x3ad6faec,0x5fcb6fab +.long 0x3ad6faec,0x5fcb6fab +.long 0x4a475817,0x6c44198c +.long 0x4a475817,0x6c44198c +.long 0,0 +.long 0,0 +.long 0x14151617,0x10111213 +.long 0x04050607,0x00010203 +.byte 83,72,65,53,49,50,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 diff --git a/security/nss/lib/freebl/sha512.c b/security/nss/lib/freebl/sha512.c new file mode 100644 index 
0000000000..14584a8906 --- /dev/null +++ b/security/nss/lib/freebl/sha512.c @@ -0,0 +1,1776 @@ +/* + * sha512.c - implementation of SHA224, SHA256, SHA384 and SHA512 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prcpucfg.h" +#if defined(NSS_X86) || defined(SHA_NO_LONG_LONG) +#define NOUNROLL512 1 +#undef HAVE_LONG_LONG +#endif +#include "prtypes.h" /* for PRUintXX */ +#include "prlong.h" +#include "secport.h" /* for PORT_XXX */ +#include "blapi.h" +#include "blapii.h" +#include "secerr.h" +#include "sha256.h" /* for struct SHA256ContextStr */ +#include "crypto_primitives.h" +#include "ppc-crypto.h" /* for USE_PPC_CRYPTO */ + +/* ============= Common constants and defines ======================= */ + +#define W ctx->u.w +#define B ctx->u.b +#define H ctx->h + +#define SHR(x, n) (x >> n) +#define SHL(x, n) (x << n) +#define Ch(x, y, z) ((x & y) ^ (~x & z)) +#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) +#define SHA_MIN(a, b) (a < b ? a : b) + +/* Padding used with all flavors of SHA */ +static const PRUint8 pad[240] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + /* compiler will fill the rest in with zeros */ +}; + +/* ============= SHA256 implementation ================================== */ + +/* SHA-256 constants, K256. 
*/ +pre_align static const PRUint32 K256[64] post_align = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +/* SHA-256 initial hash values */ +static const PRUint32 H256[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; + +#if defined(IS_LITTLE_ENDIAN) +#if (_MSC_VER >= 1300) +#include <stdlib.h> +#pragma intrinsic(_byteswap_ulong) +#define SHA_HTONL(x) _byteswap_ulong(x) +#elif defined(_MSC_VER) && defined(NSS_X86_OR_X64) +#ifndef FORCEINLINE +#if (_MSC_VER >= 1200) +#define FORCEINLINE __forceinline +#else +#define FORCEINLINE __inline +#endif +#endif +#define FASTCALL __fastcall + +static FORCEINLINE PRUint32 FASTCALL +swap4b(PRUint32 dwd) +{ + __asm { + mov eax,dwd + bswap eax + } +} + +#define SHA_HTONL(x) swap4b(x) + +#elif defined(__GNUC__) && defined(NSS_X86_OR_X64) +static __inline__ PRUint32 +swap4b(PRUint32 value) +{ + __asm__("bswap %0" + : "+r"(value)); + return (value); +} +#define SHA_HTONL(x) swap4b(x) + +#elif defined(__GNUC__) && (defined(__thumb2__) || \ + (!defined(__thumb__) && \ + (defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || \ +
defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__)))) +static __inline__ PRUint32 +swap4b(PRUint32 value) +{ + PRUint32 ret; + __asm__("rev %0, %1" + : "=r"(ret) + : "r"(value)); + return ret; +} +#define SHA_HTONL(x) swap4b(x) + +#else +#define SWAP4MASK 0x00FF00FF +static PRUint32 +swap4b(PRUint32 value) +{ + PRUint32 t1 = (value << 16) | (value >> 16); + return ((t1 & SWAP4MASK) << 8) | ((t1 >> 8) & SWAP4MASK); +} +#define SHA_HTONL(x) swap4b(x) +#endif +#define BYTESWAP4(x) x = SHA_HTONL(x) +#endif /* defined(IS_LITTLE_ENDIAN) */ + +#if defined(_MSC_VER) +#pragma intrinsic(_lrotr, _lrotl) +#define ROTR32(x, n) _lrotr(x, n) +#define ROTL32(x, n) _lrotl(x, n) +#else +#define ROTR32(x, n) ((x >> n) | (x << ((8 * sizeof x) - n))) +#define ROTL32(x, n) ((x << n) | (x >> ((8 * sizeof x) - n))) +#endif + +/* Capital Sigma and lower case sigma functions */ +#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22)) +#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25)) +#define s0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ SHR(x, 10)) + +void SHA256_Compress_Native(SHA256Context *ctx); +void SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input, unsigned int inputLen); + +static void SHA256_Compress_Generic(SHA256Context *ctx); +static void SHA256_Update_Generic(SHA256Context *ctx, const unsigned char *input, + unsigned int inputLen); + +#if !defined(USE_HW_SHA2) +void +SHA256_Compress_Native(SHA256Context *ctx) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} +#endif + +SHA256Context * +SHA256_NewContext(void) +{ + SHA256Context *ctx = PORT_New(SHA256Context); + return ctx; +} + +void +SHA256_DestroyContext(SHA256Context *ctx, PRBool
freeit) +{ + memset(ctx, 0, sizeof *ctx); + if (freeit) { + PORT_Free(ctx); + } +} + +void +SHA256_Begin(SHA256Context *ctx) +{ + PRBool use_hw_sha2 = PR_FALSE; + + memset(ctx, 0, sizeof *ctx); + memcpy(H, H256, sizeof H256); + +#if defined(USE_HW_SHA2) && defined(IS_LITTLE_ENDIAN) + /* arm's implementation is tested on little endian only */ + use_hw_sha2 = arm_sha2_support() || (sha_support() && ssse3_support() && sse4_1_support()); +#endif + + if (use_hw_sha2) { + ctx->compress = SHA256_Compress_Native; + ctx->update = SHA256_Update_Native; + } else { + ctx->compress = SHA256_Compress_Generic; + ctx->update = SHA256_Update_Generic; + } +} + +#if defined(USE_PPC_CRYPTO) + +#define ROUND(n, a, b, c, d, e, f, g, h) \ + s0 = __builtin_crypto_vshasigmaw(e, 1, 0xf); \ + h += s0 + vec_sel(g, f, e) + w[n / 4]; \ + d += h; \ + s0 = __builtin_crypto_vshasigmaw(a, 1, 0); \ + h += s0 + vec_sel(b, c, vec_xor(a, b)); \ + if (n % 4 != 3) \ + w[n / 4] = vec_sro(w[n / 4], rshift); + +#else + +#define ROUND(n, a, b, c, d, e, f, g, h) \ + h += S1(e) + Ch(e, f, g) + K256[n] + W[n]; \ + d += h; \ + h += S0(a) + Maj(a, b, c); + +#endif + +#define SHA256_UNROLLED_ROUNDS \ + ROUND(0, a, b, c, d, e, f, g, h) \ + ROUND(1, h, a, b, c, d, e, f, g) \ + ROUND(2, g, h, a, b, c, d, e, f) \ + ROUND(3, f, g, h, a, b, c, d, e) \ + ROUND(4, e, f, g, h, a, b, c, d) \ + ROUND(5, d, e, f, g, h, a, b, c) \ + ROUND(6, c, d, e, f, g, h, a, b) \ + ROUND(7, b, c, d, e, f, g, h, a) \ + \ + ROUND(8, a, b, c, d, e, f, g, h) \ + ROUND(9, h, a, b, c, d, e, f, g) \ + ROUND(10, g, h, a, b, c, d, e, f) \ + ROUND(11, f, g, h, a, b, c, d, e) \ + ROUND(12, e, f, g, h, a, b, c, d) \ + ROUND(13, d, e, f, g, h, a, b, c) \ + ROUND(14, c, d, e, f, g, h, a, b) \ + ROUND(15, b, c, d, e, f, g, h, a) \ + \ + ROUND(16, a, b, c, d, e, f, g, h) \ + ROUND(17, h, a, b, c, d, e, f, g) \ + ROUND(18, g, h, a, b, c, d, e, f) \ + ROUND(19, f, g, h, a, b, c, d, e) \ + ROUND(20, e, f, g, h, a, b, c, d) \ + ROUND(21, d, e, f, g, h, a, b, 
c) \ + ROUND(22, c, d, e, f, g, h, a, b) \ + ROUND(23, b, c, d, e, f, g, h, a) \ + \ + ROUND(24, a, b, c, d, e, f, g, h) \ + ROUND(25, h, a, b, c, d, e, f, g) \ + ROUND(26, g, h, a, b, c, d, e, f) \ + ROUND(27, f, g, h, a, b, c, d, e) \ + ROUND(28, e, f, g, h, a, b, c, d) \ + ROUND(29, d, e, f, g, h, a, b, c) \ + ROUND(30, c, d, e, f, g, h, a, b) \ + ROUND(31, b, c, d, e, f, g, h, a) \ + \ + ROUND(32, a, b, c, d, e, f, g, h) \ + ROUND(33, h, a, b, c, d, e, f, g) \ + ROUND(34, g, h, a, b, c, d, e, f) \ + ROUND(35, f, g, h, a, b, c, d, e) \ + ROUND(36, e, f, g, h, a, b, c, d) \ + ROUND(37, d, e, f, g, h, a, b, c) \ + ROUND(38, c, d, e, f, g, h, a, b) \ + ROUND(39, b, c, d, e, f, g, h, a) \ + \ + ROUND(40, a, b, c, d, e, f, g, h) \ + ROUND(41, h, a, b, c, d, e, f, g) \ + ROUND(42, g, h, a, b, c, d, e, f) \ + ROUND(43, f, g, h, a, b, c, d, e) \ + ROUND(44, e, f, g, h, a, b, c, d) \ + ROUND(45, d, e, f, g, h, a, b, c) \ + ROUND(46, c, d, e, f, g, h, a, b) \ + ROUND(47, b, c, d, e, f, g, h, a) \ + \ + ROUND(48, a, b, c, d, e, f, g, h) \ + ROUND(49, h, a, b, c, d, e, f, g) \ + ROUND(50, g, h, a, b, c, d, e, f) \ + ROUND(51, f, g, h, a, b, c, d, e) \ + ROUND(52, e, f, g, h, a, b, c, d) \ + ROUND(53, d, e, f, g, h, a, b, c) \ + ROUND(54, c, d, e, f, g, h, a, b) \ + ROUND(55, b, c, d, e, f, g, h, a) \ + \ + ROUND(56, a, b, c, d, e, f, g, h) \ + ROUND(57, h, a, b, c, d, e, f, g) \ + ROUND(58, g, h, a, b, c, d, e, f) \ + ROUND(59, f, g, h, a, b, c, d, e) \ + ROUND(60, e, f, g, h, a, b, c, d) \ + ROUND(61, d, e, f, g, h, a, b, c) \ + ROUND(62, c, d, e, f, g, h, a, b) \ + ROUND(63, b, c, d, e, f, g, h, a) + +static void +SHA256_Compress_Generic(SHA256Context *ctx) +{ +#if defined(USE_PPC_CRYPTO) + vec_u32 w[16], s0, s1; + const vec_u8 rshift = (vec_u8)vec_splats(4 << 3); + const vec_u8 shifthalf = (vec_u8)vec_splats(8 << 3); + const vec_u8 bswap4 = (vec_u8){ + 3, 2, 1, 0, 7, 6, 5, 4, 11, + 10, 9, 8, 15, 14, 13, 12 + }; + unsigned i; + + for (i = 0; i < 4; i++) { + w[i] = 
vec_vsx_ld(0, &W[i * 4]); + w[i] = vec_perm(w[i], w[i], bswap4); + } + + /* prepare the message schedule */ + for (i = 4; i < 16; i++) { + vec_u32 off1 = vec_sld(w[i - 3], w[i - 4], 12); + vec_u32 off2 = vec_sld(w[i - 1], w[i - 2], 12); + s0 = __builtin_crypto_vshasigmaw(off1, 0, 0); + /* first half, s1 depends on two prior ints */ + s1 = __builtin_crypto_vshasigmaw(w[i - 1], 0, 0xf); + s1 = vec_sro(s1, shifthalf); + w[i] = w[i - 4] + s0 + off2 + s1; + + /* second half s1 */ + s1 = __builtin_crypto_vshasigmaw(w[i], 0, 0xf); + s1 = vec_slo(s1, shifthalf); + w[i] += s1; + } + + for (i = 0; i < 16; i++) { + w[i] += vec_ld(0, &K256[i * 4]); + } + + vec_u32 a, b, c, d, e, f, g, h; + a = vec_splats(H[0]); + b = vec_splats(H[1]); + c = vec_splats(H[2]); + d = vec_splats(H[3]); + e = vec_splats(H[4]); + f = vec_splats(H[5]); + g = vec_splats(H[6]); + h = vec_splats(H[7]); + + SHA256_UNROLLED_ROUNDS; + + H[0] += a[0]; + H[1] += b[0]; + H[2] += c[0]; + H[3] += d[0]; + H[4] += e[0]; + H[5] += f[0]; + H[6] += g[0]; + H[7] += h[0]; + +#undef ROUND + +#else /* USE_PPC_CRYPTO*/ + + { +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP4(W[0]); + BYTESWAP4(W[1]); + BYTESWAP4(W[2]); + BYTESWAP4(W[3]); + BYTESWAP4(W[4]); + BYTESWAP4(W[5]); + BYTESWAP4(W[6]); + BYTESWAP4(W[7]); + BYTESWAP4(W[8]); + BYTESWAP4(W[9]); + BYTESWAP4(W[10]); + BYTESWAP4(W[11]); + BYTESWAP4(W[12]); + BYTESWAP4(W[13]); + BYTESWAP4(W[14]); + BYTESWAP4(W[15]); +#endif + +#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16]) + +/* prepare the "message schedule" */ +#ifdef NOUNROLL256 + { + int t; + for (t = 16; t < 64; ++t) { + INITW(t); + } + } +#else + INITW(16); + INITW(17); + INITW(18); + INITW(19); + + INITW(20); + INITW(21); + INITW(22); + INITW(23); + INITW(24); + INITW(25); + INITW(26); + INITW(27); + INITW(28); + INITW(29); + + INITW(30); + INITW(31); + INITW(32); + INITW(33); + INITW(34); + INITW(35); + INITW(36); + INITW(37); + INITW(38); + INITW(39); + + INITW(40); + INITW(41); + 
INITW(42); + INITW(43); + INITW(44); + INITW(45); + INITW(46); + INITW(47); + INITW(48); + INITW(49); + + INITW(50); + INITW(51); + INITW(52); + INITW(53); + INITW(54); + INITW(55); + INITW(56); + INITW(57); + INITW(58); + INITW(59); + + INITW(60); + INITW(61); + INITW(62); + INITW(63); + +#endif +#undef INITW + } + { + PRUint32 a, b, c, d, e, f, g, h; + + a = H[0]; + b = H[1]; + c = H[2]; + d = H[3]; + e = H[4]; + f = H[5]; + g = H[6]; + h = H[7]; + +#ifdef NOUNROLL256 + { + int t; + for (t = 0; t < 64; t += 8) { + ROUND(t + 0, a, b, c, d, e, f, g, h) + ROUND(t + 1, h, a, b, c, d, e, f, g) + ROUND(t + 2, g, h, a, b, c, d, e, f) + ROUND(t + 3, f, g, h, a, b, c, d, e) + ROUND(t + 4, e, f, g, h, a, b, c, d) + ROUND(t + 5, d, e, f, g, h, a, b, c) + ROUND(t + 6, c, d, e, f, g, h, a, b) + ROUND(t + 7, b, c, d, e, f, g, h, a) + } + } +#else + SHA256_UNROLLED_ROUNDS; +#endif + + H[0] += a; + H[1] += b; + H[2] += c; + H[3] += d; + H[4] += e; + H[5] += f; + H[6] += g; + H[7] += h; + } +#undef ROUND +#endif /* !USE_PPC_CRYPTO */ +} + +#undef s0 +#undef s1 +#undef S0 +#undef S1 + +void +SHA256_Update(SHA256Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + ctx->update(ctx, input, inputLen); +} + +static void +SHA256_Update_Generic(SHA256Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + unsigned int inBuf = ctx->sizeLo & 0x3f; + if (!inputLen) + return; + + /* Add inputLen into the count of bytes processed, before processing */ + if ((ctx->sizeLo += inputLen) < inputLen) + ctx->sizeHi++; + + /* if data already in buffer, attempt to fill rest of buffer */ + if (inBuf) { + unsigned int todo = SHA256_BLOCK_LENGTH - inBuf; + if (inputLen < todo) + todo = inputLen; + memcpy(B + inBuf, input, todo); + input += todo; + inputLen -= todo; + if (inBuf + todo == SHA256_BLOCK_LENGTH) + SHA256_Compress_Generic(ctx); + } + + /* if enough data to fill one or more whole buffers, process them.
*/ + while (inputLen >= SHA256_BLOCK_LENGTH) { + memcpy(B, input, SHA256_BLOCK_LENGTH); + input += SHA256_BLOCK_LENGTH; + inputLen -= SHA256_BLOCK_LENGTH; + SHA256_Compress_Generic(ctx); + } + /* if data left over, fill it into buffer */ + if (inputLen) + memcpy(B, input, inputLen); +} + +void +SHA256_End(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int inBuf = ctx->sizeLo & 0x3f; + unsigned int padLen = (inBuf < 56) ? (56 - inBuf) : (56 + 64 - inBuf); + PRUint32 hi, lo; + + hi = (ctx->sizeHi << 3) | (ctx->sizeLo >> 29); + lo = (ctx->sizeLo << 3); + + ctx->update(ctx, pad, padLen); + +#if defined(IS_LITTLE_ENDIAN) + W[14] = SHA_HTONL(hi); + W[15] = SHA_HTONL(lo); +#else + W[14] = hi; + W[15] = lo; +#endif + ctx->compress(ctx); + +/* now output the answer */ +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP4(H[0]); + BYTESWAP4(H[1]); + BYTESWAP4(H[2]); + BYTESWAP4(H[3]); + BYTESWAP4(H[4]); + BYTESWAP4(H[5]); + BYTESWAP4(H[6]); + BYTESWAP4(H[7]); +#endif + padLen = PR_MIN(SHA256_LENGTH, maxDigestLen); + memcpy(digest, H, padLen); + if (digestLen) + *digestLen = padLen; +} + +void +SHA256_EndRaw(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + PRUint32 h[8]; + unsigned int len; + + memcpy(h, ctx->h, sizeof(h)); + +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP4(h[0]); + BYTESWAP4(h[1]); + BYTESWAP4(h[2]); + BYTESWAP4(h[3]); + BYTESWAP4(h[4]); + BYTESWAP4(h[5]); + BYTESWAP4(h[6]); + BYTESWAP4(h[7]); +#endif + + len = PR_MIN(SHA256_LENGTH, maxDigestLen); + memcpy(digest, h, len); + if (digestLen) + *digestLen = len; +} + +SECStatus +SHA256_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length) +{ + SHA256Context ctx; + unsigned int outLen; + + SHA256_Begin(&ctx); + SHA256_Update(&ctx, src, src_length); + SHA256_End(&ctx, dest, &outLen, SHA256_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA256_Hash(unsigned 
char *dest, const char *src) +{ + return SHA256_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA256_TraceState(SHA256Context *ctx) +{ +} + +unsigned int +SHA256_FlattenSize(SHA256Context *ctx) +{ + return sizeof *ctx; +} + +SECStatus +SHA256_Flatten(SHA256Context *ctx, unsigned char *space) +{ + PORT_Memcpy(space, ctx, sizeof *ctx); + return SECSuccess; +} + +SHA256Context * +SHA256_Resurrect(unsigned char *space, void *arg) +{ + SHA256Context *ctx = SHA256_NewContext(); + if (ctx) + PORT_Memcpy(ctx, space, sizeof *ctx); + return ctx; +} + +void +SHA256_Clone(SHA256Context *dest, SHA256Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +/* ============= SHA224 implementation ================================== */ + +/* SHA-224 initial hash values */ +static const PRUint32 H224[8] = { + 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, + 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4 +}; + +SHA224Context * +SHA224_NewContext(void) +{ + return SHA256_NewContext(); +} + +void +SHA224_DestroyContext(SHA224Context *ctx, PRBool freeit) +{ + SHA256_DestroyContext(ctx, freeit); +} + +void +SHA224_Begin(SHA224Context *ctx) +{ + PRBool use_hw_sha2; + + memset(ctx, 0, sizeof *ctx); + memcpy(H, H224, sizeof H224); + +#if defined(USE_HW_SHA2) && defined(IS_LITTLE_ENDIAN) + /* arm's implementation is tested on little endian only */ + use_hw_sha2 = arm_sha2_support() || (sha_support() && ssse3_support() && sse4_1_support()); +#else + use_hw_sha2 = PR_FALSE; +#endif + + if (use_hw_sha2) { + ctx->compress = SHA256_Compress_Native; + ctx->update = SHA256_Update_Native; + } else { + ctx->compress = SHA256_Compress_Generic; + ctx->update = SHA256_Update_Generic; + } +} + +void +SHA224_Update(SHA224Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + ctx->update(ctx, input, inputLen); +} + +void +SHA224_End(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = 
SHA_MIN(maxDigestLen, SHA224_LENGTH); + SHA256_End(ctx, digest, digestLen, maxLen); +} + +void +SHA224_EndRaw(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = SHA_MIN(maxDigestLen, SHA224_LENGTH); + SHA256_EndRaw(ctx, digest, digestLen, maxLen); +} + +SECStatus +SHA224_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length) +{ + SHA256Context ctx; + unsigned int outLen; + + SHA224_Begin(&ctx); + SHA256_Update(&ctx, src, src_length); + SHA256_End(&ctx, dest, &outLen, SHA224_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA224_Hash(unsigned char *dest, const char *src) +{ + return SHA224_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA224_TraceState(SHA224Context *ctx) +{ +} + +unsigned int +SHA224_FlattenSize(SHA224Context *ctx) +{ + return SHA256_FlattenSize(ctx); +} + +SECStatus +SHA224_Flatten(SHA224Context *ctx, unsigned char *space) +{ + return SHA256_Flatten(ctx, space); +} + +SHA224Context * +SHA224_Resurrect(unsigned char *space, void *arg) +{ + return SHA256_Resurrect(space, arg); +} + +void +SHA224_Clone(SHA224Context *dest, SHA224Context *src) +{ + SHA256_Clone(dest, src); +} + +/* ======= SHA512 and SHA384 common constants and defines ================= */ + +/* common #defines for SHA512 and SHA384 */ +#if defined(HAVE_LONG_LONG) +#define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39)) +#define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41)) +#define s0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SHR(x, 7)) +#define s1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SHR(x, 6)) + +#if PR_BYTES_PER_LONG == 8 +#define ULLC(hi, lo) 0x##hi##lo##UL +#elif defined(_MSC_VER) +#define ULLC(hi, lo) 0x##hi##lo##ui64 +#else +#define ULLC(hi, lo) 0x##hi##lo##ULL +#endif + +#define BYTESWAP8(x) x = FREEBL_HTONLL(x) + +#else /* no long long */ + +#if defined(IS_LITTLE_ENDIAN) +#define ULLC(hi, lo) \ + { \ + 0x##lo##U, 
0x##hi##U \ + } +#define FREEBL_HTONLL(x) (BYTESWAP4(x.lo), BYTESWAP4(x.hi), \ + x.hi ^= x.lo ^= x.hi ^= x.lo, x) +#define BYTESWAP8(x) \ + do { \ + PRUint32 tmp; \ + BYTESWAP4(x.lo); \ + BYTESWAP4(x.hi); \ + tmp = x.lo; \ + x.lo = x.hi; \ + x.hi = tmp; \ + } while (0) +#else +#define ULLC(hi, lo) \ + { \ + 0x##hi##U, 0x##lo##U \ + } +#endif + +#endif + +#if defined(USE_PPC_CRYPTO) +void sha512_block_p8(void *ctx, const void *inp, size_t len); + +#else /* USE_PPC_CRYPTO */ + +/* SHA-384 and SHA-512 constants, K512. */ +static const PRUint64 K512[80] = { +#if PR_BYTES_PER_LONG == 8 + 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, + 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, + 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, + 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 
0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL +#else + ULLC(428a2f98, d728ae22), ULLC(71374491, 23ef65cd), + ULLC(b5c0fbcf, ec4d3b2f), ULLC(e9b5dba5, 8189dbbc), + ULLC(3956c25b, f348b538), ULLC(59f111f1, b605d019), + ULLC(923f82a4, af194f9b), ULLC(ab1c5ed5, da6d8118), + ULLC(d807aa98, a3030242), ULLC(12835b01, 45706fbe), + ULLC(243185be, 4ee4b28c), ULLC(550c7dc3, d5ffb4e2), + ULLC(72be5d74, f27b896f), ULLC(80deb1fe, 3b1696b1), + ULLC(9bdc06a7, 25c71235), ULLC(c19bf174, cf692694), + ULLC(e49b69c1, 9ef14ad2), ULLC(efbe4786, 384f25e3), + ULLC(0fc19dc6, 8b8cd5b5), ULLC(240ca1cc, 77ac9c65), + ULLC(2de92c6f, 592b0275), ULLC(4a7484aa, 6ea6e483), + ULLC(5cb0a9dc, bd41fbd4), ULLC(76f988da, 831153b5), + ULLC(983e5152, ee66dfab), ULLC(a831c66d, 2db43210), + ULLC(b00327c8, 98fb213f), ULLC(bf597fc7, beef0ee4), + ULLC(c6e00bf3, 3da88fc2), ULLC(d5a79147, 930aa725), + ULLC(06ca6351, e003826f), ULLC(14292967, 0a0e6e70), + ULLC(27b70a85, 46d22ffc), ULLC(2e1b2138, 5c26c926), + ULLC(4d2c6dfc, 5ac42aed), ULLC(53380d13, 9d95b3df), + ULLC(650a7354, 8baf63de), ULLC(766a0abb, 3c77b2a8), + ULLC(81c2c92e, 47edaee6), ULLC(92722c85, 1482353b), + ULLC(a2bfe8a1, 4cf10364), ULLC(a81a664b, bc423001), + ULLC(c24b8b70, d0f89791), ULLC(c76c51a3, 0654be30), + ULLC(d192e819, d6ef5218), ULLC(d6990624, 5565a910), + ULLC(f40e3585, 5771202a), ULLC(106aa070, 32bbd1b8), + ULLC(19a4c116, b8d2d0c8), ULLC(1e376c08, 5141ab53), + ULLC(2748774c, df8eeb99), ULLC(34b0bcb5, e19b48a8), + ULLC(391c0cb3, c5c95a63), ULLC(4ed8aa4a, e3418acb), + ULLC(5b9cca4f, 7763e373), ULLC(682e6ff3, d6b2b8a3), + ULLC(748f82ee, 5defb2fc), ULLC(78a5636f, 
43172f60), + ULLC(84c87814, a1f0ab72), ULLC(8cc70208, 1a6439ec), + ULLC(90befffa, 23631e28), ULLC(a4506ceb, de82bde9), + ULLC(bef9a3f7, b2c67915), ULLC(c67178f2, e372532b), + ULLC(ca273ece, ea26619c), ULLC(d186b8c7, 21c0c207), + ULLC(eada7dd6, cde0eb1e), ULLC(f57d4f7f, ee6ed178), + ULLC(06f067aa, 72176fba), ULLC(0a637dc5, a2c898a6), + ULLC(113f9804, bef90dae), ULLC(1b710b35, 131c471b), + ULLC(28db77f5, 23047d84), ULLC(32caab7b, 40c72493), + ULLC(3c9ebe0a, 15c9bebc), ULLC(431d67c4, 9c100d4c), + ULLC(4cc5d4be, cb3e42b6), ULLC(597f299c, fc657e2a), + ULLC(5fcb6fab, 3ad6faec), ULLC(6c44198c, 4a475817) +#endif +}; + +#endif /* !USE_PPC_CRYPTO */ + +struct SHA512ContextStr { + union { + PRUint64 w[80]; /* message schedule, input buffer, plus 64 words */ + PRUint32 l[160]; + PRUint8 b[640]; + } u; + PRUint64 h[8]; /* 8 state variables */ + PRUint64 sizeLo; /* 64-bit count of hashed bytes. */ +}; + +/* =========== SHA512 implementation ===================================== */ + +/* SHA-512 initial hash values */ +static const PRUint64 H512[8] = { +#if PR_BYTES_PER_LONG == 8 + 0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL, + 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL, + 0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL, + 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL +#else + ULLC(6a09e667, f3bcc908), ULLC(bb67ae85, 84caa73b), + ULLC(3c6ef372, fe94f82b), ULLC(a54ff53a, 5f1d36f1), + ULLC(510e527f, ade682d1), ULLC(9b05688c, 2b3e6c1f), + ULLC(1f83d9ab, fb41bd6b), ULLC(5be0cd19, 137e2179) +#endif +}; + +SHA512Context * +SHA512_NewContext(void) +{ + SHA512Context *ctx = PORT_New(SHA512Context); + return ctx; +} + +void +SHA512_DestroyContext(SHA512Context *ctx, PRBool freeit) +{ + memset(ctx, 0, sizeof *ctx); + if (freeit) { + PORT_Free(ctx); + } +} + +void +SHA512_Begin(SHA512Context *ctx) +{ + memset(ctx, 0, sizeof *ctx); + memcpy(H, H512, sizeof H512); +} + +#if defined(SHA512_TRACE) +#if defined(HAVE_LONG_LONG) +#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %016lx, %s = %016lx\n", \ 
+ n, #e, d, #a, h); +#else +#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %08x%08x, %s = %08x%08x\n", \ + n, #e, d.hi, d.lo, #a, h.hi, h.lo); +#endif +#else +#define DUMP(n, a, d, e, h) +#endif + +#if defined(HAVE_LONG_LONG) + +#define ADDTO(x, y) y += x + +#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16]) + +#define ROUND(n, a, b, c, d, e, f, g, h) \ + h += S1(e) + Ch(e, f, g) + K512[n] + W[n]; \ + d += h; \ + h += S0(a) + Maj(a, b, c); \ + DUMP(n, a, d, e, h) + +#else /* use only 32-bit variables, and don't unroll loops */ + +#undef NOUNROLL512 +#define NOUNROLL512 1 + +#define ADDTO(x, y) \ + y.lo += x.lo; \ + y.hi += x.hi + (x.lo > y.lo) + +#define ROTR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n)) +#define ROTR64A(x, n, lo, hi) (x.lo << (64 - n) | x.hi >> (n - 32)) +#define SHR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n)) + +/* Capital Sigma and lower case sigma functions */ +#define s0lo(x) (ROTR64a(x, 1, lo, hi) ^ ROTR64a(x, 8, lo, hi) ^ SHR64a(x, 7, lo, hi)) +#define s0hi(x) (ROTR64a(x, 1, hi, lo) ^ ROTR64a(x, 8, hi, lo) ^ (x.hi >> 7)) + +#define s1lo(x) (ROTR64a(x, 19, lo, hi) ^ ROTR64A(x, 61, lo, hi) ^ SHR64a(x, 6, lo, hi)) +#define s1hi(x) (ROTR64a(x, 19, hi, lo) ^ ROTR64A(x, 61, hi, lo) ^ (x.hi >> 6)) + +#define S0lo(x) (ROTR64a(x, 28, lo, hi) ^ ROTR64A(x, 34, lo, hi) ^ ROTR64A(x, 39, lo, hi)) +#define S0hi(x) (ROTR64a(x, 28, hi, lo) ^ ROTR64A(x, 34, hi, lo) ^ ROTR64A(x, 39, hi, lo)) + +#define S1lo(x) (ROTR64a(x, 14, lo, hi) ^ ROTR64a(x, 18, lo, hi) ^ ROTR64A(x, 41, lo, hi)) +#define S1hi(x) (ROTR64a(x, 14, hi, lo) ^ ROTR64a(x, 18, hi, lo) ^ ROTR64A(x, 41, hi, lo)) + +/* 32-bit versions of Ch and Maj */ +#define Chxx(x, y, z, lo) ((x.lo & y.lo) ^ (~x.lo & z.lo)) +#define Majx(x, y, z, lo) ((x.lo & y.lo) ^ (x.lo & z.lo) ^ (y.lo & z.lo)) + +#define INITW(t) \ + do { \ + PRUint32 lo, tm; \ + PRUint32 cy = 0; \ + lo = s1lo(W[t - 2]); \ + lo += (tm = W[t - 7].lo); \ + if (lo < tm) \ + cy++; \ + lo += (tm = s0lo(W[t - 
15])); \ + if (lo < tm) \ + cy++; \ + lo += (tm = W[t - 16].lo); \ + if (lo < tm) \ + cy++; \ + W[t].lo = lo; \ + W[t].hi = cy + s1hi(W[t - 2]) + W[t - 7].hi + s0hi(W[t - 15]) + W[t - 16].hi; \ + } while (0) + +#define ROUND(n, a, b, c, d, e, f, g, h) \ + { \ + PRUint32 lo, tm, cy; \ + lo = S1lo(e); \ + lo += (tm = Chxx(e, f, g, lo)); \ + cy = (lo < tm); \ + lo += (tm = K512[n].lo); \ + if (lo < tm) \ + cy++; \ + lo += (tm = W[n].lo); \ + if (lo < tm) \ + cy++; \ + h.lo += lo; \ + if (h.lo < lo) \ + cy++; \ + h.hi += cy + S1hi(e) + Chxx(e, f, g, hi) + K512[n].hi + W[n].hi; \ + d.lo += h.lo; \ + d.hi += h.hi + (d.lo < h.lo); \ + lo = S0lo(a); \ + lo += (tm = Majx(a, b, c, lo)); \ + cy = (lo < tm); \ + h.lo += lo; \ + if (h.lo < lo) \ + cy++; \ + h.hi += cy + S0hi(a) + Majx(a, b, c, hi); \ + DUMP(n, a, d, e, h) \ + } +#endif + +static void +SHA512_Compress(SHA512Context *ctx) +{ +#if defined(USE_PPC_CRYPTO) + sha512_block_p8(&H[0], &W[0], 1); +#else /* USE_PPC_CRYPTO */ + +#if defined(IS_LITTLE_ENDIAN) + { + BYTESWAP8(W[0]); + BYTESWAP8(W[1]); + BYTESWAP8(W[2]); + BYTESWAP8(W[3]); + BYTESWAP8(W[4]); + BYTESWAP8(W[5]); + BYTESWAP8(W[6]); + BYTESWAP8(W[7]); + BYTESWAP8(W[8]); + BYTESWAP8(W[9]); + BYTESWAP8(W[10]); + BYTESWAP8(W[11]); + BYTESWAP8(W[12]); + BYTESWAP8(W[13]); + BYTESWAP8(W[14]); + BYTESWAP8(W[15]); + } +#endif + + { +#ifdef NOUNROLL512 + { + /* prepare the "message schedule" */ + int t; + for (t = 16; t < 80; ++t) { + INITW(t); + } + } +#else + INITW(16); + INITW(17); + INITW(18); + INITW(19); + + INITW(20); + INITW(21); + INITW(22); + INITW(23); + INITW(24); + INITW(25); + INITW(26); + INITW(27); + INITW(28); + INITW(29); + + INITW(30); + INITW(31); + INITW(32); + INITW(33); + INITW(34); + INITW(35); + INITW(36); + INITW(37); + INITW(38); + INITW(39); + + INITW(40); + INITW(41); + INITW(42); + INITW(43); + INITW(44); + INITW(45); + INITW(46); + INITW(47); + INITW(48); + INITW(49); + + INITW(50); + INITW(51); + INITW(52); + INITW(53); + INITW(54); + 
INITW(55); + INITW(56); + INITW(57); + INITW(58); + INITW(59); + + INITW(60); + INITW(61); + INITW(62); + INITW(63); + INITW(64); + INITW(65); + INITW(66); + INITW(67); + INITW(68); + INITW(69); + + INITW(70); + INITW(71); + INITW(72); + INITW(73); + INITW(74); + INITW(75); + INITW(76); + INITW(77); + INITW(78); + INITW(79); +#endif + } +#ifdef SHA512_TRACE + { + int i; + for (i = 0; i < 80; ++i) { +#ifdef HAVE_LONG_LONG + printf("W[%2d] = %016lx\n", i, W[i]); +#else + printf("W[%2d] = %08x%08x\n", i, W[i].hi, W[i].lo); +#endif + } + } +#endif + { + PRUint64 a, b, c, d, e, f, g, h; + + a = H[0]; + b = H[1]; + c = H[2]; + d = H[3]; + e = H[4]; + f = H[5]; + g = H[6]; + h = H[7]; + +#ifdef NOUNROLL512 + { + int t; + for (t = 0; t < 80; t += 8) { + ROUND(t + 0, a, b, c, d, e, f, g, h) + ROUND(t + 1, h, a, b, c, d, e, f, g) + ROUND(t + 2, g, h, a, b, c, d, e, f) + ROUND(t + 3, f, g, h, a, b, c, d, e) + ROUND(t + 4, e, f, g, h, a, b, c, d) + ROUND(t + 5, d, e, f, g, h, a, b, c) + ROUND(t + 6, c, d, e, f, g, h, a, b) + ROUND(t + 7, b, c, d, e, f, g, h, a) + } + } +#else + ROUND(0, a, b, c, d, e, f, g, h) + ROUND(1, h, a, b, c, d, e, f, g) + ROUND(2, g, h, a, b, c, d, e, f) + ROUND(3, f, g, h, a, b, c, d, e) + ROUND(4, e, f, g, h, a, b, c, d) + ROUND(5, d, e, f, g, h, a, b, c) + ROUND(6, c, d, e, f, g, h, a, b) + ROUND(7, b, c, d, e, f, g, h, a) + + ROUND(8, a, b, c, d, e, f, g, h) + ROUND(9, h, a, b, c, d, e, f, g) + ROUND(10, g, h, a, b, c, d, e, f) + ROUND(11, f, g, h, a, b, c, d, e) + ROUND(12, e, f, g, h, a, b, c, d) + ROUND(13, d, e, f, g, h, a, b, c) + ROUND(14, c, d, e, f, g, h, a, b) + ROUND(15, b, c, d, e, f, g, h, a) + + ROUND(16, a, b, c, d, e, f, g, h) + ROUND(17, h, a, b, c, d, e, f, g) + ROUND(18, g, h, a, b, c, d, e, f) + ROUND(19, f, g, h, a, b, c, d, e) + ROUND(20, e, f, g, h, a, b, c, d) + ROUND(21, d, e, f, g, h, a, b, c) + ROUND(22, c, d, e, f, g, h, a, b) + ROUND(23, b, c, d, e, f, g, h, a) + + ROUND(24, a, b, c, d, e, f, g, h) + ROUND(25, h, a, b, 
c, d, e, f, g) + ROUND(26, g, h, a, b, c, d, e, f) + ROUND(27, f, g, h, a, b, c, d, e) + ROUND(28, e, f, g, h, a, b, c, d) + ROUND(29, d, e, f, g, h, a, b, c) + ROUND(30, c, d, e, f, g, h, a, b) + ROUND(31, b, c, d, e, f, g, h, a) + + ROUND(32, a, b, c, d, e, f, g, h) + ROUND(33, h, a, b, c, d, e, f, g) + ROUND(34, g, h, a, b, c, d, e, f) + ROUND(35, f, g, h, a, b, c, d, e) + ROUND(36, e, f, g, h, a, b, c, d) + ROUND(37, d, e, f, g, h, a, b, c) + ROUND(38, c, d, e, f, g, h, a, b) + ROUND(39, b, c, d, e, f, g, h, a) + + ROUND(40, a, b, c, d, e, f, g, h) + ROUND(41, h, a, b, c, d, e, f, g) + ROUND(42, g, h, a, b, c, d, e, f) + ROUND(43, f, g, h, a, b, c, d, e) + ROUND(44, e, f, g, h, a, b, c, d) + ROUND(45, d, e, f, g, h, a, b, c) + ROUND(46, c, d, e, f, g, h, a, b) + ROUND(47, b, c, d, e, f, g, h, a) + + ROUND(48, a, b, c, d, e, f, g, h) + ROUND(49, h, a, b, c, d, e, f, g) + ROUND(50, g, h, a, b, c, d, e, f) + ROUND(51, f, g, h, a, b, c, d, e) + ROUND(52, e, f, g, h, a, b, c, d) + ROUND(53, d, e, f, g, h, a, b, c) + ROUND(54, c, d, e, f, g, h, a, b) + ROUND(55, b, c, d, e, f, g, h, a) + + ROUND(56, a, b, c, d, e, f, g, h) + ROUND(57, h, a, b, c, d, e, f, g) + ROUND(58, g, h, a, b, c, d, e, f) + ROUND(59, f, g, h, a, b, c, d, e) + ROUND(60, e, f, g, h, a, b, c, d) + ROUND(61, d, e, f, g, h, a, b, c) + ROUND(62, c, d, e, f, g, h, a, b) + ROUND(63, b, c, d, e, f, g, h, a) + + ROUND(64, a, b, c, d, e, f, g, h) + ROUND(65, h, a, b, c, d, e, f, g) + ROUND(66, g, h, a, b, c, d, e, f) + ROUND(67, f, g, h, a, b, c, d, e) + ROUND(68, e, f, g, h, a, b, c, d) + ROUND(69, d, e, f, g, h, a, b, c) + ROUND(70, c, d, e, f, g, h, a, b) + ROUND(71, b, c, d, e, f, g, h, a) + + ROUND(72, a, b, c, d, e, f, g, h) + ROUND(73, h, a, b, c, d, e, f, g) + ROUND(74, g, h, a, b, c, d, e, f) + ROUND(75, f, g, h, a, b, c, d, e) + ROUND(76, e, f, g, h, a, b, c, d) + ROUND(77, d, e, f, g, h, a, b, c) + ROUND(78, c, d, e, f, g, h, a, b) + ROUND(79, b, c, d, e, f, g, h, a) +#endif + + ADDTO(a, H[0]); 
+ ADDTO(b, H[1]); + ADDTO(c, H[2]); + ADDTO(d, H[3]); + ADDTO(e, H[4]); + ADDTO(f, H[5]); + ADDTO(g, H[6]); + ADDTO(h, H[7]); + } + +#endif /* !USE_PPC_CRYPTO */ +} + +void +SHA512_Update(SHA512Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + unsigned int inBuf; + if (!inputLen) + return; + +#if defined(HAVE_LONG_LONG) + inBuf = (unsigned int)ctx->sizeLo & 0x7f; + /* Add inputLen into the count of bytes processed, before processing */ + ctx->sizeLo += inputLen; +#else + inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f; + ctx->sizeLo.lo += inputLen; + if (ctx->sizeLo.lo < inputLen) + ctx->sizeLo.hi++; +#endif + + /* if data already in buffer, attempt to fill rest of buffer */ + if (inBuf) { + unsigned int todo = SHA512_BLOCK_LENGTH - inBuf; + if (inputLen < todo) + todo = inputLen; + memcpy(B + inBuf, input, todo); + input += todo; + inputLen -= todo; + if (inBuf + todo == SHA512_BLOCK_LENGTH) + SHA512_Compress(ctx); + } + + /* if enough data to fill one or more whole buffers, process them. */ + while (inputLen >= SHA512_BLOCK_LENGTH) { + memcpy(B, input, SHA512_BLOCK_LENGTH); + input += SHA512_BLOCK_LENGTH; + inputLen -= SHA512_BLOCK_LENGTH; + SHA512_Compress(ctx); + } + /* if data left over, fill it into buffer */ + if (inputLen) + memcpy(B, input, inputLen); +} + +void +SHA512_End(SHA512Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ +#if defined(HAVE_LONG_LONG) + unsigned int inBuf = (unsigned int)ctx->sizeLo & 0x7f; +#else + unsigned int inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f; +#endif + unsigned int padLen = (inBuf < 112) ? 
(112 - inBuf) : (112 + 128 - inBuf); + PRUint64 lo; + LL_SHL(lo, ctx->sizeLo, 3); + + SHA512_Update(ctx, pad, padLen); + +#if defined(HAVE_LONG_LONG) + W[14] = 0; +#else + W[14].lo = 0; + W[14].hi = 0; +#endif + + W[15] = lo; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP8(W[15]); +#endif + SHA512_Compress(ctx); + +/* now output the answer */ +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP8(H[0]); + BYTESWAP8(H[1]); + BYTESWAP8(H[2]); + BYTESWAP8(H[3]); + BYTESWAP8(H[4]); + BYTESWAP8(H[5]); + BYTESWAP8(H[6]); + BYTESWAP8(H[7]); +#endif + padLen = PR_MIN(SHA512_LENGTH, maxDigestLen); + memcpy(digest, H, padLen); + if (digestLen) + *digestLen = padLen; +} + +void +SHA512_EndRaw(SHA512Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + PRUint64 h[8]; + unsigned int len; + + memcpy(h, ctx->h, sizeof(h)); + +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP8(h[0]); + BYTESWAP8(h[1]); + BYTESWAP8(h[2]); + BYTESWAP8(h[3]); + BYTESWAP8(h[4]); + BYTESWAP8(h[5]); + BYTESWAP8(h[6]); + BYTESWAP8(h[7]); +#endif + len = PR_MIN(SHA512_LENGTH, maxDigestLen); + memcpy(digest, h, len); + if (digestLen) + *digestLen = len; +} + +SECStatus +SHA512_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length) +{ + SHA512Context ctx; + unsigned int outLen; + + SHA512_Begin(&ctx); + SHA512_Update(&ctx, src, src_length); + SHA512_End(&ctx, dest, &outLen, SHA512_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA512_Hash(unsigned char *dest, const char *src) +{ + return SHA512_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA512_TraceState(SHA512Context *ctx) +{ +} + +unsigned int +SHA512_FlattenSize(SHA512Context *ctx) +{ + return sizeof *ctx; +} + +SECStatus +SHA512_Flatten(SHA512Context *ctx, unsigned char *space) +{ + PORT_Memcpy(space, ctx, sizeof *ctx); + return SECSuccess; +} + +SHA512Context * +SHA512_Resurrect(unsigned char *space, void *arg) +{ + SHA512Context *ctx = 
SHA512_NewContext(); + if (ctx) + PORT_Memcpy(ctx, space, sizeof *ctx); + return ctx; +} + +void +SHA512_Clone(SHA512Context *dest, SHA512Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +/* ======================================================================= */ +/* SHA384 uses a SHA512Context as the real context. +** The only differences between SHA384 and SHA512 are: +** a) the initialization values for the context, and +** b) the number of bytes of data produced as output. +*/ + +/* SHA-384 initial hash values */ +static const PRUint64 H384[8] = { +#if PR_BYTES_PER_LONG == 8 + 0xcbbb9d5dc1059ed8UL, 0x629a292a367cd507UL, + 0x9159015a3070dd17UL, 0x152fecd8f70e5939UL, + 0x67332667ffc00b31UL, 0x8eb44a8768581511UL, + 0xdb0c2e0d64f98fa7UL, 0x47b5481dbefa4fa4UL +#else + ULLC(cbbb9d5d, c1059ed8), ULLC(629a292a, 367cd507), + ULLC(9159015a, 3070dd17), ULLC(152fecd8, f70e5939), + ULLC(67332667, ffc00b31), ULLC(8eb44a87, 68581511), + ULLC(db0c2e0d, 64f98fa7), ULLC(47b5481d, befa4fa4) +#endif +}; + +SHA384Context * +SHA384_NewContext(void) +{ + return SHA512_NewContext(); +} + +void +SHA384_DestroyContext(SHA384Context *ctx, PRBool freeit) +{ + SHA512_DestroyContext(ctx, freeit); +} + +void +SHA384_Begin(SHA384Context *ctx) +{ + memset(ctx, 0, sizeof *ctx); + memcpy(H, H384, sizeof H384); +} + +void +SHA384_Update(SHA384Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + SHA512_Update(ctx, input, inputLen); +} + +void +SHA384_End(SHA384Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH); + SHA512_End(ctx, digest, digestLen, maxLen); +} + +void +SHA384_EndRaw(SHA384Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH); + SHA512_EndRaw(ctx, digest, digestLen, maxLen); +} + +SECStatus +SHA384_HashBuf(unsigned char *dest, const unsigned char *src, + 
PRUint32 src_length) +{ + SHA512Context ctx; + unsigned int outLen; + + SHA384_Begin(&ctx); + SHA512_Update(&ctx, src, src_length); + SHA512_End(&ctx, dest, &outLen, SHA384_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA384_Hash(unsigned char *dest, const char *src) +{ + return SHA384_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA384_TraceState(SHA384Context *ctx) +{ +} + +unsigned int +SHA384_FlattenSize(SHA384Context *ctx) +{ + return sizeof(SHA384Context); +} + +SECStatus +SHA384_Flatten(SHA384Context *ctx, unsigned char *space) +{ + return SHA512_Flatten(ctx, space); +} + +SHA384Context * +SHA384_Resurrect(unsigned char *space, void *arg) +{ + return SHA512_Resurrect(space, arg); +} + +void +SHA384_Clone(SHA384Context *dest, SHA384Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +/* ======================================================================= */ +#ifdef SELFTEST +#include <stdio.h> + +static const char abc[] = { "abc" }; +static const char abcdbc[] = { + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" +}; +static const char abcdef[] = { + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu" +}; + +void +dumpHash32(const unsigned char *buf, unsigned int bufLen) +{ + unsigned int i; + for (i = 0; i < bufLen; i += 4) { + printf(" %02x%02x%02x%02x", buf[i], buf[i + 1], buf[i + 2], buf[i + 3]); + } + printf("\n"); +} + +void +test256(void) +{ + unsigned char outBuf[SHA256_LENGTH]; + + printf("SHA256, input = %s\n", abc); + SHA256_Hash(outBuf, abc); + dumpHash32(outBuf, sizeof outBuf); + + printf("SHA256, input = %s\n", abcdbc); + SHA256_Hash(outBuf, abcdbc); + dumpHash32(outBuf, sizeof outBuf); +} + +void +test224(void) +{ + SHA224Context ctx; + unsigned char a1000times[1000]; + unsigned int outLen; + unsigned char outBuf[SHA224_LENGTH]; + int i; + + /* Test Vector 1 */ + printf("SHA224, input = %s\n", abc); + 
SHA224_Hash(outBuf, abc); + dumpHash32(outBuf, sizeof outBuf); + + /* Test Vector 2 */ + printf("SHA224, input = %s\n", abcdbc); + SHA224_Hash(outBuf, abcdbc); + dumpHash32(outBuf, sizeof outBuf); + + /* Test Vector 3 */ + + /* to hash one million 'a's perform 1000 + * sha224 updates on a buffer with 1000 'a's + */ + memset(a1000times, 'a', 1000); + printf("SHA224, input = %s\n", "a one million times"); + SHA224_Begin(&ctx); + for (i = 0; i < 1000; i++) + SHA224_Update(&ctx, a1000times, 1000); + SHA224_End(&ctx, outBuf, &outLen, SHA224_LENGTH); + dumpHash32(outBuf, sizeof outBuf); +} + +void +dumpHash64(const unsigned char *buf, unsigned int bufLen) +{ + unsigned int i; + for (i = 0; i < bufLen; i += 8) { + if (i % 32 == 0) + printf("\n"); + printf(" %02x%02x%02x%02x%02x%02x%02x%02x", + buf[i], buf[i + 1], buf[i + 2], buf[i + 3], + buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]); + } + printf("\n"); +} + +void +test512(void) +{ + unsigned char outBuf[SHA512_LENGTH]; + + printf("SHA512, input = %s\n", abc); + SHA512_Hash(outBuf, abc); + dumpHash64(outBuf, sizeof outBuf); + + printf("SHA512, input = %s\n", abcdef); + SHA512_Hash(outBuf, abcdef); + dumpHash64(outBuf, sizeof outBuf); +} + +void +time512(void) +{ + unsigned char outBuf[SHA512_LENGTH]; + + SHA512_Hash(outBuf, abc); + SHA512_Hash(outBuf, abcdef); +} + +void +test384(void) +{ + unsigned char outBuf[SHA384_LENGTH]; + + printf("SHA384, input = %s\n", abc); + SHA384_Hash(outBuf, abc); + dumpHash64(outBuf, sizeof outBuf); + + printf("SHA384, input = %s\n", abcdef); + SHA384_Hash(outBuf, abcdef); + dumpHash64(outBuf, sizeof outBuf); +} + +int +main(int argc, char *argv[], char *envp[]) +{ + int i = 1; + if (argc > 1) { + i = atoi(argv[1]); + } + if (i < 2) { + test224(); + test256(); + test384(); + test512(); + } else { + while (i-- > 0) { + time512(); + } + printf("done\n"); + } + return 0; +} + +void * +PORT_Alloc(size_t len) +{ + return malloc(len); +} +void +PORT_Free(void *ptr) +{ + free(ptr); +} +void 
+PORT_ZFree(void *ptr, size_t len) +{ + memset(ptr, 0, len); + free(ptr); +} +#endif diff --git a/security/nss/lib/freebl/sha_fast.c b/security/nss/lib/freebl/sha_fast.c new file mode 100644 index 0000000000..2a8ac576c1 --- /dev/null +++ b/security/nss/lib/freebl/sha_fast.c @@ -0,0 +1,592 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include +#include "blapi.h" +#include "sha_fast.h" +#include "prerror.h" +#include "secerr.h" + +#ifdef TRACING_SSL +#include "ssl.h" +#include "ssltrace.h" +#endif + +static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain); + +#define W u.w +#define B u.b + +#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z)) +#define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y)))) +#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z)) + +#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1) + +void SHA1_Compress_Native(SHA1Context *ctx); +void SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len); + +static void SHA1_Compress_Generic(SHA1Context *ctx); +static void SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len); + +#ifndef USE_HW_SHA1 +void +SHA1_Compress_Native(SHA1Context *ctx) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} +#endif + +/* + * SHA: initialize context + */ +void +SHA1_Begin(SHA1Context *ctx) +{ + ctx->size = 0; + /* + * Initialize H with constants from FIPS180-1. 
+ */ + ctx->H[0] = 0x67452301L; + ctx->H[1] = 0xefcdab89L; + ctx->H[2] = 0x98badcfeL; + ctx->H[3] = 0x10325476L; + ctx->H[4] = 0xc3d2e1f0L; + +#if defined(USE_HW_SHA1) && defined(IS_LITTLE_ENDIAN) + /* arm's implementation is tested on little endian only */ + if (arm_sha1_support()) { + ctx->compress = SHA1_Compress_Native; + ctx->update = SHA1_Update_Native; + } else +#endif + { + ctx->compress = SHA1_Compress_Generic; + ctx->update = SHA1_Update_Generic; + } +} + +/* Explanation of H array and index values: + * The context's H array is actually the concatenation of two arrays + * defined by SHA1, the H array of state variables (5 elements), + * and the W array of intermediate values, of which there are 16 elements. + * The W array starts at H[5], that is W[0] is H[5]. + * Although these values are defined as 32-bit values, we use 64-bit + * variables to hold them because the AMD64 stores 64 bit values in + * memory MUCH faster than it stores any smaller values. + * + * Rather than passing the context structure to shaCompress, we pass + * this combined array of H and W values. We do not pass the address + * of the first element of this array, but rather pass the address of an + * element in the middle of the array, element X. Presently X[0] is H[11]. + * So we pass the address of H[11] as the address of array X to shaCompress. + * Then shaCompress accesses the members of the array using positive AND + * negative indexes. + * + * Pictorially: (each element is 8 bytes) + * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf | + * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 | + * + * The byte offset from X[0] to any member of H and W is always + * representable in a signed 8-bit value, which will be encoded + * as a single byte offset in the X86-64 instruction set. 
+ * If we didn't pass the address of H[11], and instead passed the + * address of H[0], the offsets to elements H[16] and above would be + * greater than 127, not representable in a signed 8-bit value, and the + * x86-64 instruction set would encode every such offset as a 32-bit + * signed number in each instruction that accessed element H[16] or + * higher. This results in much bigger and slower code. + */ +#if !defined(SHA_PUT_W_IN_STACK) +#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */ +#define W2X 6 /* X[0] is W[6], and W[0] is X[-6] */ +#else +#define H2X 0 +#endif + +/* + * SHA: Add data to context. + */ +void +SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len) +{ + ctx->update(ctx, dataIn, len); +} + +static void +SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len) +{ + register unsigned int lenB; + register unsigned int togo; + + if (!len) + return; + + /* accumulate the byte count. */ + lenB = (unsigned int)(ctx->size) & 63U; + + ctx->size += len; + + /* + * Read the data into W and process blocks as they get full + */ + if (lenB > 0) { + togo = 64U - lenB; + if (len < togo) + togo = len; + memcpy(ctx->B + lenB, dataIn, togo); + len -= togo; + dataIn += togo; + lenB = (lenB + togo) & 63U; + if (!lenB) { + shaCompress(&ctx->H[H2X], ctx->W); + } + } +#if !defined(HAVE_UNALIGNED_ACCESS) + if ((ptrdiff_t)dataIn % sizeof(PRUint32)) { + while (len >= 64U) { + memcpy(ctx->B, dataIn, 64); + len -= 64U; + shaCompress(&ctx->H[H2X], ctx->W); + dataIn += 64U; + } + } else +#endif + { + while (len >= 64U) { + len -= 64U; + shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn); + dataIn += 64U; + } + } + if (len) { + memcpy(ctx->B, dataIn, len); + } +} + +/* + * SHA: Generate hash value from context + */ +void NO_SANITIZE_ALIGNMENT +SHA1_End(SHA1Context *ctx, unsigned char *hashout, + unsigned int *pDigestLen, unsigned int maxDigestLen) +{ + register PRUint64 size; + register PRUint32 lenB; + + static const 
unsigned char bulk_pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +#define tmp lenB + + PORT_Assert(maxDigestLen >= SHA1_LENGTH); + + /* + * Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits + */ + size = ctx->size; + + lenB = (PRUint32)size & 63; + SHA1_Update(ctx, bulk_pad, (((55 + 64) - lenB) & 63) + 1); + PORT_Assert(((PRUint32)ctx->size & 63) == 56); + /* Convert size from bytes to bits. */ + size <<= 3; + ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32)); + ctx->W[15] = SHA_HTONL((PRUint32)size); + ctx->compress(ctx); + + /* + * Output hash + */ + SHA_STORE_RESULT; + if (pDigestLen) { + *pDigestLen = SHA1_LENGTH; + } +#undef tmp +} + +void +SHA1_EndRaw(SHA1Context *ctx, unsigned char *hashout, + unsigned int *pDigestLen, unsigned int maxDigestLen) +{ +#if defined(SHA_NEED_TMP_VARIABLE) + register PRUint32 tmp; +#endif + PORT_Assert(maxDigestLen >= SHA1_LENGTH); + + SHA_STORE_RESULT; + if (pDigestLen) + *pDigestLen = SHA1_LENGTH; +} + +#undef B +/* + * SHA: Compression function, unrolled. + * + * Some operations in shaCompress are done as 5 groups of 16 operations. + * Others are done as 4 groups of 20 operations. + * The code below shows that structure. + * + * The functions that compute the new values of the 5 state variables + * A-E are done in 4 groups of 20 operations (or you may also think + * of them as being done in 16 groups of 5 operations). They are + * done by the SHA_RNDx macros below, in the right column. + * + * The functions that set the 16 values of the W array are done in + * 5 groups of 16 operations. The first group is done by the + * LOAD macros below, the latter 4 groups are done by SHA_MIX below, + * in the left column. + * + * gcc's optimizer observes that each member of the W array is assigned + * a value 5 times in this code. 
It reduces the number of store + * operations done to the W array in the context (that is, in the X array) + * by creating a W array on the stack, and storing the W values there for + * the first 4 groups of operations on W, and storing the values in the + * context's W array only in the fifth group. This is undesirable. + * It is MUCH bigger code than simply using the context's W array, because + * all the offsets to the W array in the stack are 32-bit signed offsets, + * and it is no faster than storing the values in the context's W array. + * + * The original code for sha_fast.c prevented this creation of a separate + * W array in the stack by creating a W array of 80 members, each of + * whose elements is assigned only once. It also separated the computations + * of the W array values and the computations of the values for the 5 + * state variables into two separate passes, W's, then A-E's so that the + * second pass could be done all in registers (except for accessing the W + * array) on machines with fewer registers. The method is suboptimal + * for machines with enough registers to do it all in one pass, and it + * necessitates using many instructions with 32-bit offsets. + * + * This code eliminates the separate W array on the stack by a completely + * different means: by declaring the X array volatile. This prevents + * the optimizer from trying to reduce the use of the X array by the + * creation of a MORE expensive W array on the stack. The result is + * that all instructions use signed 8-bit offsets and not 32-bit offsets. + * + * The combination of this code and the -O3 optimizer flag on GCC 3.4.3 + * results in code that is 3 times faster than the previous NSS sha_fast + * code on AMD64. 
+ */ +static void NO_SANITIZE_ALIGNMENT +shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf) +{ + register SHA_HW_t A, B, C, D, E; + +#if defined(SHA_NEED_TMP_VARIABLE) + register PRUint32 tmp; +#endif + +#if !defined(SHA_PUT_W_IN_STACK) +#define XH(n) X[n - H2X] +#define XW(n) X[n - W2X] +#else + SHA_HW_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7, + w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15; +#define XW(n) w_##n +#define XH(n) X[n] +#endif + +#define K0 0x5a827999L +#define K1 0x6ed9eba1L +#define K2 0x8f1bbcdcL +#define K3 0xca62c1d6L + +#define SHA_RND1(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \ + c = SHA_ROTL(c, 30) +#define SHA_RND2(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \ + c = SHA_ROTL(c, 30) +#define SHA_RND3(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \ + c = SHA_ROTL(c, 30) +#define SHA_RND4(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \ + c = SHA_ROTL(c, 30) + +#define LOAD(n) XW(n) = SHA_HTONL(inbuf[n]) + + A = XH(0); + B = XH(1); + C = XH(2); + D = XH(3); + E = XH(4); + + LOAD(0); + SHA_RND1(E, A, B, C, D, 0); + LOAD(1); + SHA_RND1(D, E, A, B, C, 1); + LOAD(2); + SHA_RND1(C, D, E, A, B, 2); + LOAD(3); + SHA_RND1(B, C, D, E, A, 3); + LOAD(4); + SHA_RND1(A, B, C, D, E, 4); + LOAD(5); + SHA_RND1(E, A, B, C, D, 5); + LOAD(6); + SHA_RND1(D, E, A, B, C, 6); + LOAD(7); + SHA_RND1(C, D, E, A, B, 7); + LOAD(8); + SHA_RND1(B, C, D, E, A, 8); + LOAD(9); + SHA_RND1(A, B, C, D, E, 9); + LOAD(10); + SHA_RND1(E, A, B, C, D, 10); + LOAD(11); + SHA_RND1(D, E, A, B, C, 11); + LOAD(12); + SHA_RND1(C, D, E, A, B, 12); + LOAD(13); + SHA_RND1(B, C, D, E, A, 13); + LOAD(14); + SHA_RND1(A, B, C, D, E, 14); + LOAD(15); + SHA_RND1(E, A, B, C, D, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND1(D, E, A, B, C, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND1(C, D, E, A, B, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND1(B, C, D, E, A, 2); + SHA_MIX(3, 0, 11, 5); 
+ SHA_RND1(A, B, C, D, E, 3); + + SHA_MIX(4, 1, 12, 6); + SHA_RND2(E, A, B, C, D, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND2(D, E, A, B, C, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND2(C, D, E, A, B, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND2(B, C, D, E, A, 7); + SHA_MIX(8, 5, 0, 10); + SHA_RND2(A, B, C, D, E, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND2(E, A, B, C, D, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND2(D, E, A, B, C, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND2(C, D, E, A, B, 11); + SHA_MIX(12, 9, 4, 14); + SHA_RND2(B, C, D, E, A, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND2(A, B, C, D, E, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND2(E, A, B, C, D, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND2(D, E, A, B, C, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND2(C, D, E, A, B, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND2(B, C, D, E, A, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND2(A, B, C, D, E, 2); + SHA_MIX(3, 0, 11, 5); + SHA_RND2(E, A, B, C, D, 3); + SHA_MIX(4, 1, 12, 6); + SHA_RND2(D, E, A, B, C, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND2(C, D, E, A, B, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND2(B, C, D, E, A, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND2(A, B, C, D, E, 7); + + SHA_MIX(8, 5, 0, 10); + SHA_RND3(E, A, B, C, D, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND3(D, E, A, B, C, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND3(C, D, E, A, B, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND3(B, C, D, E, A, 11); + SHA_MIX(12, 9, 4, 14); + SHA_RND3(A, B, C, D, E, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND3(E, A, B, C, D, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND3(D, E, A, B, C, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND3(C, D, E, A, B, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND3(B, C, D, E, A, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND3(A, B, C, D, E, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND3(E, A, B, C, D, 2); + SHA_MIX(3, 0, 11, 5); + SHA_RND3(D, E, A, B, C, 3); + SHA_MIX(4, 1, 12, 6); + SHA_RND3(C, D, E, A, B, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND3(B, C, D, E, A, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND3(A, B, C, D, E, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND3(E, A, B, 
C, D, 7); + SHA_MIX(8, 5, 0, 10); + SHA_RND3(D, E, A, B, C, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND3(C, D, E, A, B, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND3(B, C, D, E, A, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND3(A, B, C, D, E, 11); + + SHA_MIX(12, 9, 4, 14); + SHA_RND4(E, A, B, C, D, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND4(D, E, A, B, C, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND4(C, D, E, A, B, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND4(B, C, D, E, A, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND4(A, B, C, D, E, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND4(E, A, B, C, D, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND4(D, E, A, B, C, 2); + SHA_MIX(3, 0, 11, 5); + SHA_RND4(C, D, E, A, B, 3); + SHA_MIX(4, 1, 12, 6); + SHA_RND4(B, C, D, E, A, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND4(A, B, C, D, E, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND4(E, A, B, C, D, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND4(D, E, A, B, C, 7); + SHA_MIX(8, 5, 0, 10); + SHA_RND4(C, D, E, A, B, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND4(B, C, D, E, A, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND4(A, B, C, D, E, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND4(E, A, B, C, D, 11); + SHA_MIX(12, 9, 4, 14); + SHA_RND4(D, E, A, B, C, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND4(C, D, E, A, B, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND4(B, C, D, E, A, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND4(A, B, C, D, E, 15); + + XH(0) += A; + XH(1) += B; + XH(2) += C; + XH(3) += D; + XH(4) += E; +} + +static void +SHA1_Compress_Generic(SHA1Context *ctx) +{ + shaCompress(&ctx->H[H2X], ctx->u.w); +} + +/************************************************************************* +** Code below this line added to make SHA code support BLAPI interface +*/ + +SHA1Context * +SHA1_NewContext(void) +{ + SHA1Context *cx; + + /* no need to ZNew, SHA1_Begin will init the context */ + cx = PORT_New(SHA1Context); + return cx; +} + +/* Zero and free the context */ +void +SHA1_DestroyContext(SHA1Context *cx, PRBool freeit) +{ + memset(cx, 0, sizeof *cx); + if (freeit) { + PORT_Free(cx); + } 
+} + +SECStatus +SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + SHA1Context ctx; + unsigned int outLen; + + SHA1_Begin(&ctx); + ctx.update(&ctx, src, src_length); + SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH); + memset(&ctx, 0, sizeof ctx); + return SECSuccess; +} + +/* Hash a null-terminated character string. */ +SECStatus +SHA1_Hash(unsigned char *dest, const char *src) +{ + return SHA1_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +/* + * need to support save/restore state in pkcs11. Stores all the info necessary + * for a structure into just a stream of bytes. + */ +unsigned int +SHA1_FlattenSize(SHA1Context *cx) +{ + return sizeof(SHA1Context); +} + +SECStatus +SHA1_Flatten(SHA1Context *cx, unsigned char *space) +{ + PORT_Memcpy(space, cx, sizeof(SHA1Context)); + return SECSuccess; +} + +SHA1Context * +SHA1_Resurrect(unsigned char *space, void *arg) +{ + SHA1Context *cx = SHA1_NewContext(); + if (cx == NULL) + return NULL; + + PORT_Memcpy(cx, space, sizeof(SHA1Context)); + return cx; +} + +void +SHA1_Clone(SHA1Context *dest, SHA1Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +void +SHA1_TraceState(SHA1Context *ctx) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); +} diff --git a/security/nss/lib/freebl/sha_fast.h b/security/nss/lib/freebl/sha_fast.h new file mode 100644 index 0000000000..c03c0637a3 --- /dev/null +++ b/security/nss/lib/freebl/sha_fast.h @@ -0,0 +1,186 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _SHA_FAST_H_ +#define _SHA_FAST_H_ + +#include "prlong.h" +#include "blapii.h" + +#define SHA1_INPUT_LEN 64 + +#if defined(IS_64) && !defined(__sparc) && !defined(__aarch64__) +typedef PRUint64 SHA_HW_t; +#define SHA1_USING_64_BIT 1 +#else +typedef PRUint32 SHA_HW_t; +#endif + +struct SHA1ContextStr; + +typedef void (*sha1_compress_t)(struct SHA1ContextStr *); +typedef void (*sha1_update_t)(struct SHA1ContextStr *, const unsigned char *, + unsigned int); + +struct SHA1ContextStr { + union { + PRUint32 w[16]; /* input buffer */ + PRUint8 b[64]; + } u; + PRUint64 size; /* count of hashed bytes. */ + SHA_HW_t H[22]; /* 5 state variables, 16 tmp values, 1 extra */ + sha1_compress_t compress; + sha1_update_t update; +}; + +#if defined(_MSC_VER) +#include +#if defined(IS_LITTLE_ENDIAN) +#if (_MSC_VER >= 1300) +#pragma intrinsic(_byteswap_ulong) +#define SHA_HTONL(x) _byteswap_ulong(x) +#elif defined(NSS_X86_OR_X64) +#ifndef FORCEINLINE +#if (_MSC_VER >= 1200) +#define FORCEINLINE __forceinline +#else +#define FORCEINLINE __inline +#endif /* _MSC_VER */ +#endif /* !defined FORCEINLINE */ +#define FASTCALL __fastcall + +static FORCEINLINE PRUint32 FASTCALL +swap4b(PRUint32 dwd) +{ + __asm { + mov eax,dwd + bswap eax + } +} + +#define SHA_HTONL(x) swap4b(x) +#endif /* NSS_X86_OR_X64 */ +#endif /* IS_LITTLE_ENDIAN */ + +#pragma intrinsic(_lrotr, _lrotl) +#define SHA_ROTL(x, n) _lrotl(x, n) +#define SHA_ROTL_IS_DEFINED 1 +#endif /* _MSC_VER */ + +#if defined(__GNUC__) +/* __x86_64__ and __x86_64 are defined by GCC on x86_64 CPUs */ +#if defined(SHA1_USING_64_BIT) +static __inline__ PRUint64 +SHA_ROTL(PRUint64 x, PRUint32 n) +{ + PRUint32 t = (PRUint32)x; + return ((t << n) | (t >> (32 - n))); +} +#else +static __inline__ PRUint32 +SHA_ROTL(PRUint32 t, PRUint32 n) +{ + return ((t << n) | (t >> (32 - n))); +} +#endif +#define SHA_ROTL_IS_DEFINED 1 + +#if defined(NSS_X86_OR_X64) +static __inline__ PRUint32 +swap4b(PRUint32 value) +{ + __asm__("bswap %0" + : 
"+r"(value)); + return (value); +} +#define SHA_HTONL(x) swap4b(x) + +#elif defined(__thumb2__) || \ + (!defined(__thumb__) && \ + (defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__))) +#if defined(IS_LITTLE_ENDIAN) +static __inline__ PRUint32 +swap4b(PRUint32 value) +{ + PRUint32 ret; + __asm__("rev %0, %1" + : "=r"(ret) + : "r"(value)); + return ret; +} +#define SHA_HTONL(x) swap4b(x) +#endif + +#endif /* x86 family */ + +#endif /* __GNUC__ */ + +#if !defined(SHA_ROTL_IS_DEFINED) +#define SHA_NEED_TMP_VARIABLE 1 +#define SHA_ROTL(X, n) (tmp = (X), ((tmp) << (n)) | ((tmp) >> (32 - (n)))) +#endif + +#if !defined(SHA_HTONL) +#define SHA_MASK 0x00FF00FF +#if defined(IS_LITTLE_ENDIAN) +#undef SHA_NEED_TMP_VARIABLE +#define SHA_NEED_TMP_VARIABLE 1 +#define SHA_HTONL(x) (tmp = (x), tmp = (tmp << 16) | (tmp >> 16), \ + ((tmp & SHA_MASK) << 8) | ((tmp >> 8) & SHA_MASK)) +#else +#define SHA_HTONL(x) (x) +#endif +#endif + +#define SHA_BYTESWAP(x) x = SHA_HTONL(x) + +#define SHA_STORE(n) ((PRUint32 *)hashout)[n] = SHA_HTONL(ctx->H[n]) +#if defined(HAVE_UNALIGNED_ACCESS) +#define SHA_STORE_RESULT \ + SHA_STORE(0); \ + SHA_STORE(1); \ + SHA_STORE(2); \ + SHA_STORE(3); \ + SHA_STORE(4); + +#elif defined(IS_LITTLE_ENDIAN) || defined(SHA1_USING_64_BIT) +#define SHA_STORE_RESULT \ + if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \ + SHA_STORE(0); \ + SHA_STORE(1); \ + SHA_STORE(2); \ + SHA_STORE(3); \ + SHA_STORE(4); \ + } else { \ + PRUint32 tmpbuf[5]; \ + tmpbuf[0] = SHA_HTONL(ctx->H[0]); \ + tmpbuf[1] = SHA_HTONL(ctx->H[1]); \ + tmpbuf[2] = SHA_HTONL(ctx->H[2]); \ + tmpbuf[3] = SHA_HTONL(ctx->H[3]); \ + tmpbuf[4] = SHA_HTONL(ctx->H[4]); \ + memcpy(hashout, tmpbuf, SHA1_LENGTH); \ + } + +#else +#define SHA_STORE_RESULT \ + if (!((ptrdiff_t)hashout % 
sizeof(PRUint32))) { \ + SHA_STORE(0); \ + SHA_STORE(1); \ + SHA_STORE(2); \ + SHA_STORE(3); \ + SHA_STORE(4); \ + } else { \ + memcpy(hashout, ctx->H, SHA1_LENGTH); \ + } +#endif + +#endif /* _SHA_FAST_H_ */ diff --git a/security/nss/lib/freebl/shsign.h b/security/nss/lib/freebl/shsign.h new file mode 100644 index 0000000000..d1a595a391 --- /dev/null +++ b/security/nss/lib/freebl/shsign.h @@ -0,0 +1,26 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _SHSIGN_H_ +#define _SHSIGN_H_ + +#define SGN_SUFFIX ".chk" +#define NSS_SIGN_CHK_MAGIC1 0xf1 +#define NSS_SIGN_CHK_MAGIC2 0xc5 +/* new hmac based signatures */ +#define NSS_SIGN_CHK_MAJOR_VERSION 0x02 +#define NSS_SIGN_CHK_MINOR_VERSION 0x01 +#define NSS_SIGN_CHK_TYPE_FLAGS 0xff000000 +#define NSS_SIGN_CHK_FLAG_HMAC 0x80000000 + +typedef struct NSSSignChkHeaderStr NSSSignChkHeader; +struct NSSSignChkHeaderStr { + unsigned char magic1; + unsigned char magic2; + unsigned char majorVersion; + unsigned char minorVersion; + unsigned char offset[4]; + unsigned char type[4]; +}; +#endif /* _SHSIGN_H_ */ diff --git a/security/nss/lib/freebl/shvfy.c b/security/nss/lib/freebl/shvfy.c new file mode 100644 index 0000000000..15fde72b56 --- /dev/null +++ b/security/nss/lib/freebl/shvfy.c @@ -0,0 +1,664 @@ + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "shsign.h" +#include "prlink.h" +#include "prio.h" +#include "blapi.h" +#include "seccomon.h" +#include "secerr.h" +#include "stdio.h" +#include "prmem.h" +#include "hasht.h" +#include "pqg.h" +#include "blapii.h" +#include "secitem.h" +#include "pkcs11t.h" + +#ifndef NSS_FIPS_DISABLED + +/* + * Most modern version of Linux support a speed optimization scheme where an + * application called prelink modifies programs and shared libraries to quickly + * load if they fit into an already designed address space. In short, prelink + * scans the list of programs and libraries on your system, assigns them a + * predefined space in the the address space, then provides the fixups to the + * library. + + * The modification of the shared library is correctly detected by the freebl + * FIPS checksum scheme where we check a signed hash of the library against the + * library itself. + * + * The prelink command itself can reverse the process of modification and + * output the prestine shared library as it was before prelink made it's + * changes. If FREEBL_USE_PRELINK is set Freebl uses prelink to output the + * original copy of the shared library before prelink modified it. + */ +#ifdef FREEBL_USE_PRELINK +#ifndef FREELB_PRELINK_COMMAND +#define FREEBL_PRELINK_COMMAND "/usr/sbin/prelink -u -o -" +#endif +#include "private/pprio.h" + +#include +#include +#include +#include +#include + +/* + * This function returns an NSPR PRFileDesc * which the caller can read to + * obtain the prestine value of the shared library, before any OS related + * changes to it (usually address fixups). + * + * If prelink is installed, this + * file descriptor is a pipe connecting the output of + * /usr/sbin/prelink -u -o - {Library} + * and *pid returns the process id of the prelink child. + * + * If prelink is not installed, it returns a normal readonly handle to the + * library itself and *pid is set to '0'. 
+ *
+ * On failure to set up the command line, the pipe or the child process it
+ * returns NULL and *pid stays '0'.
+ */
+PRFileDesc *
+bl_OpenUnPrelink(const char *shName, int *pid)
+{
+    char *command = strdup(FREEBL_PRELINK_COMMAND);
+    char *argString = NULL;
+    char **argv = NULL;
+    char *shNameArg = NULL;
+    char *cp;
+    pid_t child;
+    int argc = 0, argNext = 0;
+    struct stat statBuf;
+    int pipefd[2] = { -1, -1 };
+    int ret;
+
+    *pid = 0;
+
+    /* strdup() can fail; without a writable copy of the command line we can
+     * neither split it nor stat it, so report the failure to the caller
+     * instead of dereferencing NULL below */
+    if (command == NULL) {
+        return NULL;
+    }
+
+    /* make sure the prelink command exists first. If not, fall back to
+     * just reading the file */
+    for (cp = command; *cp; cp++) {
+        if (*cp == ' ') {
+            /* terminate the program path; the rest is the argument string */
+            *cp++ = 0;
+            argString = cp;
+            break;
+        }
+    }
+    memset(&statBuf, 0, sizeof(statBuf));
+    /* stat the file, follow the link */
+    ret = stat(command, &statBuf);
+    if (ret < 0) {
+        free(command);
+        return PR_Open(shName, PR_RDONLY, 0);
+    }
+    /* file exists, make sure it's an executable */
+    if (!S_ISREG(statBuf.st_mode) ||
+        ((statBuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
+        free(command);
+        return PR_Open(shName, PR_RDONLY, 0);
+    }
+
+    /* OK, the prelink command exists and looks correct, use it */
+    /* build the arglist while we can still malloc */
+    /* count the args if any */
+    if (argString && *argString) {
+        /* argString may have leading spaces, strip them off */
+        for (cp = argString; *cp && *cp == ' '; cp++)
+            ;
+        argString = cp;
+        if (*cp) {
+            /* there is at least one arg.. */
+            argc = 1;
+        }
+
+        /* count the rest: Note there is no provision for escaped
+         * spaces here */
+        for (cp = argString; *cp; cp++) {
+            if (*cp == ' ') {
+                while (*cp == ' ')
+                    cp++;
+                if (!*cp) {
+                    /* trailing spaces: stop here so the loop increment
+                     * does not step past the terminator */
+                    break;
+                }
+                argc++;
+            }
+        }
+    }
+
+    /* add the additional args: argv[0] (command), shName, NULL */
+    argc += 3;
+    argv = PORT_NewArray(char *, argc);
+    if (argv == NULL) {
+        goto loser;
+    }
+
+    /* fill in the arglist; this walks the string exactly like the counting
+     * pass above, so argNext can never exceed argc */
+    argv[argNext++] = command;
+    if (argString && *argString) {
+        argv[argNext++] = argString;
+        for (cp = argString; *cp; cp++) {
+            if (*cp == ' ') {
+                *cp++ = 0;
+                while (*cp == ' ')
+                    cp++;
+                if (!*cp) {
+                    /* trailing spaces: nothing more to add */
+                    break;
+                }
+                argv[argNext++] = cp;
+            }
+        }
+    }
+    /* exec doesn't advertise taking const char **argv, do the paranoid
+     * copy */
+    shNameArg = strdup(shName);
+    if (shNameArg == NULL) {
+        goto loser;
+    }
+    argv[argNext++] = shNameArg;
+    argv[argNext++] = 0;
+
+    ret = pipe(pipefd);
+    if (ret < 0) {
+        goto loser;
+    }
+
+    /* use vfork() so we don't trigger the pthread_atfork() handlers */
+    child = vfork();
+    if (child < 0) {
+        goto loser;
+    }
+    if (child == 0) {
+        /* set up the file descriptors */
+        /* if we need to support BSD, this will need to be an open of
+         * /dev/null and dup2(nullFD, 0) */
+        close(0);
+        /* associate pipefd[1] with stdout */
+        if (pipefd[1] != 1)
+            dup2(pipefd[1], 1);
+        close(2);
+        close(pipefd[0]);
+        /* should probably close the other file descriptors? */
+
+        execv(command, argv);
+        /* avoid atexit() handlers */
+        _exit(1); /* shouldn't reach here except on an error */
+    }
+    close(pipefd[1]);
+    pipefd[1] = -1;
+
+    /* this is safe because either vfork() has full fork() semantics, and thus
+     * the child already has its own address space, or because vfork() has
+     * paused the parent until the exec or exit */
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    *pid = child;
+
+    return PR_ImportPipe(pipefd[0]);
+
+loser:
+    if (pipefd[0] != -1) {
+        close(pipefd[0]);
+    }
+    if (pipefd[1] != -1) {
+        close(pipefd[1]);
+    }
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    return NULL;
+}
+
+/*
+ * bl_CloseUnPrelink -
+ *
+ * This closes the file descriptor and reaps any children opened and created
+ * by bl_OpenUnPrelink. Its primary difference from just closing the file is
+ * that it calls wait on the pid if one is supplied, preventing zombie children
+ * from hanging around.
+ */
+void
+bl_CloseUnPrelink(PRFileDesc *file, int pid)
+{
+    /* close the file descriptor */
+    PR_Close(file);
+    /* reap the child */
+    if (pid) {
+        waitpid(pid, NULL, 0);
+    }
+}
+#endif
+
+/* #define DEBUG_SHVERIFY 1 */
+
+/*
+ * Build the name of the check (signature) file that belongs to libName:
+ * a trailing "." SHLIB_SUFFIX, when present, is dropped and SGN_SUFFIX is
+ * appended. Returns a PORT_Alloc'ed string, or NULL with
+ * SEC_ERROR_NO_MEMORY set when the allocation fails.
+ */
+static char *
+mkCheckFileName(const char *libName)
+{
+    int ln_len = PORT_Strlen(libName);
+    int index = ln_len + 1 - sizeof("." SHLIB_SUFFIX);
+    char *output = PORT_Alloc(ln_len + sizeof(SGN_SUFFIX));
+    if (!output) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+
+    if ((index > 0) &&
+        (PORT_Strncmp(&libName[index],
+                      "." SHLIB_SUFFIX, sizeof("."
SHLIB_SUFFIX)) == 0)) { + ln_len = index; + } + PORT_Memcpy(output, libName, ln_len); + PORT_Memcpy(&output[ln_len], SGN_SUFFIX, sizeof(SGN_SUFFIX)); + return output; +} + +static int +decodeInt(unsigned char *buf) +{ + return (buf[3]) | (buf[2] << 8) | (buf[1] << 16) | (buf[0] << 24); +} + +static SECStatus +readItem(PRFileDesc *fd, SECItem *item) +{ + unsigned char buf[4]; + int bytesRead; + + bytesRead = PR_Read(fd, buf, 4); + if (bytesRead != 4) { + return SECFailure; + } + item->len = decodeInt(buf); + + item->data = PORT_Alloc(item->len); + if (item->data == NULL) { + item->len = 0; + return SECFailure; + } + bytesRead = PR_Read(fd, item->data, item->len); + if (bytesRead != item->len) { + PORT_Free(item->data); + item->data = NULL; + item->len = 0; + return SECFailure; + } + return SECSuccess; +} + +static PRBool blapi_SHVerifyFile(const char *shName, PRBool self, PRBool rerun); + +static PRBool +blapi_SHVerify(const char *name, PRFuncPtr addr, PRBool self, PRBool rerun) +{ + PRBool result = PR_FALSE; /* if anything goes wrong, + * the signature does not verify */ + /* find our shared library name */ + char *shName = PR_GetLibraryFilePathname(name, addr); + if (!shName) { + goto loser; + } + result = blapi_SHVerifyFile(shName, self, rerun); + +loser: + if (shName != NULL) { + PR_Free(shName); + } + + return result; +} + +PRBool +BLAPI_SHVerify(const char *name, PRFuncPtr addr) +{ + PRBool rerun = PR_FALSE; + if (name && *name == BLAPI_FIPS_RERUN_FLAG) { + name++; + rerun = PR_TRUE; + } + return blapi_SHVerify(name, addr, PR_FALSE, rerun); +} + +PRBool +BLAPI_SHVerifyFile(const char *shName) +{ + PRBool rerun = PR_FALSE; + if (shName && *shName == BLAPI_FIPS_RERUN_FLAG) { + shName++; + rerun = PR_TRUE; + } + return blapi_SHVerifyFile(shName, PR_FALSE, rerun); +} + +#ifndef NSS_STRICT_INTEGRITY +/* This allows checks with old shlibsign .chk files. 
If NSS_STRICT_INTEGRITY + * is set, we don't accept DSA */ +static PRBool +blapi_SHVerifyDSACheck(PRFileDesc *shFD, const SECHashObject *hashObj, + DSAPublicKey *key, const SECItem *signature) +{ + void *hashcx = NULL; + SECItem hash; + int bytesRead; + unsigned char hashBuf[HASH_LENGTH_MAX]; + unsigned char buf[4096]; + SECStatus rv; + + hash.type = siBuffer; + hash.data = hashBuf; + hash.len = sizeof(hashBuf); + + /* hash our library file */ + hashcx = hashObj->create(); + if (hashcx == NULL) { + return PR_FALSE; + } + hashObj->begin(hashcx); + + while ((bytesRead = PR_Read(shFD, buf, sizeof(buf))) > 0) { + hashObj->update(hashcx, buf, bytesRead); + } + hashObj->end(hashcx, hash.data, &hash.len, hash.len); + hashObj->destroy(hashcx, PR_TRUE); + + /* verify the hash against the check file */ + rv = DSA_VerifyDigest(key, signature, &hash); + PORT_Memset(hashBuf, 0, sizeof hashBuf); + return (rv == SECSuccess) ? PR_TRUE : PR_FALSE; +} +#endif + +#ifdef NSS_STRICT_INTEGRITY +/* don't allow MD2, MD5, SHA1 or SHA224 as your integrity hash */ +static PRBool +blapi_HashAllowed(SECHashObject *hashObj) +{ + switch (hashObj->type) { + case HASH_AlgSHA256: + case HASH_AlgSHA384: + case HASH_AlgSHA512: + return PR_TRUE; + default: + break; + } + return PR_FALSE; +} +#endif + +static PRBool +blapi_SHVerifyHMACCheck(PRFileDesc *shFD, const SECHashObject *hashObj, + const SECItem *key, const SECItem *signature) +{ + HMACContext *hmaccx = NULL; + SECItem hash; + int bytesRead; + unsigned char hashBuf[HASH_LENGTH_MAX]; + unsigned char buf[4096]; + SECStatus rv; + PRBool result = PR_FALSE; + +#ifdef NSS_STRICT_INTEGRITY + if (!blapi_HashAllowed(hashObj)) { + return PR_FALSE; + } +#endif + + hash.type = siBuffer; + hash.data = hashBuf; + hash.len = hashObj->length; + + /* create an hmac for the library file */ + hmaccx = HMAC_Create(hashObj, key->data, key->len, PR_TRUE); + if (hmaccx == NULL) { + return PR_FALSE; + } + HMAC_Begin(hmaccx); + + while ((bytesRead = PR_Read(shFD, buf, 
sizeof(buf))) > 0) { + HMAC_Update(hmaccx, buf, bytesRead); + } + rv = HMAC_Finish(hmaccx, hash.data, &hash.len, hash.len); + + HMAC_Destroy(hmaccx, PR_TRUE); + + /* verify the hmac against the check file */ + if (rv == SECSuccess) { + result = SECITEM_ItemsAreEqual(signature, &hash); + } + PORT_Memset(hashBuf, 0, sizeof hashBuf); + return result; +} + +static PRBool +blapi_SHVerifyFile(const char *shName, PRBool self, PRBool rerun) +{ + char *checkName = NULL; + PRFileDesc *checkFD = NULL; + PRFileDesc *shFD = NULL; + const SECHashObject *hashObj = NULL; + SECItem signature = { 0, NULL, 0 }; + int bytesRead, offset, type; + SECStatus rv; + SECItem hmacKey = { 0, NULL, 0 }; +#ifdef FREEBL_USE_PRELINK + int pid = 0; +#endif + PRBool result = PR_FALSE; /* if anything goes wrong, + * the signature does not verify */ + NSSSignChkHeader header; +#ifndef NSS_STRICT_INTEGRITY + DSAPublicKey key; + + PORT_Memset(&key, 0, sizeof(key)); +#endif + + /* If our integrity check was never ran or failed, fail any other + * integrity checks to prevent any token going into FIPS mode. 
*/ + if (!self && (BL_FIPSEntryOK(PR_FALSE, rerun) != SECSuccess)) { + return PR_FALSE; + } + + if (!shName) { + goto loser; + } + + /* figure out the name of our check file */ + checkName = mkCheckFileName(shName); + if (!checkName) { + goto loser; + } + + /* open the check File */ + checkFD = PR_Open(checkName, PR_RDONLY, 0); + if (checkFD == NULL) { +#ifdef DEBUG_SHVERIFY + fprintf(stderr, "Failed to open the check file %s: (%d, %d)\n", + checkName, (int)PR_GetError(), (int)PR_GetOSError()); +#endif /* DEBUG_SHVERIFY */ + goto loser; + } + + /* read and Verify the headerthe header */ + bytesRead = PR_Read(checkFD, &header, sizeof(header)); + if (bytesRead != sizeof(header)) { + goto loser; + } + if ((header.magic1 != NSS_SIGN_CHK_MAGIC1) || + (header.magic2 != NSS_SIGN_CHK_MAGIC2)) { + goto loser; + } + /* we've bumped the version number so that newly signed .check + * files will fail nicely on old version of nss */ + if (header.majorVersion > NSS_SIGN_CHK_MAJOR_VERSION) { + goto loser; + } + if (header.minorVersion < NSS_SIGN_CHK_MINOR_VERSION) { + goto loser; + } + type = decodeInt(header.type); + + /* seek past any future header extensions */ + offset = decodeInt(header.offset); + if (PR_Seek(checkFD, offset, PR_SEEK_SET) < 0) { + goto loser; + } + + switch (type) { + case CKK_DSA: +#ifdef NSS_STRICT_INTEGRITY + goto loser; +#else + /* accept old dsa check files if NSS_STRICT_INTEGRITY is not set*/ + /* read the key */ + rv = readItem(checkFD, &key.params.prime); + if (rv != SECSuccess) { + goto loser; + } + rv = readItem(checkFD, &key.params.subPrime); + if (rv != SECSuccess) { + goto loser; + } + rv = readItem(checkFD, &key.params.base); + if (rv != SECSuccess) { + goto loser; + } + rv = readItem(checkFD, &key.publicValue); + if (rv != SECSuccess) { + goto loser; + } + /* read the signature */ + rv = readItem(checkFD, &signature); + if (rv != SECSuccess) { + goto loser; + } + hashObj = HASH_GetRawHashObject(PQG_GetHashType(&key.params)); + break; +#endif + 
default: + if ((type & NSS_SIGN_CHK_TYPE_FLAGS) != NSS_SIGN_CHK_FLAG_HMAC) { + goto loser; + } + /* read the HMAC Key */ + rv = readItem(checkFD, &hmacKey); + if (rv != SECSuccess) { + goto loser; + } + /* read the siganture */ + rv = readItem(checkFD, &signature); + if (rv != SECSuccess) { + goto loser; + } + hashObj = HASH_GetRawHashObject(type & ~NSS_SIGN_CHK_TYPE_FLAGS); + } + + /* done with the check file */ + PR_Close(checkFD); + checkFD = NULL; + + if (hashObj == NULL) { + goto loser; + } + +/* open our library file */ +#ifdef FREEBL_USE_PRELINK + shFD = bl_OpenUnPrelink(shName, &pid); +#else + shFD = PR_Open(shName, PR_RDONLY, 0); +#endif + if (shFD == NULL) { +#ifdef DEBUG_SHVERIFY + fprintf(stderr, "Failed to open the library file %s: (%d, %d)\n", + shName, (int)PR_GetError(), (int)PR_GetOSError()); +#endif /* DEBUG_SHVERIFY */ + goto loser; + } + + switch (type) { + case CKK_DSA: +#ifndef NSS_STRICT_INTEGRITY + result = blapi_SHVerifyDSACheck(shFD, hashObj, &key, &signature); +#endif + break; + default: + if ((type & NSS_SIGN_CHK_TYPE_FLAGS) != NSS_SIGN_CHK_FLAG_HMAC) { + break; + } + result = blapi_SHVerifyHMACCheck(shFD, hashObj, &hmacKey, &signature); + break; + } + +#ifdef FREEBL_USE_PRELINK + bl_CloseUnPrelink(shFD, pid); +#else + PR_Close(shFD); +#endif + shFD = NULL; + +loser: + PORT_Memset(&header, 0, sizeof header); + if (checkName != NULL) { + PORT_Free(checkName); + } + if (checkFD != NULL) { + PR_Close(checkFD); + } + if (shFD != NULL) { + PR_Close(shFD); + } + if (hmacKey.data != NULL) { + SECITEM_ZfreeItem(&hmacKey, PR_FALSE); + } + if (signature.data != NULL) { + SECITEM_ZfreeItem(&signature, PR_FALSE); + } +#ifndef NSS_STRICT_INTEGRITY + if (key.params.prime.data != NULL) { + SECITEM_ZfreeItem(&key.params.prime, PR_FALSE); + } + if (key.params.subPrime.data != NULL) { + SECITEM_ZfreeItem(&key.params.subPrime, PR_FALSE); + } + if (key.params.base.data != NULL) { + SECITEM_ZfreeItem(&key.params.base, PR_FALSE); + } + if 
(key.publicValue.data != NULL) { + SECITEM_ZfreeItem(&key.publicValue, PR_FALSE); + } +#endif + return result; +} + +PRBool +BLAPI_VerifySelf(const char *name) +{ + if (name == NULL) { + /* + * If name is NULL, freebl is statically linked into softoken. + * softoken will call BLAPI_SHVerify next to verify itself. + */ + return PR_TRUE; + } + return blapi_SHVerify(name, (PRFuncPtr)decodeInt, PR_TRUE, PR_FALSE); +} + +#else /* NSS_FIPS_DISABLED */ + +PRBool +BLAPI_SHVerifyFile(const char *shName) +{ + return PR_FALSE; +} +PRBool +BLAPI_SHVerify(const char *name, PRFuncPtr addr) +{ + return PR_FALSE; +} +PRBool +BLAPI_VerifySelf(const char *name) +{ + return PR_FALSE; +} + +#endif /* NSS_FIPS_DISABLED */ diff --git a/security/nss/lib/freebl/stubs.c b/security/nss/lib/freebl/stubs.c new file mode 100644 index 0000000000..a20d7abf3e --- /dev/null +++ b/security/nss/lib/freebl/stubs.c @@ -0,0 +1,835 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Allow freebl and softoken to be loaded without util or NSPR. + * + * These symbols are overridden once real NSPR, and libutil are attached. + */ +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Android API < 21 doesn't define RTLD_NOLOAD */ +#ifndef RTLD_NOLOAD +#define RTLD_NOLOAD 0 +#endif + +#define FREEBL_NO_WEAK 1 + +#define WEAK __attribute__((weak)) + +#ifdef FREEBL_NO_WEAK + +/* + * This uses function pointers. + * + * CONS: A separate function is needed to + * fill in the function pointers. + * + * PROS: it works on all platforms. 
+ * it allows for dynamically finding nspr and libutil, even once + * softoken is loaded and running. (NOTE: this may be a problem if + * we switch between the stubs and real NSPR on the fly. NSPR will + * do bad things if passed an _FakeArena to free or allocate from). + */ +#define STUB_DECLARE(ret, fn, args) \ + typedef ret(*type_##fn) args; \ + static type_##fn ptr_##fn = NULL + +#define STUB_SAFE_CALL0(fn) \ + if (ptr_##fn) { \ + return ptr_##fn(); \ + } +#define STUB_SAFE_CALL1(fn, a1) \ + if (ptr_##fn) { \ + return ptr_##fn(a1); \ + } +#define STUB_SAFE_CALL2(fn, a1, a2) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2); \ + } +#define STUB_SAFE_CALL3(fn, a1, a2, a3) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2, a3); \ + } +#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2, a3, a4); \ + } +#define STUB_SAFE_CALL5(fn, a1, a2, a3, a4, a5) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2, a3, a4, a5); \ + } +#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2, a3, a4, a5, a6); \ + } + +#define STUB_FETCH_FUNCTION(fn) \ + ptr_##fn = (type_##fn)dlsym(lib, #fn); \ + if (ptr_##fn == NULL) { \ + return SECFailure; \ + } + +#else +/* + * this uses the loader weak attribute. it works automatically, but once + * freebl is loaded, the symbols are 'fixed' (later loading of NSPR or + * libutil will not resolve these symbols). 
+ */ + +#define STUB_DECLARE(ret, fn, args) \ + WEAK extern ret fn args + +#define STUB_SAFE_CALL0(fn) \ + if (fn) { \ + return fn(); \ + } +#define STUB_SAFE_CALL1(fn, a1) \ + if (fn) { \ + return fn(a1); \ + } +#define STUB_SAFE_CALL2(fn, a1, a2) \ + if (fn) { \ + return fn(a1, a2); \ + } +#define STUB_SAFE_CALL3(fn, a1, a2, a3) \ + if (fn) { \ + return fn(a1, a2, a3); \ + } +#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \ + if (fn) { \ + return fn(a1, a2, a3, a4); \ + } +/* five-argument form, used by NSS_SecureSelect_stub; mirrors the FREEBL_NO_WEAK set */ +#define STUB_SAFE_CALL5(fn, a1, a2, a3, a4, a5) \ + if (fn) { \ + return fn(a1, a2, a3, a4, a5); \ + } +#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \ + if (fn) { \ + return fn(a1, a2, a3, a4, a5, a6); \ + } +#endif + +STUB_DECLARE(void *, PORT_Alloc_Util, (size_t len)); +STUB_DECLARE(void *, PORT_ArenaAlloc_Util, (PLArenaPool * arena, size_t size)); +STUB_DECLARE(void *, PORT_ArenaZAlloc_Util, (PLArenaPool * arena, size_t size)); +STUB_DECLARE(void, PORT_Free_Util, (void *ptr)); +STUB_DECLARE(void, PORT_FreeArena_Util, (PLArenaPool * arena, PRBool zero)); +STUB_DECLARE(int, PORT_GetError_Util, (void)); +STUB_DECLARE(PLArenaPool *, PORT_NewArena_Util, (unsigned long chunksize)); +STUB_DECLARE(void, PORT_SetError_Util, (int value)); +STUB_DECLARE(void *, PORT_ZAlloc_Util, (size_t len)); +STUB_DECLARE(void *, PORT_ZAllocAligned_Util, (size_t bytes, size_t alignment, void **mem)); +STUB_DECLARE(void *, PORT_ZAllocAlignedOffset_Util, (size_t bytes, size_t alignment, size_t offset)); +STUB_DECLARE(void, PORT_ZFree_Util, (void *ptr, size_t len)); + +STUB_DECLARE(void, PR_Assert, (const char *s, const char *file, PRIntn ln)); +STUB_DECLARE(PRStatus, PR_Access, (const char *name, PRAccessHow how)); +STUB_DECLARE(PRStatus, PR_CallOnce, (PRCallOnceType * once, PRCallOnceFN func)); +STUB_DECLARE(PRStatus, PR_Close, (PRFileDesc * fd)); +STUB_DECLARE(void, PR_DestroyLock, (PRLock * lock)); +STUB_DECLARE(void, PR_DestroyCondVar, (PRCondVar * cvar)); +STUB_DECLARE(void, PR_Free, (void *ptr)); +STUB_DECLARE(char *, PR_GetLibraryFilePathname, (const char *name, PRFuncPtr addr)); +STUB_DECLARE(PRFileDesc *,
PR_ImportPipe, (PROsfd osfd)); +STUB_DECLARE(void, PR_Lock, (PRLock * lock)); +STUB_DECLARE(PRCondVar *, PR_NewCondVar, (PRLock * lock)); +STUB_DECLARE(PRLock *, PR_NewLock, (void)); +STUB_DECLARE(PRStatus, PR_NotifyCondVar, (PRCondVar * cvar)); +STUB_DECLARE(PRStatus, PR_NotifyAllCondVar, (PRCondVar * cvar)); +STUB_DECLARE(PRFileDesc *, PR_Open, (const char *name, PRIntn flags, PRIntn mode)); +STUB_DECLARE(PRInt32, PR_Read, (PRFileDesc * fd, void *buf, PRInt32 amount)); +STUB_DECLARE(PROffset32, PR_Seek, (PRFileDesc * fd, PROffset32 offset, PRSeekWhence whence)); +STUB_DECLARE(PRStatus, PR_Sleep, (PRIntervalTime ticks)); +STUB_DECLARE(PRStatus, PR_Unlock, (PRLock * lock)); +STUB_DECLARE(PRStatus, PR_WaitCondVar, (PRCondVar * cvar, PRIntervalTime timeout)); +STUB_DECLARE(char *, PR_GetEnvSecure, (const char *)); + +STUB_DECLARE(SECItem *, SECITEM_AllocItem_Util, (PLArenaPool * arena, SECItem *item, unsigned int len)); +STUB_DECLARE(SECComparison, SECITEM_CompareItem_Util, (const SECItem *a, const SECItem *b)); +STUB_DECLARE(PRBool, SECITEM_ItemsAreEqual_Util, (const SECItem *a, const SECItem *b)); +STUB_DECLARE(SECStatus, SECITEM_CopyItem_Util, (PLArenaPool * arena, SECItem *to, const SECItem *from)); +STUB_DECLARE(void, SECITEM_FreeItem_Util, (SECItem * zap, PRBool freeit)); +STUB_DECLARE(void, SECITEM_ZfreeItem_Util, (SECItem * zap, PRBool freeit)); +STUB_DECLARE(SECOidTag, SECOID_FindOIDTag_Util, (const SECItem *oid)); +STUB_DECLARE(int, NSS_SecureMemcmp, (const void *a, const void *b, size_t n)); +STUB_DECLARE(unsigned int, NSS_SecureMemcmpZero, (const void *mem, size_t n)); +STUB_DECLARE(void, NSS_SecureSelect, (void *dest, const void *src0, const void *src1, size_t n, unsigned char b)); + +#define PORT_ZNew_stub(type) (type *)PORT_ZAlloc_stub(sizeof(type)) +#define PORT_New_stub(type) (type *)PORT_Alloc_stub(sizeof(type)) +#define PORT_ZNewArray_stub(type, num) \ + (type *)PORT_ZAlloc_stub(sizeof(type) * (num)) +#define PORT_ZNewAligned_stub(type, alignment, 
mem) \ + (type *)PORT_ZAllocAlignedOffset_stub(sizeof(type), alignment, offsetof(type, mem)) + +/* + * NOTE: in order to support hashing only the memory allocation stubs, + * the get library name stubs, and the file io stubs are needed (the latter + * two are for the library verification). The remaining stubs are simply to + * compile. Attempts to use the library for other operations without NSPR + * will most likely fail. + */ + +/* memory */ +extern void * +PORT_Alloc_stub(size_t len) +{ + STUB_SAFE_CALL1(PORT_Alloc_Util, len); + return malloc(len ? len : 1); +} + +extern void +PORT_Free_stub(void *ptr) +{ + STUB_SAFE_CALL1(PORT_Free_Util, ptr); + return free(ptr); +} + +extern void * +PORT_ZAlloc_stub(size_t len) +{ + STUB_SAFE_CALL1(PORT_ZAlloc_Util, len); + void *ptr = malloc(len ? len : 1); + if (ptr) { + memset(ptr, 0, len); + } + return ptr; +} + +/* aligned_alloc is C11. This is an alternative to get aligned memory. */ +extern void * +PORT_ZAllocAligned_stub(size_t bytes, size_t alignment, void **mem) +{ + STUB_SAFE_CALL3(PORT_ZAllocAligned_Util, bytes, alignment, mem); + + /* This only works if alignment is a power of 2. */ + if ((alignment == 0) || (alignment & (alignment - 1))) { + return NULL; + } + + size_t x = alignment - 1; + size_t len = (bytes ? bytes : 1) + x; + + if (!mem) { + return NULL; + } + + /* Always allocate a non-zero amount of bytes */ + *mem = malloc(len); + if (!*mem) { + return NULL; + } + + memset(*mem, 0, len); + + /* We're pretty sure this is non-zero, but let's assure scan-build too.
*/ + void *ret = (void *)(((uintptr_t)*mem + x) & ~(uintptr_t)x); + assert(ret); + + return ret; +} + +extern void * +PORT_ZAllocAlignedOffset_stub(size_t size, size_t alignment, size_t offset) +{ + STUB_SAFE_CALL3(PORT_ZAllocAlignedOffset_Util, size, alignment, offset); + if (offset > size) { + return NULL; + } + + void *mem = NULL; + void *v = PORT_ZAllocAligned_stub(size, alignment, &mem); + if (!v) { + return NULL; + } + + *((void **)((uintptr_t)v + offset)) = mem; + return v; +} + +extern void +PORT_ZFree_stub(void *ptr, size_t len) +{ + STUB_SAFE_CALL2(PORT_ZFree_Util, ptr, len); + memset(ptr, 0, len); + return free(ptr); +} + +extern void +PR_Free_stub(void *ptr) +{ + STUB_SAFE_CALL1(PR_Free, ptr); + return free(ptr); +} + +/* we have defensive returns after abort(), which is marked noreturn on some + * platforms, making the compiler legitimately complain. */ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code-return" +#endif + +/* + * arenas + * + */ +extern PLArenaPool * +PORT_NewArena_stub(unsigned long chunksize) +{ + STUB_SAFE_CALL1(PORT_NewArena_Util, chunksize); + abort(); + return NULL; +} + +extern void * +PORT_ArenaAlloc_stub(PLArenaPool *arena, size_t size) +{ + /* forward to the plain arena allocator, not the zeroing PORT_ArenaZAlloc_Util */ + STUB_SAFE_CALL2(PORT_ArenaAlloc_Util, arena, size); + abort(); + return NULL; +} + +extern void * +PORT_ArenaZAlloc_stub(PLArenaPool *arena, size_t size) +{ + + STUB_SAFE_CALL2(PORT_ArenaZAlloc_Util, arena, size); + abort(); + return NULL; +} + +extern void +PORT_FreeArena_stub(PLArenaPool *arena, PRBool zero) +{ + + STUB_SAFE_CALL2(PORT_FreeArena_Util, arena, zero); + abort(); +} + +/* io */ +extern PRFileDesc * +PR_Open_stub(const char *name, PRIntn flags, PRIntn mode) +{ + int *lfd = NULL; + int fd; + int lflags = 0; + + STUB_SAFE_CALL3(PR_Open, name, flags, mode); + + if (flags & PR_RDWR) { + lflags = O_RDWR; + } else if (flags & PR_WRONLY) { + lflags = O_WRONLY; + } else { + lflags = O_RDONLY; + } + + if (flags & PR_EXCL) + lflags
|= O_EXCL; + if (flags & PR_APPEND) + lflags |= O_APPEND; + if (flags & PR_TRUNCATE) + lflags |= O_TRUNC; + + fd = open(name, lflags, mode); + if (fd >= 0) { + lfd = PORT_New_stub(int); + if (lfd != NULL) { + *lfd = fd; + } else { + close(fd); + } + } + return (PRFileDesc *)lfd; +} + +extern PRFileDesc * +PR_ImportPipe_stub(PROsfd fd) +{ + int *lfd = NULL; + + STUB_SAFE_CALL1(PR_ImportPipe, fd); + + lfd = PORT_New_stub(int); + if (lfd != NULL) { + *lfd = fd; + } + return (PRFileDesc *)lfd; +} + +extern PRStatus +PR_Close_stub(PRFileDesc *fd) +{ + int *lfd; + STUB_SAFE_CALL1(PR_Close, fd); + + lfd = (int *)fd; + close(*lfd); + PORT_Free_stub(lfd); + + return PR_SUCCESS; +} + +extern PRInt32 +PR_Read_stub(PRFileDesc *fd, void *buf, PRInt32 amount) +{ + int *lfd; + STUB_SAFE_CALL3(PR_Read, fd, buf, amount); + + lfd = (int *)fd; + return read(*lfd, buf, amount); +} + +extern PROffset32 +PR_Seek_stub(PRFileDesc *fd, PROffset32 offset, PRSeekWhence whence) +{ + int *lfd; + int lwhence = SEEK_SET; + STUB_SAFE_CALL3(PR_Seek, fd, offset, whence); + lfd = (int *)fd; + switch (whence) { + case PR_SEEK_CUR: + lwhence = SEEK_CUR; + break; + case PR_SEEK_END: + lwhence = SEEK_END; + break; + case PR_SEEK_SET: + break; + } + + return lseek(*lfd, offset, lwhence); +} + +PRStatus +PR_Access_stub(const char *name, PRAccessHow how) +{ + int mode = F_OK; + int rv; + STUB_SAFE_CALL2(PR_Access, name, how); + switch (how) { + case PR_ACCESS_WRITE_OK: + mode = W_OK; + break; + case PR_ACCESS_READ_OK: + mode = R_OK; + break; + /* assume F_OK for all others */ + default: + break; + } + rv = access(name, mode); + if (rv == 0) { + return PR_SUCCESS; + } + return PR_FAILURE; +} + +/* + * library + */ +extern char * +PR_GetLibraryFilePathname_stub(const char *name, PRFuncPtr addr) +{ + Dl_info dli; + char *result; + + STUB_SAFE_CALL2(PR_GetLibraryFilePathname, name, addr); + + if (dladdr((void *)addr, &dli) == 0) { + return NULL; + } + result = PORT_Alloc_stub(strlen(dli.dli_fname) + 1); + if 
(result != NULL) { + strcpy(result, dli.dli_fname); + } + return result; +} + +#include + +/* errors */ +extern int +PORT_GetError_stub(void) +{ + STUB_SAFE_CALL0(PORT_GetError_Util); + return errno; +} + +extern void +PORT_SetError_stub(int value) +{ + STUB_SAFE_CALL1(PORT_SetError_Util, value); + errno = value; +} + +/* misc */ +extern void +PR_Assert_stub(const char *s, const char *file, PRIntn ln) +{ + STUB_SAFE_CALL3(PR_Assert, s, file, ln); + fprintf(stderr, "%s line %d: %s\n", file, ln, s); + abort(); +} + +/* time */ +extern PRStatus +PR_Sleep_stub(PRIntervalTime ticks) +{ + STUB_SAFE_CALL1(PR_Sleep, ticks); + usleep(ticks * 1000); + return PR_SUCCESS; +} + +/* locking */ +extern PRLock * +PR_NewLock_stub(void) +{ + STUB_SAFE_CALL0(PR_NewLock); + abort(); + return NULL; +} + +extern PRStatus +PR_Unlock_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_Unlock, lock); + abort(); + return PR_FAILURE; +} + +extern void +PR_Lock_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_Lock, lock); + abort(); + return; +} + +extern void +PR_DestroyLock_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_DestroyLock, lock); + abort(); + return; +} + +extern PRCondVar * +PR_NewCondVar_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_NewCondVar, lock); + abort(); + return NULL; +} + +extern PRStatus +PR_NotifyCondVar_stub(PRCondVar *cvar) +{ + STUB_SAFE_CALL1(PR_NotifyCondVar, cvar); + abort(); + return PR_FAILURE; +} + +extern PRStatus +PR_NotifyAllCondVar_stub(PRCondVar *cvar) +{ + STUB_SAFE_CALL1(PR_NotifyAllCondVar, cvar); + abort(); + return PR_FAILURE; +} + +extern PRStatus +PR_WaitCondVar_stub(PRCondVar *cvar, PRIntervalTime timeout) +{ + STUB_SAFE_CALL2(PR_WaitCondVar, cvar, timeout); + abort(); + return PR_FAILURE; +} + +extern char * +PR_GetEnvSecure_stub(const char *var) +{ + STUB_SAFE_CALL1(PR_GetEnvSecure, var); +#ifdef __USE_GNU + return secure_getenv(var); +#else + return getenv(var); +#endif +} + +extern void +PR_DestroyCondVar_stub(PRCondVar *cvar) +{ + 
STUB_SAFE_CALL1(PR_DestroyCondVar, cvar); + abort(); + return; +} + +/* + * NOTE: this presupposes GCC 4.1 + */ +extern PRStatus +PR_CallOnce_stub(PRCallOnceType *once, PRCallOnceFN func) +{ + STUB_SAFE_CALL2(PR_CallOnce, once, func); + abort(); + return PR_FAILURE; +} + +/* + * SECITEMS implement Item Utilities + */ +extern void +SECITEM_FreeItem_stub(SECItem *zap, PRBool freeit) +{ + STUB_SAFE_CALL2(SECITEM_FreeItem_Util, zap, freeit); + abort(); +} + +extern SECItem * +SECITEM_AllocItem_stub(PLArenaPool *arena, SECItem *item, unsigned int len) +{ + STUB_SAFE_CALL3(SECITEM_AllocItem_Util, arena, item, len); + abort(); + return NULL; +} + +extern SECComparison +SECITEM_CompareItem_stub(const SECItem *a, const SECItem *b) +{ + STUB_SAFE_CALL2(SECITEM_CompareItem_Util, a, b); + abort(); + return SECEqual; +} + +extern PRBool +SECITEM_ItemsAreEqual_stub(const SECItem *a, const SECItem *b) +{ + STUB_SAFE_CALL2(SECITEM_ItemsAreEqual_Util, a, b); + /* two nulls are equal */ + if (!a && !b) { + return PR_TRUE; + } + /* only one NULL is not equal */ + if (!a || !b) { + return PR_FALSE; + } + /* we know both secitems have been set, now make sure the lengths + * are equal */ + if (a->len != b->len) { + return PR_FALSE; + } + /* zero-length items are equal whatever their data pointers are */ + if (a->len == 0) { + return PR_TRUE; + } + /* never hand memcmp a NULL pointer */ + if (!a->data || !b->data) { + return (PRBool)(a->data == b->data); + } + /* lengths are equal, safe to verify the data */ + if (PORT_Memcmp(a->data, b->data, b->len) != 0) { + return PR_FALSE; + } + return PR_TRUE; +} + +extern SECStatus +SECITEM_CopyItem_stub(PLArenaPool *arena, SECItem *to, const SECItem *from) +{ + STUB_SAFE_CALL3(SECITEM_CopyItem_Util, arena, to, from); + abort(); + return SECFailure; +} + +extern SECOidTag +SECOID_FindOIDTag_stub(const SECItem *oid) +{ + STUB_SAFE_CALL1(SECOID_FindOIDTag_Util, oid); + abort(); + return SEC_OID_UNKNOWN; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +extern void +SECITEM_ZfreeItem_stub(SECItem *zap, PRBool freeit) +{ + STUB_SAFE_CALL2(SECITEM_ZfreeItem_Util, zap, freeit); + if (zap) { + if (zap->data) { + PORT_Memset(zap->data, 0, zap->len); +
PORT_Free_stub(zap->data); + } + PORT_Memset(zap, 0, sizeof(SECItem)); + if (freeit) { + PORT_Free_stub(zap); + } + } +} + +extern int +NSS_SecureMemcmp_stub(const void *a, const void *b, size_t n) +{ + STUB_SAFE_CALL3(NSS_SecureMemcmp, a, b, n); + abort(); +} + +extern unsigned int +NSS_SecureMemcmpZero_stub(const void *mem, size_t n) +{ + STUB_SAFE_CALL2(NSS_SecureMemcmpZero, mem, n); + abort(); +} + +extern void +NSS_SecureSelect_stub(void *dest, const void *src0, const void *src1, size_t n, unsigned char b) +{ + STUB_SAFE_CALL5(NSS_SecureSelect, dest, src0, src1, n, b); + abort(); +} + +#ifdef FREEBL_NO_WEAK + +static const char *nsprLibName = SHLIB_PREFIX "nspr4." SHLIB_SUFFIX; +static const char *nssutilLibName = SHLIB_PREFIX "nssutil3." SHLIB_SUFFIX; + +static SECStatus +freebl_InitNSPR(void *lib) +{ + STUB_FETCH_FUNCTION(PR_Free); + STUB_FETCH_FUNCTION(PR_Open); + STUB_FETCH_FUNCTION(PR_ImportPipe); + STUB_FETCH_FUNCTION(PR_Close); + STUB_FETCH_FUNCTION(PR_Read); + STUB_FETCH_FUNCTION(PR_Seek); + STUB_FETCH_FUNCTION(PR_GetLibraryFilePathname); + STUB_FETCH_FUNCTION(PR_Assert); + STUB_FETCH_FUNCTION(PR_Access); + STUB_FETCH_FUNCTION(PR_Sleep); + STUB_FETCH_FUNCTION(PR_CallOnce); + STUB_FETCH_FUNCTION(PR_NewCondVar); + STUB_FETCH_FUNCTION(PR_NotifyCondVar); + STUB_FETCH_FUNCTION(PR_NotifyAllCondVar); + STUB_FETCH_FUNCTION(PR_WaitCondVar); + STUB_FETCH_FUNCTION(PR_DestroyCondVar); + STUB_FETCH_FUNCTION(PR_NewLock); + STUB_FETCH_FUNCTION(PR_Unlock); + STUB_FETCH_FUNCTION(PR_Lock); + STUB_FETCH_FUNCTION(PR_DestroyLock); + STUB_FETCH_FUNCTION(PR_GetEnvSecure); + return SECSuccess; +} + +static SECStatus +freebl_InitNSSUtil(void *lib) +{ + STUB_FETCH_FUNCTION(PORT_Alloc_Util); + STUB_FETCH_FUNCTION(PORT_Free_Util); + STUB_FETCH_FUNCTION(PORT_ZAlloc_Util); + STUB_FETCH_FUNCTION(PORT_ZFree_Util); + STUB_FETCH_FUNCTION(PORT_NewArena_Util); + STUB_FETCH_FUNCTION(PORT_ArenaAlloc_Util); + STUB_FETCH_FUNCTION(PORT_ArenaZAlloc_Util); + 
STUB_FETCH_FUNCTION(PORT_FreeArena_Util); + STUB_FETCH_FUNCTION(PORT_GetError_Util); + STUB_FETCH_FUNCTION(PORT_SetError_Util); + STUB_FETCH_FUNCTION(SECITEM_FreeItem_Util); + STUB_FETCH_FUNCTION(SECITEM_AllocItem_Util); + STUB_FETCH_FUNCTION(SECITEM_CompareItem_Util); + STUB_FETCH_FUNCTION(SECITEM_CopyItem_Util); + STUB_FETCH_FUNCTION(SECITEM_ZfreeItem_Util); + STUB_FETCH_FUNCTION(SECOID_FindOIDTag_Util); + STUB_FETCH_FUNCTION(NSS_SecureMemcmp); + STUB_FETCH_FUNCTION(NSS_SecureMemcmpZero); + STUB_FETCH_FUNCTION(NSS_SecureSelect); + return SECSuccess; +} + +/* + * fetch the library if it's loaded. For NSS it should already be loaded + */ +#define freebl_getLibrary(libName) \ + dlopen(libName, RTLD_LAZY | RTLD_NOLOAD) + +/* wrapped in do/while(0) so it acts as a single statement next to if/else */ +#define freebl_releaseLibrary(lib) \ + do { \ + if (lib) \ + dlclose(lib); \ + } while (0) + +static void *FREEBLnsprGlobalLib = NULL; +static void *FREEBLnssutilGlobalLib = NULL; + +void __attribute((destructor)) FREEBL_unload() +{ + freebl_releaseLibrary(FREEBLnsprGlobalLib); + freebl_releaseLibrary(FREEBLnssutilGlobalLib); +} +#endif + +/* + * load the symbols from the real libraries if available. + * + * The libraries are only attached if already loaded (freebl_getLibrary uses + * RTLD_NOLOAD). If we could not use the real libraries, return failure.
+ */ +extern SECStatus +FREEBL_InitStubs() +{ + SECStatus rv = SECSuccess; +#ifdef FREEBL_NO_WEAK + void *nspr = NULL; + void *nssutil = NULL; + + /* NSPR should be first */ + if (!FREEBLnsprGlobalLib) { + nspr = freebl_getLibrary(nsprLibName); + if (!nspr) { + return SECFailure; + } + rv = freebl_InitNSPR(nspr); + if (rv != SECSuccess) { + freebl_releaseLibrary(nspr); + return rv; + } + FREEBLnsprGlobalLib = nspr; /* adopt */ + } + /* now load NSSUTIL */ + if (!FREEBLnssutilGlobalLib) { + nssutil = freebl_getLibrary(nssutilLibName); + if (!nssutil) { + return SECFailure; + } + rv = freebl_InitNSSUtil(nssutil); + if (rv != SECSuccess) { + freebl_releaseLibrary(nssutil); + return rv; + } + FREEBLnssutilGlobalLib = nssutil; /* adopt */ + } +#endif + + return rv; +} diff --git a/security/nss/lib/freebl/stubs.h b/security/nss/lib/freebl/stubs.h new file mode 100644 index 0000000000..f773e10433 --- /dev/null +++ b/security/nss/lib/freebl/stubs.h @@ -0,0 +1,71 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Allow freebl and softoken to be loaded without util or NSPR. + * + * These symbols are overridden once real NSPR, and libutil are attached. 
+ */ + +#ifndef _STUBS_H_ +#define _STUBS_H_ 1 + +#ifdef _LIBUTIL_H_ +/* must be included before util */ +/*#error stubs.h included too late */ +#define MP_DIGITES(x) "stubs included too late" +#endif + +/* hide libutil rename */ +#define _LIBUTIL_H_ 1 + +#define PORT_Alloc PORT_Alloc_stub +#define PORT_ArenaAlloc PORT_ArenaAlloc_stub +#define PORT_ArenaZAlloc PORT_ArenaZAlloc_stub +#define PORT_Free PORT_Free_stub +#define PORT_FreeArena PORT_FreeArena_stub +#define PORT_GetError PORT_GetError_stub +#define PORT_NewArena PORT_NewArena_stub +#define PORT_SetError PORT_SetError_stub +#define PORT_ZAlloc PORT_ZAlloc_stub +#define PORT_ZFree PORT_ZFree_stub +#define PORT_ZAllocAligned PORT_ZAllocAligned_stub +#define PORT_ZAllocAlignedOffset PORT_ZAllocAlignedOffset_stub + +#define SECITEM_AllocItem SECITEM_AllocItem_stub +#define SECITEM_CompareItem SECITEM_CompareItem_stub +#define SECITEM_ItemsAreEqual SECITEM_ItemsAreEqual_stub +#define SECITEM_CopyItem SECITEM_CopyItem_stub +#define SECITEM_FreeItem SECITEM_FreeItem_stub +#define SECITEM_ZfreeItem SECITEM_ZfreeItem_stub +#define SECOID_FindOIDTag SECOID_FindOIDTag_stub +#define NSS_SecureMemcmp NSS_SecureMemcmp_stub +#define NSS_SecureMemcmpZero NSS_SecureMemcmpZero_stub +#define NSS_SecureSelect NSS_SecureSelect_stub + +#define PR_Assert PR_Assert_stub +#define PR_Access PR_Access_stub +#define PR_CallOnce PR_CallOnce_stub +#define PR_Close PR_Close_stub +#define PR_DestroyCondVar PR_DestroyCondVar_stub +#define PR_DestroyLock PR_DestroyLock_stub +#define PR_Free PR_Free_stub +#define PR_GetLibraryFilePathname PR_GetLibraryFilePathname_stub +#define PR_ImportPipe PR_ImportPipe_stub +#define PR_Lock PR_Lock_stub +#define PR_NewCondVar PR_NewCondVar_stub +#define PR_NewLock PR_NewLock_stub +#define PR_NotifyCondVar PR_NotifyCondVar_stub +#define PR_NotifyAllCondVar PR_NotifyAllCondVar_stub +#define PR_Open PR_Open_stub +#define PR_Read PR_Read_stub +#define PR_Seek PR_Seek_stub +#define PR_Sleep PR_Sleep_stub
+#define PR_Unlock PR_Unlock_stub +#define PR_WaitCondVar PR_WaitCondVar_stub +#define PR_GetEnvSecure PR_GetEnvSecure_stub + +extern int FREEBL_InitStubs(void); + +#endif diff --git a/security/nss/lib/freebl/sysrand.c b/security/nss/lib/freebl/sysrand.c new file mode 100644 index 0000000000..814dd6e515 --- /dev/null +++ b/security/nss/lib/freebl/sysrand.c @@ -0,0 +1,18 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "seccomon.h" + +#if defined(XP_UNIX) && defined(SEED_ONLY_DEV_URANDOM) +#include "unix_urandom.c" +#elif defined(XP_UNIX) +#include "unix_rand.c" +#endif +#ifdef XP_WIN +#include "win_rand.c" +#endif diff --git a/security/nss/lib/freebl/tlsprfalg.c b/security/nss/lib/freebl/tlsprfalg.c new file mode 100644 index 0000000000..1e5e67886c --- /dev/null +++ b/security/nss/lib/freebl/tlsprfalg.c @@ -0,0 +1,134 @@ +/* tlsprfalg.c - TLS Pseudo Random Function (PRF) implementation + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "hasht.h" +#include "alghmac.h" + +#define PHASH_STATE_MAX_LEN HASH_LENGTH_MAX + +/* TLS P_hash function */ +SECStatus +TLS_P_hash(HASH_HashType hashType, const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS) +{ + unsigned char state[PHASH_STATE_MAX_LEN]; + unsigned char outbuf[PHASH_STATE_MAX_LEN]; + unsigned int state_len = 0, label_len = 0, outbuf_len = 0, chunk_size; + unsigned int remaining; + unsigned char *res; + SECStatus status; + HMACContext *cx; + SECStatus rv = SECFailure; + const SECHashObject *hashObj = HASH_GetRawHashObject(hashType); + + PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len)); + PORT_Assert((seed != NULL) && (seed->data != NULL)); + PORT_Assert((result != NULL) && (result->data != NULL)); + + remaining = result->len; + res = result->data; + + if (label != NULL) + label_len = PORT_Strlen(label); + + cx = HMAC_Create(hashObj, secret->data, secret->len, isFIPS); + if (cx == NULL) + goto loser; + + /* initialize the state = A(1) = HMAC_hash(secret, seed) */ + HMAC_Begin(cx); + HMAC_Update(cx, (unsigned char *)label, label_len); + HMAC_Update(cx, seed->data, seed->len); + status = HMAC_Finish(cx, state, &state_len, sizeof(state)); + if (status != SECSuccess) + goto loser; + + /* generate a block at a time until we're done */ + while (remaining > 0) { + + HMAC_Begin(cx); + HMAC_Update(cx, state, state_len); + if (label_len) + HMAC_Update(cx, (unsigned char *)label, label_len); + HMAC_Update(cx, seed->data, seed->len); + status = HMAC_Finish(cx, outbuf, &outbuf_len, sizeof(outbuf)); + if (status != SECSuccess) + goto loser; + + /* Update the state = A(i) = HMAC_hash(secret, A(i-1)) */ + HMAC_Begin(cx); + HMAC_Update(cx, state, state_len); + status = HMAC_Finish(cx, state, &state_len, sizeof(state)); + if (status != SECSuccess) + goto loser; + + chunk_size = PR_MIN(outbuf_len, remaining); + 
PORT_Memcpy(res, &outbuf, chunk_size); + res += chunk_size; + remaining -= chunk_size; + } + + rv = SECSuccess; + +loser: + /* clear out state so it's not left on the stack */ + if (cx) + HMAC_Destroy(cx, PR_TRUE); + PORT_Memset(state, 0, sizeof(state)); + PORT_Memset(outbuf, 0, sizeof(outbuf)); + return rv; +} + +SECStatus +TLS_PRF(const SECItem *secret, const char *label, SECItem *seed, + SECItem *result, PRBool isFIPS) +{ + SECStatus rv = SECFailure, status; + unsigned int i; + SECItem tmp = { siBuffer, NULL, 0 }; + SECItem S1; + SECItem S2; + + PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len)); + PORT_Assert((seed != NULL) && (seed->data != NULL)); + PORT_Assert((result != NULL) && (result->data != NULL)); + + S1.type = siBuffer; + S1.len = (secret->len / 2) + (secret->len & 1); + S1.data = secret->data; + + S2.type = siBuffer; + S2.len = S1.len; + S2.data = secret->data + (secret->len - S2.len); + + tmp.data = (unsigned char *)PORT_Alloc(result->len); + if (tmp.data == NULL) + goto loser; + tmp.len = result->len; + + status = TLS_P_hash(HASH_AlgMD5, &S1, label, seed, result, isFIPS); + if (status != SECSuccess) + goto loser; + + status = TLS_P_hash(HASH_AlgSHA1, &S2, label, seed, &tmp, isFIPS); + if (status != SECSuccess) + goto loser; + + for (i = 0; i < result->len; i++) + result->data[i] ^= tmp.data[i]; + + rv = SECSuccess; + +loser: + if (tmp.data != NULL) + PORT_ZFree(tmp.data, tmp.len); + return rv; +} diff --git a/security/nss/lib/freebl/unix_rand.c b/security/nss/lib/freebl/unix_rand.c new file mode 100644 index 0000000000..e46bac6233 --- /dev/null +++ b/security/nss/lib/freebl/unix_rand.c @@ -0,0 +1,811 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "secrng.h" +#include "secerr.h" +#include "prerror.h" +#include "prthread.h" +#include "prprf.h" +#include "prenv.h" + +size_t RNG_FileUpdate(const char *fileName, size_t limit); + +/* + * When copying data to the buffer we want the least significant bytes + * from the input since those bits are changing the fastest. The address + * of least significant byte depends upon whether we are running on + * a big-endian or little-endian machine. + * + * Does this mean the least significant bytes are the most significant + * to us? :-) + */ + +static size_t +CopyLowBits(void *dst, size_t dstlen, void *src, size_t srclen) +{ + union endianness { + PRInt32 i; + char c[4]; + } u; + + if (srclen <= dstlen) { + memcpy(dst, src, srclen); + return srclen; + } + u.i = 0x01020304; + if (u.c[0] == 0x01) { + /* big-endian case */ + memcpy(dst, (char *)src + (srclen - dstlen), dstlen); + } else { + /* little-endian case */ + memcpy(dst, src, dstlen); + } + return dstlen; +} + +#ifdef SOLARIS + +#include + +static const PRUint32 entropy_buf_len = 4096; /* buffer up to 4 KB */ + +/* Buffer entropy data, and feed it to the RNG, entropy_buf_len bytes at a time. + * Returns error if RNG_RandomUpdate fails. Also increments *total_fed + * by the number of bytes successfully buffered. + */ +static SECStatus +BufferEntropy(char *inbuf, PRUint32 inlen, + char *entropy_buf, PRUint32 *entropy_buffered, + PRUint32 *total_fed) +{ + PRUint32 tocopy = 0; + PRUint32 avail = 0; + SECStatus rv = SECSuccess; + + while (inlen) { + avail = entropy_buf_len - *entropy_buffered; + if (!avail) { + /* Buffer is full, time to feed it to the RNG.
*/ + rv = RNG_RandomUpdate(entropy_buf, entropy_buf_len); + if (SECSuccess != rv) { + break; + } + *entropy_buffered = 0; + avail = entropy_buf_len; + } + tocopy = PR_MIN(avail, inlen); + memcpy(entropy_buf + *entropy_buffered, inbuf, tocopy); + *entropy_buffered += tocopy; + inlen -= tocopy; + inbuf += tocopy; + *total_fed += tocopy; + } + return rv; +} + +/* Feed kernel statistics structures and ks_data field to the RNG. + * Returns status as well as the number of bytes successfully fed to the RNG. + */ +static SECStatus +RNG_kstat(PRUint32 *fed) +{ + kstat_ctl_t *kc = NULL; + kstat_t *ksp = NULL; + PRUint32 entropy_buffered = 0; + char *entropy_buf = NULL; + SECStatus rv = SECSuccess; + + PORT_Assert(fed); + if (!fed) { + return SECFailure; + } + *fed = 0; + + kc = kstat_open(); + PORT_Assert(kc); + if (!kc) { + return SECFailure; + } + entropy_buf = (char *)PORT_Alloc(entropy_buf_len); + PORT_Assert(entropy_buf); + if (entropy_buf) { + for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) { + if (-1 == kstat_read(kc, ksp, NULL)) { + /* missing data from a single kstat shouldn't be fatal */ + continue; + } + rv = BufferEntropy((char *)ksp, sizeof(kstat_t), + entropy_buf, &entropy_buffered, + fed); + if (SECSuccess != rv) { + break; + } + + if (ksp->ks_data && ksp->ks_data_size > 0 && ksp->ks_ndata > 0) { + rv = BufferEntropy((char *)ksp->ks_data, ksp->ks_data_size, + entropy_buf, &entropy_buffered, + fed); + if (SECSuccess != rv) { + break; + } + } + } + if (SECSuccess == rv && entropy_buffered) { + /* Buffer is not empty, time to feed it to the RNG */ + rv = RNG_RandomUpdate(entropy_buf, entropy_buffered); + } + PORT_Free(entropy_buf); + } else { + rv = SECFailure; + } + if (kstat_close(kc)) { + PORT_Assert(0); + rv = SECFailure; + } + return rv; +} + +#endif + +#if defined(SCO) || defined(UNIXWARE) || defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(DARWIN) || defined(OPENBSD) || defined(NTO) || defined(__riscos__) || defined(__GNU__) || 
defined(__FreeBSD_kernel__) || defined(__NetBSD_kernel__) +#include + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + int ticks; + struct tms buffer; + + ticks = times(&buffer); + return CopyLowBits(buf, maxbytes, &ticks, sizeof(ticks)); +} + +static void +GiveSystemInfo(void) +{ + long si; + + /* + * Is this really necessary? Why not use rand48 or something? + */ + si = sysconf(_SC_CHILD_MAX); + RNG_RandomUpdate(&si, sizeof(si)); + + si = sysconf(_SC_STREAM_MAX); + RNG_RandomUpdate(&si, sizeof(si)); + + si = sysconf(_SC_OPEN_MAX); + RNG_RandomUpdate(&si, sizeof(si)); +} +#endif + +#if defined(__sun) +#if defined(__svr4) || defined(SVR4) +#include + +static void +GiveSystemInfo(void) +{ + int rv; + char buf[2000]; + + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + hrtime_t t; + t = gethrtime(); + if (t) { + return CopyLowBits(buf, maxbytes, &t, sizeof(t)); + } + return 0; +} +#else /* SunOS (Sun, but not SVR4) */ + +extern long sysconf(int name); + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ + long si; + + /* This is not very good */ + si = sysconf(_SC_CHILD_MAX); + RNG_RandomUpdate(&si, sizeof(si)); +} +#endif +#endif /* Sun */ + +#if defined(__hpux) +#include + +#if defined(__ia64) +#include + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + PRUint64 t; + + t = _Asm_mov_from_ar(_AREG44); + return CopyLowBits(buf, maxbytes, &t, sizeof(t)); +} +#else +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + extern int ret_cr16(); + int cr16val; + + cr16val = ret_cr16(); + return CopyLowBits(buf, maxbytes, &cr16val, sizeof(cr16val)); +} +#endif + 
+static void +GiveSystemInfo(void) +{ + long si; + + /* This is not very good */ + si = sysconf(_AES_OS_VERSION); + RNG_RandomUpdate(&si, sizeof(si)); + si = sysconf(_SC_CPU_VERSION); + RNG_RandomUpdate(&si, sizeof(si)); +} +#endif /* HPUX */ + +#if defined(_IBMR2) +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ + /* XXX haven't found any yet! */ +} +#endif /* IBM R2 */ + +#if defined(LINUX) +#include + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ +#ifndef NO_SYSINFO + struct sysinfo si; + if (sysinfo(&si) == 0) { + RNG_RandomUpdate(&si, sizeof(si)); + } +#endif +} +#endif /* LINUX */ + +#if defined(NCR) + +#include +#include + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ + int rv; + char buf[2000]; + + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} + +#endif /* NCR */ + +#if defined(sgi) +#include +#undef PRIVATE +#include +#include +#include +#include +#include +#include + +static void +GiveSystemInfo(void) +{ + int rv; + char buf[4096]; + + rv = syssgi(SGI_SYSID, &buf[0]); + if (rv > 0) { + RNG_RandomUpdate(buf, MAXSYSIDSIZE); + } +#ifdef SGI_RDUBLK + rv = syssgi(SGI_RDUBLK, getpid(), &buf[0], sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, sizeof(buf)); + } +#endif /* SGI_RDUBLK */ + rv = syssgi(SGI_INVENT, SGI_INV_READ, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, sizeof(buf)); + } + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = 
sysinfo(SI_HW_SERIAL, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} + +static size_t +GetHighResClock(void *buf, size_t maxbuf) +{ + unsigned phys_addr, raddr, cycleval; + static volatile unsigned *iotimer_addr = NULL; + static int tries = 0; + static int cntr_size; + int mfd; + long s0[2]; + struct timeval tv; + +#ifndef SGI_CYCLECNTR_SIZE +#define SGI_CYCLECNTR_SIZE 165 /* Size user needs to use to read CC */ +#endif + + if (iotimer_addr == NULL) { + if (tries++ > 1) { + /* Don't keep trying if it didn't work */ + return 0; + } + + /* + ** For SGI machines we can use the cycle counter, if it has one, + ** to generate some truly random numbers + */ + phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval); + if (phys_addr) { + int pgsz = getpagesize(); + int pgoffmask = pgsz - 1; + + raddr = phys_addr & ~pgoffmask; + mfd = open("/dev/mmem", O_RDONLY); + if (mfd < 0) { + return 0; + } + iotimer_addr = (unsigned *) + mmap(0, pgoffmask, PROT_READ, MAP_PRIVATE, mfd, (int)raddr); + if (iotimer_addr == (void *)-1) { + close(mfd); + iotimer_addr = NULL; + return 0; + } + iotimer_addr = (unsigned *)((__psint_t)iotimer_addr | (phys_addr & pgoffmask)); + /* + * The file 'mfd' is purposefully not closed. + */ + cntr_size = syssgi(SGI_CYCLECNTR_SIZE); + if (cntr_size < 0) { + struct utsname utsinfo; + + /* + * We must be executing on a 6.0 or earlier system, since the + * SGI_CYCLECNTR_SIZE call is not supported. + * + * The only pre-6.1 platforms with 64-bit counters are + * IP19 and IP21 (Challenge, PowerChallenge, Onyx). 
+ */ + uname(&utsinfo); + if (!strncmp(utsinfo.machine, "IP19", 4) || + !strncmp(utsinfo.machine, "IP21", 4)) + cntr_size = 64; + else + cntr_size = 32; + } + cntr_size /= 8; /* Convert from bits to bytes */ + } + } + + s0[0] = *iotimer_addr; + if (cntr_size > 4) + s0[1] = *(iotimer_addr + 1); + memcpy(buf, (char *)&s0[0], cntr_size); + return CopyLowBits(buf, maxbuf, &s0, cntr_size); +} +#endif + +#if defined(sony) +#include + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ + int rv; + char buf[2000]; + + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} +#endif /* sony */ + +#if defined(sinix) +#include +#include + +int gettimeofday(struct timeval *, struct timezone *); +int gethostname(char *, int); + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + int ticks; + struct tms buffer; + + ticks = times(&buffer); + return CopyLowBits(buf, maxbytes, &ticks, sizeof(ticks)); +} + +static void +GiveSystemInfo(void) +{ + int rv; + char buf[2000]; + + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} +#endif /* sinix */ + +#if defined(nec_ews) +#include + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ + int rv; + char buf[2000]; + + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_HW_SERIAL, buf, 
sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} +#endif /* nec_ews */ + +size_t +RNG_GetNoise(void *buf, size_t maxbytes) +{ + struct timeval tv; + int n = 0; + int c; + + n = GetHighResClock(buf, maxbytes); + maxbytes -= n; + + (void)gettimeofday(&tv, 0); + c = CopyLowBits((char *)buf + n, maxbytes, &tv.tv_usec, sizeof(tv.tv_usec)); + n += c; + maxbytes -= c; + c = CopyLowBits((char *)buf + n, maxbytes, &tv.tv_sec, sizeof(tv.tv_sec)); + n += c; + return n; +} + +#ifdef DARWIN +#include +#if !TARGET_OS_IPHONE +#include +#endif +#endif + +void +RNG_SystemInfoForRNG(void) +{ + char buf[BUFSIZ]; + size_t bytes; + const char *const *cp; + char *randfile; +#ifdef DARWIN +#if TARGET_OS_IPHONE + /* iOS does not expose a way to access environ. */ + char **environ = NULL; +#else + char **environ = *_NSGetEnviron(); +#endif +#else + extern char **environ; +#endif + static const char *const files[] = { + "/etc/passwd", + "/etc/utmp", + "/tmp", + "/var/tmp", + "/usr/tmp", + 0 + }; + + GiveSystemInfo(); + + bytes = RNG_GetNoise(buf, sizeof(buf)); + RNG_RandomUpdate(buf, bytes); + + /* + * Pass the C environment and the addresses of the pointers to the + * hash function. This makes the random number function depend on the + * execution environment of the user and on the platform the program + * is running on. + */ + if (environ != NULL) { + cp = (const char *const *)environ; + while (*cp) { + RNG_RandomUpdate(*cp, strlen(*cp)); + cp++; + } + RNG_RandomUpdate(environ, (char *)cp - (char *)environ); + } + + /* Give in system information */ + if (gethostname(buf, sizeof(buf)) == 0) { + RNG_RandomUpdate(buf, strlen(buf)); + } + + /* grab some data from system's PRNG before any other files. 
*/ + bytes = RNG_FileUpdate("/dev/urandom", SYSTEM_RNG_SEED_COUNT); + if (!bytes) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + } + + /* If the user points us to a random file, pass it through the rng */ + randfile = PR_GetEnvSecure("NSRANDFILE"); + if ((randfile != NULL) && (randfile[0] != '\0')) { + char *randCountString = PR_GetEnvSecure("NSRANDCOUNT"); + int randCount = randCountString ? atoi(randCountString) : 0; + if (randCount != 0) { + RNG_FileUpdate(randfile, randCount); + } else { + RNG_FileForRNG(randfile); + } + } + + /* pass other files through */ + for (cp = files; *cp; cp++) + RNG_FileForRNG(*cp); + +#if defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(OPENBSD) || defined(DARWIN) || defined(LINUX) || defined(HPUX) + if (bytes) + return; +#endif + +#ifdef SOLARIS + if (!bytes) { + /* On Solaris 8, /dev/urandom isn't available, so we use libkstat. */ + PRUint32 kstat_bytes = 0; + if (SECSuccess != RNG_kstat(&kstat_bytes)) { + PORT_Assert(0); + } + bytes += kstat_bytes; + PORT_Assert(bytes); + } +#endif +} + +#define TOTAL_FILE_LIMIT 1000000 /* one million */ + +size_t +RNG_FileUpdate(const char *fileName, size_t limit) +{ + FILE *file; + int fd; + int bytes; + size_t fileBytes = 0; + struct stat stat_buf; + unsigned char buffer[BUFSIZ]; + static size_t totalFileBytes = 0; + + /* suppress valgrind warnings due to holes in struct stat */ + memset(&stat_buf, 0, sizeof(stat_buf)); + + if (stat((char *)fileName, &stat_buf) < 0) + return fileBytes; + RNG_RandomUpdate(&stat_buf, sizeof(stat_buf)); + + file = fopen(fileName, "r"); + if (file != NULL) { + /* Read from the underlying file descriptor directly to bypass stdio + * buffering and avoid reading more bytes than we need from + * /dev/urandom. NOTE: we can't use fread with unbuffered I/O because + * fread may return EOF in unbuffered I/O mode on Android. + * + * Moreover, we read into a buffer of size BUFSIZ, so buffered I/O + * has no performance advantage. 
*/ + fd = fileno(file); + /* 'file' was just opened, so this should not fail. */ + PORT_Assert(fd != -1); + while (limit > fileBytes && fd != -1) { + bytes = PR_MIN(sizeof buffer, limit - fileBytes); + bytes = read(fd, buffer, bytes); + if (bytes <= 0) + break; + RNG_RandomUpdate(buffer, bytes); + fileBytes += bytes; + totalFileBytes += bytes; + /* after TOTAL_FILE_LIMIT has been reached, only read in first + ** buffer of data from each subsequent file. + */ + if (totalFileBytes > TOTAL_FILE_LIMIT) + break; + } + fclose(file); + } + /* + * Pass yet another snapshot of our highest resolution clock into + * the hash function. + */ + bytes = RNG_GetNoise(buffer, sizeof(buffer)); + RNG_RandomUpdate(buffer, bytes); + return fileBytes; +} + +void +RNG_FileForRNG(const char *fileName) +{ + RNG_FileUpdate(fileName, TOTAL_FILE_LIMIT); +} + +#define _POSIX_PTHREAD_SEMANTICS +#include + +PRBool +ReadFileOK(char *dir, char *file) +{ + struct stat stat_buf; + char filename[PATH_MAX]; + int count = snprintf(filename, sizeof filename, "%s/%s", dir, file); + + if (count <= 0) { + return PR_FALSE; /* name too long, can't read it anyway */ + } + + if (stat(filename, &stat_buf) < 0) + return PR_FALSE; /* can't stat, probably can't read it then as well */ + return S_ISREG(stat_buf.st_mode) ? PR_TRUE : PR_FALSE; +} + +size_t +RNG_SystemRNG(void *dest, size_t maxLen) +{ + FILE *file; + int fd; + int bytes; + size_t fileBytes = 0; + unsigned char *buffer = dest; + + file = fopen("/dev/urandom", "r"); + if (file == NULL) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return 0; + } + /* Read from the underlying file descriptor directly to bypass stdio + * buffering and avoid reading more bytes than we need from /dev/urandom. + * NOTE: we can't use fread with unbuffered I/O because fread may return + * EOF in unbuffered I/O mode on Android. + */ + fd = fileno(file); + /* 'file' was just opened, so this should not fail. 
*/ + PORT_Assert(fd != -1); + while (maxLen > fileBytes && fd != -1) { + bytes = maxLen - fileBytes; + bytes = read(fd, buffer, bytes); + if (bytes <= 0) + break; + fileBytes += bytes; + buffer += bytes; + } + fclose(file); + if (fileBytes != maxLen) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); /* system RNG failed */ + fileBytes = 0; + } + return fileBytes; +} diff --git a/security/nss/lib/freebl/unix_urandom.c b/security/nss/lib/freebl/unix_urandom.c new file mode 100644 index 0000000000..73006cdbb4 --- /dev/null +++ b/security/nss/lib/freebl/unix_urandom.c @@ -0,0 +1,84 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include "secerr.h" +#include "secrng.h" +#include "prprf.h" + +/* syscall getentropy() is limited to retrieving 256 bytes */ +#define GETENTROPY_MAX_BYTES 256 + +void +RNG_SystemInfoForRNG(void) +{ + PRUint8 bytes[SYSTEM_RNG_SEED_COUNT]; + size_t numBytes = RNG_SystemRNG(bytes, SYSTEM_RNG_SEED_COUNT); + if (!numBytes) { + /* error is set */ + return; + } + RNG_RandomUpdate(bytes, numBytes); + PORT_Memset(bytes, 0, sizeof bytes); +} + +size_t +RNG_SystemRNG(void *dest, size_t maxLen) +{ + int fd; + int bytes; + size_t fileBytes = 0; + unsigned char *buffer = dest; + +#if defined(__OpenBSD__) || (defined(__FreeBSD__) && __FreeBSD_version >= 1200000) || (defined(LINUX) && defined(__GLIBC__) && ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 25)))) + int result; + + while (fileBytes < maxLen) { + size_t getBytes = maxLen - fileBytes; + if (getBytes > GETENTROPY_MAX_BYTES) { + getBytes = GETENTROPY_MAX_BYTES; + } + result = getentropy(buffer, getBytes); + if (result == 0) { /* success */ + fileBytes += getBytes; + buffer += getBytes; + } else { + break; + } + } + if (fileBytes == maxLen) { /* success */ + return maxLen; + } + /* If we failed with an 
error other than ENOSYS, it means the destination + * buffer is not writeable. We don't need to try writing to it again. */ + if (errno != ENOSYS) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return 0; + } + /* ENOSYS means the kernel doesn't support getentropy()/getrandom(). + * Reset the number of bytes to get and fall back to /dev/urandom. */ + fileBytes = 0; +#endif + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return 0; + } + while (fileBytes < maxLen) { + bytes = read(fd, buffer, maxLen - fileBytes); + if (bytes <= 0) { + break; + } + fileBytes += bytes; + buffer += bytes; + } + (void)close(fd); + if (fileBytes != maxLen) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return 0; + } + return fileBytes; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Bignum25519_51.h b/security/nss/lib/freebl/verified/Hacl_Bignum25519_51.h new file mode 100644 index 0000000000..d53e43c21d --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Bignum25519_51.h @@ -0,0 +1,678 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Bignum25519_51_H
+#define __Hacl_Bignum25519_51_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include /* NOTE(review): the header name is missing in this copy of the patch - confirm against upstream */
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+/*
+ * fadd: limb-wise addition of two 5-limb values, out[i] = f1[i] + f2[i] for
+ * i = 0..4.  No carry propagation or reduction is done here.  All inputs are
+ * loaded before the first store to out.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fadd(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+    uint64_t f10 = f1[0U];
+    uint64_t f20 = f2[0U];
+    uint64_t f11 = f1[1U];
+    uint64_t f21 = f2[1U];
+    uint64_t f12 = f1[2U];
+    uint64_t f22 = f2[2U];
+    uint64_t f13 = f1[3U];
+    uint64_t f23 = f2[3U];
+    uint64_t f14 = f1[4U];
+    uint64_t f24 = f2[4U];
+    out[0U] = f10 + f20;
+    out[1U] = f11 + f21;
+    out[2U] = f12 + f22;
+    out[3U] = f13 + f23;
+    out[4U] = f14 + f24;
+}
+/*
+ * fsub: limb-wise out[i] = f1[i] + K[i] - f2[i] for i = 0..4, where
+ * K[0] = 0x3fffffffffff68 = 8 * (2^51 - 19) and
+ * K[1..4] = 0x3ffffffffffff8 = 8 * (2^51 - 1).
+ * NOTE(review): these look like the radix-2^51 limbs of 8 * (2^255 - 19),
+ * added so the per-limb subtraction does not wrap - confirm the input bounds
+ * this relies on against the HACL* specification.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fsub(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+    uint64_t f10 = f1[0U];
+    uint64_t f20 = f2[0U];
+    uint64_t f11 = f1[1U];
+    uint64_t f21 = f2[1U];
+    uint64_t f12 = f1[2U];
+    uint64_t f22 = f2[2U];
+    uint64_t f13 = f1[3U];
+    uint64_t f23 = f2[3U];
+    uint64_t f14 = f1[4U];
+    uint64_t f24 = f2[4U];
+    out[0U] = f10 + (uint64_t)0x3fffffffffff68U - f20;
+    out[1U] = f11 + (uint64_t)0x3ffffffffffff8U - f21;
+    out[2U] = f12 + (uint64_t)0x3ffffffffffff8U - f22;
+    out[3U] = f13 + (uint64_t)0x3ffffffffffff8U - f23;
+    out[4U] = f14 + (uint64_t)0x3ffffffffffff8U - f24;
+}
+/*
+ * fmul: multiplies the 5-limb values f1 and f2 into out.
+ *
+ * The 25 partial products f1[i] * f2[j] are formed as 128-bit values and
+ * summed into five columns (tmp_w0..tmp_w4).  A product with i + j >= 5 is
+ * added to column i + j - 5 using 19 * f2[j] (tmp1..tmp4) in place of f2[j].
+ * A carry chain follows: each column keeps its low 51 bits (mask
+ * 0x7ffffffffffff) and passes the bits above to the next column; the carry
+ * out of the last column is multiplied by 19 and added to limb 0, and one
+ * further carry goes from limb 0 into limb 1.
+ *
+ * The uu___ parameter is not referenced in this body.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fmul(
+    uint64_t *out,
+    uint64_t *f1,
+    uint64_t *f2,
+    FStar_UInt128_uint128 *uu___)
+{
+    uint64_t f10 = f1[0U];
+    uint64_t f11 = f1[1U];
+    uint64_t f12 = f1[2U];
+    uint64_t f13 = f1[3U];
+    uint64_t f14 = f1[4U];
+    uint64_t f20 = f2[0U];
+    uint64_t f21 = f2[1U];
+    uint64_t f22 = f2[2U];
+    uint64_t f23 = f2[3U];
+    uint64_t f24 = f2[4U];
+    uint64_t tmp1 = f21 * (uint64_t)19U; /* 19 * f2[1..4], used by the wrapped-around products */
+    uint64_t tmp2 = f22 * (uint64_t)19U;
+    uint64_t tmp3 = f23 * (uint64_t)19U;
+    uint64_t tmp4 = f24 * (uint64_t)19U;
+    FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); /* row f1[0] */
+    FStar_UInt128_uint128 o10 = FStar_UInt128_mul_wide(f10, f21);
+    FStar_UInt128_uint128 o20 = FStar_UInt128_mul_wide(f10, f22);
+    FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23);
+    FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24);
+    FStar_UInt128_uint128 o01 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp4)); /* row f1[1] */
+    FStar_UInt128_uint128 o11 = FStar_UInt128_add(o10, FStar_UInt128_mul_wide(f11, f20));
+    FStar_UInt128_uint128 o21 = FStar_UInt128_add(o20, FStar_UInt128_mul_wide(f11, f21));
+    FStar_UInt128_uint128 o31 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22));
+    FStar_UInt128_uint128 o41 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23));
+    FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f12, tmp3)); /* row f1[2] */
+    FStar_UInt128_uint128 o12 = FStar_UInt128_add(o11, FStar_UInt128_mul_wide(f12, tmp4));
+    FStar_UInt128_uint128 o22 = FStar_UInt128_add(o21, FStar_UInt128_mul_wide(f12, f20));
+    FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f12, f21));
+    FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f12, f22));
+    FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f13, tmp2)); /* row f1[3] */
+    FStar_UInt128_uint128 o13 = FStar_UInt128_add(o12, FStar_UInt128_mul_wide(f13, tmp3));
+    FStar_UInt128_uint128 o23 = FStar_UInt128_add(o22, FStar_UInt128_mul_wide(f13, tmp4));
+    FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f13, f20));
+    FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f13, f21));
+    FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f14, tmp1)); /* row f1[4] */
+    FStar_UInt128_uint128 o14 = FStar_UInt128_add(o13, FStar_UInt128_mul_wide(f14, tmp2));
+    FStar_UInt128_uint128 o24 = FStar_UInt128_add(o23, FStar_UInt128_mul_wide(f14, tmp3));
+    FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f14, tmp4));
+    FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f14, f20));
+    FStar_UInt128_uint128 tmp_w0 = o04; /* the five finished 128-bit columns */
+    FStar_UInt128_uint128 tmp_w1 = o14;
+    FStar_UInt128_uint128 tmp_w2 = o24;
+    FStar_UInt128_uint128 tmp_w3 = o34;
+    FStar_UInt128_uint128 tmp_w4 = o44;
+    FStar_UInt128_uint128
+        l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain starts with carry-in 0 */
+    uint64_t tmp01 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+    FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0));
+    uint64_t tmp11 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+    FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1));
+    uint64_t tmp21 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+    FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2));
+    uint64_t tmp31 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+    FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3));
+    uint64_t tmp41 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+    uint64_t l_4 = tmp01 + c4 * (uint64_t)19U; /* top carry folded back into limb 0, times 19 */
+    uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+    uint64_t c5 = l_4 >> (uint32_t)51U;
+    uint64_t o0 = tmp0_;
+    uint64_t o1 = tmp11 + c5;
+    uint64_t o2 = tmp21;
+    uint64_t o3 = tmp31;
+    uint64_t o4 = tmp41;
+    out[0U] = o0;
+    out[1U] = o1;
+    out[2U] = o2;
+    out[3U] = o3;
+    out[4U] = o4;
+}
+/*
+ * fmul2: two independent 5-limb multiplications in one call.
+ * out[0..4] is the product of f1[0..4] and f2[0..4]; out[5..9] is the product
+ * of f1[5..9] and f2[5..9].  Each half uses the same scheme as the
+ * single-product routine: 25 wide partial products with 19 * f2[j] for the
+ * wrapped-around terms, then a 51-bit carry chain whose top carry is
+ * multiplied by 19 and added to the lowest limb.  All loads from f1 and f2
+ * happen before the first store to out.
+ *
+ * The uu___ parameter is not referenced in this body.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fmul2(
+    uint64_t *out,
+    uint64_t *f1,
+    uint64_t *f2,
+    FStar_UInt128_uint128 *uu___)
+{
+    uint64_t f10 = f1[0U];
+    uint64_t f11 = f1[1U];
+    uint64_t f12 = f1[2U];
+    uint64_t f13 = f1[3U];
+    uint64_t f14 = f1[4U];
+    uint64_t f20 = f2[0U];
+    uint64_t f21 = f2[1U];
+    uint64_t f22 = f2[2U];
+    uint64_t f23 = f2[3U];
+    uint64_t f24 = f2[4U];
+    uint64_t f30 = f1[5U]; /* second operand pair: f1[5..9] and f2[5..9] */
+    uint64_t f31 = f1[6U];
+    uint64_t f32 = f1[7U];
+    uint64_t f33 = f1[8U];
+    uint64_t f34 = f1[9U];
+    uint64_t f40 = f2[5U];
+    uint64_t f41 = f2[6U];
+    uint64_t f42 = f2[7U];
+    uint64_t f43 = f2[8U];
+    uint64_t f44 = f2[9U];
+    uint64_t tmp11 = f21 * (uint64_t)19U;
+    uint64_t tmp12 = f22 * (uint64_t)19U;
+    uint64_t tmp13 = f23 * (uint64_t)19U;
+    uint64_t tmp14 = f24 * (uint64_t)19U;
+    uint64_t tmp21 = f41 * (uint64_t)19U;
+    uint64_t tmp22 = f42 * (uint64_t)19U;
+    uint64_t tmp23 = f43 * (uint64_t)19U;
+    uint64_t tmp24 = f44 * (uint64_t)19U;
+    FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); /* first product: columns for f1[0..4] * f2[0..4] */
+    FStar_UInt128_uint128 o15 = FStar_UInt128_mul_wide(f10, f21);
+    FStar_UInt128_uint128 o25 = FStar_UInt128_mul_wide(f10, f22);
+    FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23);
+    FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24);
+    FStar_UInt128_uint128 o010 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp14));
+    FStar_UInt128_uint128 o110 = FStar_UInt128_add(o15, FStar_UInt128_mul_wide(f11, f20));
+    FStar_UInt128_uint128 o210 = FStar_UInt128_add(o25, FStar_UInt128_mul_wide(f11, f21));
+    FStar_UInt128_uint128 o310 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22));
+    FStar_UInt128_uint128 o410 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23));
+    FStar_UInt128_uint128 o020 = FStar_UInt128_add(o010, FStar_UInt128_mul_wide(f12, tmp13));
+    FStar_UInt128_uint128 o120 = FStar_UInt128_add(o110, FStar_UInt128_mul_wide(f12, tmp14));
+    FStar_UInt128_uint128 o220 = FStar_UInt128_add(o210, FStar_UInt128_mul_wide(f12, f20));
+    FStar_UInt128_uint128 o320 = FStar_UInt128_add(o310, FStar_UInt128_mul_wide(f12, f21));
+    FStar_UInt128_uint128 o420 = FStar_UInt128_add(o410, FStar_UInt128_mul_wide(f12, f22));
+    FStar_UInt128_uint128 o030 = FStar_UInt128_add(o020, FStar_UInt128_mul_wide(f13, tmp12));
+    FStar_UInt128_uint128 o130 = FStar_UInt128_add(o120, FStar_UInt128_mul_wide(f13, tmp13));
+    FStar_UInt128_uint128 o230 = FStar_UInt128_add(o220, FStar_UInt128_mul_wide(f13, tmp14));
+    FStar_UInt128_uint128 o330 = FStar_UInt128_add(o320, FStar_UInt128_mul_wide(f13, f20));
+    FStar_UInt128_uint128 o430 = FStar_UInt128_add(o420, FStar_UInt128_mul_wide(f13, f21));
+    FStar_UInt128_uint128 o040 = FStar_UInt128_add(o030, FStar_UInt128_mul_wide(f14, tmp11));
+    FStar_UInt128_uint128 o140 = FStar_UInt128_add(o130, FStar_UInt128_mul_wide(f14, tmp12));
+    FStar_UInt128_uint128 o240 = FStar_UInt128_add(o230, FStar_UInt128_mul_wide(f14, tmp13));
+    FStar_UInt128_uint128 o340 = FStar_UInt128_add(o330, FStar_UInt128_mul_wide(f14, tmp14));
+    FStar_UInt128_uint128 o440 = FStar_UInt128_add(o430, FStar_UInt128_mul_wide(f14, f20));
+    FStar_UInt128_uint128 tmp_w10 = o040;
+    FStar_UInt128_uint128 tmp_w11 = o140;
+    FStar_UInt128_uint128 tmp_w12 = o240;
+    FStar_UInt128_uint128 tmp_w13 = o340;
+    FStar_UInt128_uint128 tmp_w14 = o440;
+    FStar_UInt128_uint128 o0 = FStar_UInt128_mul_wide(f30, f40); /* second product: columns for f1[5..9] * f2[5..9] */
+    FStar_UInt128_uint128 o1 = FStar_UInt128_mul_wide(f30, f41);
+    FStar_UInt128_uint128 o2 = FStar_UInt128_mul_wide(f30, f42);
+    FStar_UInt128_uint128 o3 = FStar_UInt128_mul_wide(f30, f43);
+    FStar_UInt128_uint128 o4 = FStar_UInt128_mul_wide(f30, f44);
+    FStar_UInt128_uint128 o01 = FStar_UInt128_add(o0, FStar_UInt128_mul_wide(f31, tmp24));
+    FStar_UInt128_uint128 o111 = FStar_UInt128_add(o1, FStar_UInt128_mul_wide(f31, f40));
+    FStar_UInt128_uint128 o211 = FStar_UInt128_add(o2, FStar_UInt128_mul_wide(f31, f41));
+    FStar_UInt128_uint128 o31 = FStar_UInt128_add(o3, FStar_UInt128_mul_wide(f31, f42));
+    FStar_UInt128_uint128 o41 = FStar_UInt128_add(o4, FStar_UInt128_mul_wide(f31, f43));
+    FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f32, tmp23));
+    FStar_UInt128_uint128 o121 = FStar_UInt128_add(o111, FStar_UInt128_mul_wide(f32, tmp24));
+    FStar_UInt128_uint128 o221 = FStar_UInt128_add(o211, FStar_UInt128_mul_wide(f32, f40));
+    FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f32, f41));
+    FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f32, f42));
+    FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f33, tmp22));
+    FStar_UInt128_uint128 o131 = FStar_UInt128_add(o121, FStar_UInt128_mul_wide(f33, tmp23));
+    FStar_UInt128_uint128 o231 = FStar_UInt128_add(o221, FStar_UInt128_mul_wide(f33, tmp24));
+    FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f33, f40));
+    FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f33, f41));
+    FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f34, tmp21));
+    FStar_UInt128_uint128 o141 = FStar_UInt128_add(o131, FStar_UInt128_mul_wide(f34, tmp22));
+    FStar_UInt128_uint128 o241 = FStar_UInt128_add(o231, FStar_UInt128_mul_wide(f34, tmp23));
+    FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f34, tmp24));
+    FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f34, f40));
+    FStar_UInt128_uint128 tmp_w20 = o04;
+    FStar_UInt128_uint128 tmp_w21 = o141;
+    FStar_UInt128_uint128 tmp_w22 = o241;
+    FStar_UInt128_uint128 tmp_w23 = o34;
+    FStar_UInt128_uint128 tmp_w24 = o44;
+    FStar_UInt128_uint128
+        l_ = FStar_UInt128_add(tmp_w10, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain, first product */
+    uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+    FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w11, FStar_UInt128_uint64_to_uint128(c00));
+    uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+    FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w12, FStar_UInt128_uint64_to_uint128(c10));
+    uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+    FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w13, FStar_UInt128_uint64_to_uint128(c20));
+    uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+    FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w14, FStar_UInt128_uint64_to_uint128(c30));
+    uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+    uint64_t l_4 = tmp00 + c40 * (uint64_t)19U;
+    uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+    uint64_t c50 = l_4 >> (uint32_t)51U;
+    uint64_t o100 = tmp0_;
+    uint64_t o112 = tmp10 + c50;
+    uint64_t o122 = tmp20;
+    uint64_t o132 = tmp30;
+    uint64_t o142 = tmp40;
+    FStar_UInt128_uint128
+        l_5 = FStar_UInt128_add(tmp_w20, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain, second product */
+    uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U));
+    FStar_UInt128_uint128 l_6 = FStar_UInt128_add(tmp_w21, FStar_UInt128_uint64_to_uint128(c0));
+    uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U));
+    FStar_UInt128_uint128 l_7 = FStar_UInt128_add(tmp_w22, FStar_UInt128_uint64_to_uint128(c1));
+    uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U));
+    FStar_UInt128_uint128 l_8 = FStar_UInt128_add(tmp_w23, FStar_UInt128_uint64_to_uint128(c2));
+    uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U));
+    FStar_UInt128_uint128 l_9 = FStar_UInt128_add(tmp_w24, FStar_UInt128_uint64_to_uint128(c3));
+    uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U));
+    uint64_t l_10 = tmp0 + c4 * (uint64_t)19U;
+    uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU;
+    uint64_t c5 = l_10 >> (uint32_t)51U;
+    uint64_t o200 = tmp0_0;
+    uint64_t o212 = tmp1 + c5;
+    uint64_t o222 = tmp2;
+    uint64_t o232 = tmp3;
+    uint64_t o242 = tmp4;
+    uint64_t o10 = o100;
+    uint64_t o11 = o112;
+    uint64_t o12 = o122;
+    uint64_t o13 = o132;
+    uint64_t o14 = o142;
+    uint64_t o20 = o200;
+    uint64_t o21 = o212;
+    uint64_t o22 = o222;
+    uint64_t o23 = o232;
+    uint64_t o24 = o242;
+    out[0U] = o10;
+    out[1U] = o11;
+    out[2U] = o12;
+    out[3U] = o13;
+    out[4U] = o14;
+    out[5U] = o20;
+    out[6U] = o21;
+    out[7U] = o22;
+    out[8U] = o23;
+    out[9U] = o24;
+}
+/*
+ * fmul1: multiplies the 5-limb value f1 by the single 64-bit word f2.
+ * Each limb is multiplied by f2 into a 128-bit value, then the same 51-bit
+ * carry chain as in the full multiplication is applied: the carry out of the
+ * top limb is multiplied by 19 and added to limb 0, and one further carry
+ * goes from limb 0 into limb 1.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fmul1(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+    uint64_t f10 = f1[0U];
+    uint64_t f11 = f1[1U];
+    uint64_t f12 = f1[2U];
+    uint64_t f13 = f1[3U];
+    uint64_t f14 = f1[4U];
+    FStar_UInt128_uint128 tmp_w0 = FStar_UInt128_mul_wide(f2, f10);
+    FStar_UInt128_uint128 tmp_w1 = FStar_UInt128_mul_wide(f2, f11);
+    FStar_UInt128_uint128 tmp_w2 = FStar_UInt128_mul_wide(f2, f12);
+    FStar_UInt128_uint128 tmp_w3 = FStar_UInt128_mul_wide(f2, f13);
+    FStar_UInt128_uint128 tmp_w4 = FStar_UInt128_mul_wide(f2, f14);
+    FStar_UInt128_uint128
+        l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U));
+    uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+    FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0));
+    uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+    FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1));
+    uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+    FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2));
+    uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+    FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3));
+    uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+    uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+    uint64_t l_4 = tmp0 + c4 * (uint64_t)19U;
+    uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+    uint64_t c5 = l_4 >> (uint32_t)51U;
+    uint64_t o0 = tmp0_;
+    uint64_t o1 = tmp1 + c5;
+    uint64_t o2 = tmp2;
+    uint64_t o3 = tmp3;
+    uint64_t o4 = tmp4;
+    out[0U] = o0;
+    out[1U] = o1;
+    out[2U] = o2;
+    out[3U] = o3;
+    out[4U] = o4;
+}
+
+static inline void
+Hacl_Impl_Curve25519_Field51_fsqr(uint64_t *out, uint64_t *f,
FStar_UInt128_uint128 *uu___) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t d0 = (uint64_t)2U * f0; + uint64_t d1 = (uint64_t)2U * f1; + uint64_t d2 = (uint64_t)38U * f2; + uint64_t d3 = (uint64_t)19U * f3; + uint64_t d419 = (uint64_t)19U * f4; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f0, f0), + FStar_UInt128_mul_wide(d4, f1)), + FStar_UInt128_mul_wide(d2, f3)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f1), + FStar_UInt128_mul_wide(d4, f2)), + FStar_UInt128_mul_wide(d3, f3)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f2), + FStar_UInt128_mul_wide(f1, f1)), + FStar_UInt128_mul_wide(d4, f3)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f3), + FStar_UInt128_mul_wide(d1, f2)), + FStar_UInt128_mul_wide(f4, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f4), + FStar_UInt128_mul_wide(d1, f3)), + FStar_UInt128_mul_wide(f2, f2)); + FStar_UInt128_uint128 o00 = s0; + FStar_UInt128_uint128 o10 = s1; + FStar_UInt128_uint128 o20 = s2; + FStar_UInt128_uint128 o30 = s3; + FStar_UInt128_uint128 o40 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o00, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o10, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o20, 
FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o30, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o40, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp1 + c5; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fsqr2(uint64_t *out, uint64_t *f, FStar_UInt128_uint128 *uu___) +{ + uint64_t f10 = f[0U]; + uint64_t f11 = f[1U]; + uint64_t f12 = f[2U]; + uint64_t f13 = f[3U]; + uint64_t f14 = f[4U]; + uint64_t f20 = f[5U]; + uint64_t f21 = f[6U]; + uint64_t f22 = f[7U]; + uint64_t f23 = f[8U]; + uint64_t f24 = f[9U]; + uint64_t d00 = (uint64_t)2U * f10; + uint64_t d10 = (uint64_t)2U * f11; + uint64_t d20 = (uint64_t)38U * f12; + uint64_t d30 = (uint64_t)19U * f13; + uint64_t d4190 = (uint64_t)19U * f14; + uint64_t d40 = (uint64_t)2U * d4190; + FStar_UInt128_uint128 + s00 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f10, f10), + FStar_UInt128_mul_wide(d40, f11)), + FStar_UInt128_mul_wide(d20, f13)); + FStar_UInt128_uint128 + s10 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f11), + FStar_UInt128_mul_wide(d40, 
f12)), + FStar_UInt128_mul_wide(d30, f13)); + FStar_UInt128_uint128 + s20 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f12), + FStar_UInt128_mul_wide(f11, f11)), + FStar_UInt128_mul_wide(d40, f13)); + FStar_UInt128_uint128 + s30 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f13), + FStar_UInt128_mul_wide(d10, f12)), + FStar_UInt128_mul_wide(f14, d4190)); + FStar_UInt128_uint128 + s40 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f14), + FStar_UInt128_mul_wide(d10, f13)), + FStar_UInt128_mul_wide(f12, f12)); + FStar_UInt128_uint128 o100 = s00; + FStar_UInt128_uint128 o110 = s10; + FStar_UInt128_uint128 o120 = s20; + FStar_UInt128_uint128 o130 = s30; + FStar_UInt128_uint128 o140 = s40; + uint64_t d0 = (uint64_t)2U * f20; + uint64_t d1 = (uint64_t)2U * f21; + uint64_t d2 = (uint64_t)38U * f22; + uint64_t d3 = (uint64_t)19U * f23; + uint64_t d419 = (uint64_t)19U * f24; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f20, f20), + FStar_UInt128_mul_wide(d4, f21)), + FStar_UInt128_mul_wide(d2, f23)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f21), + FStar_UInt128_mul_wide(d4, f22)), + FStar_UInt128_mul_wide(d3, f23)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f22), + FStar_UInt128_mul_wide(f21, f21)), + FStar_UInt128_mul_wide(d4, f23)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f23), + FStar_UInt128_mul_wide(d1, f22)), + FStar_UInt128_mul_wide(f24, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f24), + FStar_UInt128_mul_wide(d1, f23)), + FStar_UInt128_mul_wide(f22, f22)); + FStar_UInt128_uint128 o200 = s0; + FStar_UInt128_uint128 o210 = s1; + FStar_UInt128_uint128 o220 = s2; + FStar_UInt128_uint128 o230 = s3; + 
FStar_UInt128_uint128 o240 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o100, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o110, FStar_UInt128_uint64_to_uint128(c00)); + uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o120, FStar_UInt128_uint64_to_uint128(c10)); + uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o130, FStar_UInt128_uint64_to_uint128(c20)); + uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o140, FStar_UInt128_uint64_to_uint128(c30)); + uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c50 = l_4 >> (uint32_t)51U; + uint64_t o101 = tmp0_; + uint64_t o111 = tmp10 + c50; + uint64_t o121 = tmp20; + uint64_t o131 = tmp30; + uint64_t o141 = tmp40; + FStar_UInt128_uint128 + l_5 = FStar_UInt128_add(o200, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); + FStar_UInt128_uint128 
l_6 = FStar_UInt128_add(o210, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); + FStar_UInt128_uint128 l_7 = FStar_UInt128_add(o220, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); + FStar_UInt128_uint128 l_8 = FStar_UInt128_add(o230, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); + FStar_UInt128_uint128 l_9 = FStar_UInt128_add(o240, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); + uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_10 >> (uint32_t)51U; + uint64_t o201 = tmp0_0; + uint64_t o211 = tmp1 + c5; + uint64_t o221 = tmp2; + uint64_t o231 = tmp3; + uint64_t o241 = tmp4; + uint64_t o10 = o101; + uint64_t o11 = o111; + uint64_t o12 = o121; + uint64_t o13 = o131; + uint64_t o14 = o141; + uint64_t o20 = o201; + uint64_t o21 = o211; + uint64_t o22 = o221; + uint64_t o23 = o231; + uint64_t o24 = o241; + out[0U] = o10; + out[1U] = o11; + out[2U] = o12; + out[3U] = o13; + out[4U] = o14; + out[5U] = o20; + out[6U] = o21; + out[7U] = o22; + out[8U] = o23; + out[9U] = o24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_store_felem(uint64_t *u64s, uint64_t *f) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t l_ = f0 + (uint64_t)0U; + uint64_t tmp0 
= l_ & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = l_ >> (uint32_t)51U; + uint64_t l_0 = f1 + c0; + uint64_t tmp1 = l_0 & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = l_0 >> (uint32_t)51U; + uint64_t l_1 = f2 + c1; + uint64_t tmp2 = l_1 & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = l_1 >> (uint32_t)51U; + uint64_t l_2 = f3 + c2; + uint64_t tmp3 = l_2 & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = l_2 >> (uint32_t)51U; + uint64_t l_3 = f4 + c3; + uint64_t tmp4 = l_3 & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = l_3 >> (uint32_t)51U; + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t f01 = tmp0_; + uint64_t f11 = tmp1 + c5; + uint64_t f21 = tmp2; + uint64_t f31 = tmp3; + uint64_t f41 = tmp4; + uint64_t m0 = FStar_UInt64_gte_mask(f01, (uint64_t)0x7ffffffffffedU); + uint64_t m1 = FStar_UInt64_eq_mask(f11, (uint64_t)0x7ffffffffffffU); + uint64_t m2 = FStar_UInt64_eq_mask(f21, (uint64_t)0x7ffffffffffffU); + uint64_t m3 = FStar_UInt64_eq_mask(f31, (uint64_t)0x7ffffffffffffU); + uint64_t m4 = FStar_UInt64_eq_mask(f41, (uint64_t)0x7ffffffffffffU); + uint64_t mask = (((m0 & m1) & m2) & m3) & m4; + uint64_t f0_ = f01 - (mask & (uint64_t)0x7ffffffffffedU); + uint64_t f1_ = f11 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f2_ = f21 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f3_ = f31 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f4_ = f41 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f02 = f0_; + uint64_t f12 = f1_; + uint64_t f22 = f2_; + uint64_t f32 = f3_; + uint64_t f42 = f4_; + uint64_t o00 = f02 | f12 << (uint32_t)51U; + uint64_t o10 = f12 >> (uint32_t)13U | f22 << (uint32_t)38U; + uint64_t o20 = f22 >> (uint32_t)26U | f32 << (uint32_t)25U; + uint64_t o30 = f32 >> (uint32_t)39U | f42 << (uint32_t)12U; + uint64_t o0 = o00; + uint64_t o1 = o10; + uint64_t o2 = o20; + uint64_t o3 = o30; + u64s[0U] = o0; + u64s[1U] = o1; + u64s[2U] = o2; + u64s[3U] = o3; +} + 
/* Conditional swap of the two 10-element arrays p1 and p2. mask = 0 - bit is all-ones when bit is 1 and zero when bit is 0; every element pair is XOR-ed with mask & (p1[i] ^ p2[i]), so the pair is exchanged or left alone without any branch on bit. Assumes bit is 0 or 1 and that KRML_MAYBE_FOR10 (defined elsewhere) runs its body for i = 0..9 - TODO confirm. */ +static inline void +Hacl_Impl_Curve25519_Field51_cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ + uint64_t mask = (uint64_t)0U - bit; + KRML_MAYBE_FOR10(i, + (uint32_t)0U, + (uint32_t)10U, + (uint32_t)1U, + uint64_t dummy = mask & (p1[i] ^ p2[i]); + p1[i] = p1[i] ^ dummy; + p2[i] = p2[i] ^ dummy;); +} + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum25519_51_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.c b/security/nss/lib/freebl/verified/Hacl_Chacha20.c new file mode 100644 index 0000000000..d8827b3bc4 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.c @@ -0,0 +1,226 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "internal/Hacl_Chacha20.h" +/* The four constant words placed in state words 0..3; read as little-endian bytes they spell the ASCII text: expand 32-byte k. Exported (non-static) copy. */ +const uint32_t + Hacl_Impl_Chacha20_Vec_chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; +/* One ChaCha quarter round on state words st[a], st[b], st[c], st[d], with all sums mod 2^32: add b into a, then d = rotl32(d ^ a, 16); add d into c, then b = rotl32(b ^ c, 12); add b into a, then d = rotl32(d ^ a, 8); add d into c, then b = rotl32(b ^ c, 7). Results are written back into st after each of the four steps. */ +static inline void +quarter_round(uint32_t *st, uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + uint32_t sta = st[a]; + uint32_t stb0 = st[b]; + uint32_t std0 = st[d]; + uint32_t sta10 = sta + stb0; + uint32_t std10 = std0 ^ sta10; + uint32_t std2 = std10 << (uint32_t)16U | std10 >> (uint32_t)16U; + st[a] = sta10; + st[d] = std2; + uint32_t sta0 = st[c]; + uint32_t stb1 = st[d]; + uint32_t std3 = st[b]; + uint32_t sta11 = sta0 + stb1; + uint32_t std11 = std3 ^ sta11; + uint32_t std20 = std11 << (uint32_t)12U | std11 >> (uint32_t)20U; + st[c] = sta11; + st[b] = std20; + uint32_t sta2 = st[a]; + uint32_t stb2 = st[b]; + uint32_t std4 = st[d]; + uint32_t sta12 = sta2 + stb2; + uint32_t std12 = std4 ^ sta12; + uint32_t std21 = std12 << (uint32_t)8U | std12 >> (uint32_t)24U; + st[a] = sta12; + st[d] = std21; + uint32_t sta3 = st[c]; + uint32_t stb = st[d]; + uint32_t std = st[b]; + uint32_t sta1 = sta3 + stb; + uint32_t std1 = std ^ sta1; + uint32_t std22 = std1 << (uint32_t)7U | std1 >> (uint32_t)25U; + st[c] = sta1; + st[b] = std22; +} +/* Eight quarter rounds on the 16-word state: first on index sets (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15), i.e. the columns of st viewed as a row-major 4x4 matrix, then on its diagonals (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14). */ +static inline void +double_round(uint32_t *st) +{ + quarter_round(st, (uint32_t)0U, (uint32_t)4U, (uint32_t)8U, (uint32_t)12U); + quarter_round(st, (uint32_t)1U, (uint32_t)5U, (uint32_t)9U, (uint32_t)13U); + quarter_round(st, (uint32_t)2U, (uint32_t)6U, (uint32_t)10U, (uint32_t)14U); + quarter_round(st, (uint32_t)3U, (uint32_t)7U, (uint32_t)11U, (uint32_t)15U); + quarter_round(st, (uint32_t)0U, (uint32_t)5U, (uint32_t)10U, (uint32_t)15U); + quarter_round(st, (uint32_t)1U, (uint32_t)6U, (uint32_t)11U, (uint32_t)12U); + quarter_round(st, (uint32_t)2U, (uint32_t)7U, (uint32_t)8U, (uint32_t)13U); + quarter_round(st, (uint32_t)3U, (uint32_t)4U, (uint32_t)9U, (uint32_t)14U); +} +/* Apply double_round to st ten times in a row, written out as ten explicit calls (20 rounds in total). */ +static inline void +rounds(uint32_t *st) +{ +
double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); +} +/* Compute one 16-word key-stream block into k without modifying ctx: copy ctx into k, add ctr to word 12, run rounds(), add the original ctx words back in word by word, then add ctr to word 12 once more, so k[12] ends as rounds output plus ctx[12] plus ctr (mod 2^32). KRML_MAYBE_FOR16 is defined elsewhere; presumably it runs its body for i = 0..15 - TODO confirm. */ +static inline void +chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) +{ + memcpy(k, ctx, (uint32_t)16U * sizeof(uint32_t)); + uint32_t ctr_u32 = ctr; + k[12U] = k[12U] + ctr_u32; + rounds(k); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = k; + uint32_t x = k[i] + ctx[i]; + os[i] = x;); + k[12U] = k[12U] + ctr_u32; +} +/* File-local copy of the same four constant words; this is the table that Hacl_Impl_Chacha20_chacha20_init reads. */ +static const uint32_t + chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; +/* Fill the 16-word state ctx: words 0..3 from chacha20_constants, words 4..11 from the 32 bytes at k, word 12 = ctr, words 13..15 from the 12 bytes at n. Bytes are converted with load32_le (declared elsewhere; presumably a little-endian 32-bit load - TODO confirm). */ +void +Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) +{ + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx; + uint32_t x = chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + ctx[12U] = ctr; + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); +} +/* Process one full 64-byte block: derive the key-stream words k from ctx with counter offset incr, read text as 16 words via load32_le, XOR each word with k, and write the 16 result words to out via store32_le. text is fully read into bl before out is written, so the caller in this file may pass the same buffer for both. */ +static void +chacha20_encrypt_block(uint32_t *ctx, uint8_t *out, uint32_t incr, uint8_t *text) +{ + uint32_t k[16U] = { 0U }; + chacha20_core(k, ctx, incr); + uint32_t bl[16U] = { 0U }; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = bl; + uint8_t *bj = text + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = bl; + uint32_t x = bl[i] ^ k[i]; + os[i] = x;); + KRML_MAYBE_FOR16(i, +
(uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + store32_le(out + i * (uint32_t)4U, bl[i]);); +} +/* Handle a trailing partial block: copy len bytes of text into a zero-filled 64-byte buffer, process that buffer in place as a full block, then copy only the first len bytes to out. The only caller in this file passes len = total length % 64; a len above 64 would overrun plain. */ +static inline void +chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, uint8_t *text) +{ + uint8_t plain[64U] = { 0U }; + memcpy(plain, text, len * sizeof(uint8_t)); + chacha20_encrypt_block(ctx, plain, incr, plain); + memcpy(out, plain, len * sizeof(uint8_t)); +} +/* Process len bytes of text into out with an initialised state ctx: each of the len / 64 full blocks is handled with its block index as the counter offset, and a non-empty remainder of len % 64 bytes goes through chacha20_encrypt_last with offset len / 64. rem and rem1 hold the same value. ctx itself is not written here. */ +void +Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text) +{ + uint32_t rem = len % (uint32_t)64U; + uint32_t nb = len / (uint32_t)64U; + uint32_t rem1 = len % (uint32_t)64U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + chacha20_encrypt_block(ctx, out + i * (uint32_t)64U, i, text + i * (uint32_t)64U); + } + if (rem1 > (uint32_t)0U) { + chacha20_encrypt_last(ctx, rem, out + nb * (uint32_t)64U, nb, text + nb * (uint32_t)64U); + } +} +/* One-shot entry point: build a fresh 16-word state from key, n and ctr, then run the update over the len bytes at text, writing len bytes to out. The local ctx is not cleared before returning. */ +void +Hacl_Chacha20_chacha20_encrypt( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + uint32_t ctx[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, text); +} +/* Performs exactly the same calls as Hacl_Chacha20_chacha20_encrypt, with cipher as the input buffer. */ +void +Hacl_Chacha20_chacha20_decrypt( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + uint32_t ctx[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, cipher); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/Hacl_Chacha20.h new file mode 100644 index 0000000000..56f2ae0640 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.h @@ -0,0 +1,61 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction,
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Chacha20_H +#define __Hacl_Chacha20_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +/* Process len bytes from text into out with ChaCha20. The implementation in Hacl_Chacha20.c reads 32 bytes from key and 12 bytes from n, and stores ctr in state word 12 as the counter for the first 64-byte block. */ +void +Hacl_Chacha20_chacha20_encrypt( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr); +/* Same parameters and sizes as Hacl_Chacha20_chacha20_encrypt; reads len bytes from cipher and writes len bytes to out. */ +void +Hacl_Chacha20_chacha20_decrypt( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c new file mode 100644 index 0000000000..d7ee9647ac --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c @@ -0,0 +1,1177 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated
documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20Poly1305_128.h" + +#include "internal/Hacl_Poly1305_128.h" +#include "internal/Hacl_Krmllib.h" +#include "libintvector.h" +static inline void +poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem = text + n * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 *pre0 = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc0 = ctx; + uint32_t sz_block = (uint32_t)32U; + uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block; + uint8_t *t00 = blocks; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)32U; + uint8_t *text0 = t00; + Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc0, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t00 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) 
= { 0U }; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + f00 = + Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f25 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f25; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f0; + e[1U] = f1; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *rn = pre0 + (uint32_t)10U; + 
Lib_IntVector_Intrinsics_vec128 *rn5 = pre0 + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 f110 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 f120 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 f130 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 f140 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec128 + a11 = + Lib_IntVector_Intrinsics_vec128_add64(a1, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec128 + a21 = + Lib_IntVector_Intrinsics_vec128_add64(a2, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec128 + a31 = + Lib_IntVector_Intrinsics_vec128_add64(a3, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec128 + a41 = + Lib_IntVector_Intrinsics_vec128_add64(a4, + Lib_IntVector_Intrinsics_vec128_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + 
Lib_IntVector_Intrinsics_vec128_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, 
f140)); + Lib_IntVector_Intrinsics_vec128 t01 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + 
Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o00 = x02; + Lib_IntVector_Intrinsics_vec128 o10 = x12; + Lib_IntVector_Intrinsics_vec128 o20 = x21; + Lib_IntVector_Intrinsics_vec128 o30 = x32; + Lib_IntVector_Intrinsics_vec128 o40 = x42; + acc0[0U] = o00; + acc0[1U] = o10; + acc0[2U] = o20; + acc0[3U] = o30; + acc0[4U] = o40; + Lib_IntVector_Intrinsics_vec128 f100 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23); + Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24); + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc0, pre0); + } + uint32_t len1 = n * (uint32_t)16U - len0; + 
uint8_t *t10 = blocks + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem1 = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t10 + i * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); 
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre0; + Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + 
Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + 
Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, 
(uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last = t10 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem1 * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + 
Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre0; + Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + 
Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + 
Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + 
Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + 
Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem, r * sizeof(uint8_t)); + if (r > (uint32_t)0U) { + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + 
Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 
r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + 
Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + 
Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +static inline void +poly1305_do_128( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_128_poly1305_init(ctx, k); + if (aadlen != (uint32_t)0U) { + poly1305_padded_128(ctx, aadlen, aad); + } + if (mlen != (uint32_t)0U) { + poly1305_padded_128(ctx, mlen, m); + } + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + 
Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + 
Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, 
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + 
Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_128_poly1305_finish(out, k, ctx); +} + /* AEAD encryption. Encrypts mlen bytes of m into cipher with Hacl_Chacha20_Vec128_chacha20_encrypt_128 using k, n and a final argument of 1 (presumably the initial block counter - TODO confirm against Hacl_Chacha20_Vec128.h). A 64-byte zero buffer is then encrypted in place with final argument 0, and that buffer is passed as the key to poly1305_do_128, which authenticates aad and the freshly produced cipher and writes the MAC to mac. */ +void +Hacl_Chacha20Poly1305_128_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, cipher, m, k, n, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_128(key, aadlen, aad, mlen, cipher, mac); +} + /* AEAD decryption. Recomputes the MAC over aad and cipher exactly as the encrypt routine does, compares it with the 16 bytes at mac, and only when all 16 bytes match decrypts cipher into m and returns 0. On a mismatch it returns 1 and m is never written. */ +uint32_t +Hacl_Chacha20Poly1305_128_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; 
+ poly1305_do_128(key, aadlen, aad, mlen, cipher, computed_mac); + uint8_t res = (uint8_t)255U; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); /* the masks of all 16 byte pairs are AND-ed together with no early exit; presumably FStar_UInt8_eq_mask yields 255 for equal bytes and 0 otherwise - TODO confirm in Hacl_Krmllib.h */ + uint8_t z = res; + if (z == (uint8_t)255U) { + Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, m, cipher, k, n, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h new file mode 100644 index 0000000000..01e2a4f517 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h @@ -0,0 +1,67 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Chacha20Poly1305_128_H +#define __Hacl_Chacha20Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Poly1305_128.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20_Vec128.h" + /* AEAD encryption: writes mlen bytes of ciphertext for m to cipher and the MAC computed over aad (aadlen bytes) and that ciphertext to mac, using key k and nonce n. */ +void +Hacl_Chacha20Poly1305_128_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + /* AEAD decryption: recomputes the MAC over aad and cipher and compares it with the 16 bytes at mac. Returns 0 and writes mlen bytes of plaintext to m when they match; returns 1 and leaves m unwritten when they do not. */ +uint32_t +Hacl_Chacha20Poly1305_128_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20Poly1305_128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c new file mode 100644 index 0000000000..a4e54f1e27 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c @@ -0,0 +1,1179 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20Poly1305_256.h" + +#include "internal/Hacl_Poly1305_256.h" +#include "internal/Hacl_Krmllib.h" +#include "libintvector.h" +static inline void +poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem = text + n * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc0 = ctx; + uint32_t sz_block = (uint32_t)64U; + uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block; + uint8_t *t00 = blocks; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)64U; + uint8_t *text0 = t00; + Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t00 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); + Lib_IntVector_Intrinsics_vec256 + hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m3 = 
Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); + Lib_IntVector_Intrinsics_vec256 + t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); + Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260); + Lib_IntVector_Intrinsics_vec256 + t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260); + Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260); + Lib_IntVector_Intrinsics_vec256 + t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); + Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260); + Lib_IntVector_Intrinsics_vec256 + o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 o00 = o5; + Lib_IntVector_Intrinsics_vec256 o11 = o10; + Lib_IntVector_Intrinsics_vec256 o21 = o20; + Lib_IntVector_Intrinsics_vec256 o31 = o30; + Lib_IntVector_Intrinsics_vec256 o41 = o40; + e[0U] = o00; + e[1U] = o11; + e[2U] = o21; + e[3U] = o31; + e[4U] = o41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; 
+ Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec256 + a01 = + Lib_IntVector_Intrinsics_vec256_add64(a0, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec256 + a11 = + Lib_IntVector_Intrinsics_vec256_add64(a1, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec256 + a21 = + Lib_IntVector_Intrinsics_vec256_add64(a2, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec256 + a31 = + Lib_IntVector_Intrinsics_vec256_add64(a3, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec256 + a41 = + Lib_IntVector_Intrinsics_vec256_add64(a4, + Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + 
Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec256 t01 = a04; + Lib_IntVector_Intrinsics_vec256 t1 = a14; + Lib_IntVector_Intrinsics_vec256 t2 = a24; + Lib_IntVector_Intrinsics_vec256 t3 = a34; + Lib_IntVector_Intrinsics_vec256 t4 = a44; + Lib_IntVector_Intrinsics_vec256 + mask26 = 
Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 
= Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o01 = x02; + Lib_IntVector_Intrinsics_vec256 o12 = x12; + Lib_IntVector_Intrinsics_vec256 o22 = x21; + Lib_IntVector_Intrinsics_vec256 o32 = x32; + Lib_IntVector_Intrinsics_vec256 o42 = x42; + acc0[0U] = o01; + acc0[1U] = o12; + acc0[2U] = o22; + acc0[3U] = o32; + acc0[4U] = o42; + Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); + Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); + Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22); + Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); + Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0); + } + uint32_t len1 = n * (uint32_t)16U - len0; + uint8_t *t10 = blocks + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem1 = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t10 + i * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t 
u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r1 = pre0; + Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec256 r11 = 
r1[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + 
Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + 
Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + 
Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last = t10 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem1 * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + 
Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); + Lib_IntVector_Intrinsics_vec256 *r1 = pre0; + Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + 
Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + 
Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + 
Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = 
Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem, r * sizeof(uint8_t)); + if (r > (uint32_t)0U) { + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + 
(uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r1 = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 
= e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + 
a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t0 = a06; + Lib_IntVector_Intrinsics_vec256 t1 = a16; + 
Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = 
Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +static inline void +poly1305_do_256( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_256_poly1305_init(ctx, k); + if (aadlen != (uint32_t)0U) { + poly1305_padded_256(ctx, aadlen, aad); + } + if (mlen != (uint32_t)0U) { + poly1305_padded_256(ctx, mlen, m); + } + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + 
Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + 
Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + 
Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + 
Lib_IntVector_Intrinsics_vec256 t0 = a06; + Lib_IntVector_Intrinsics_vec256 t1 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = 
Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_256_poly1305_finish(out, k, ctx); +} + +void +Hacl_Chacha20Poly1305_256_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_256(key, aadlen, aad, mlen, cipher, mac); +} + +uint32_t +Hacl_Chacha20Poly1305_256_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac); + uint8_t res = (uint8_t)255U; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); + uint8_t z = res; + if (z == (uint8_t)255U) { 
+ Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h new file mode 100644 index 0000000000..9a81e01f57 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h @@ -0,0 +1,67 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Chacha20Poly1305_256_H +#define __Hacl_Chacha20Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Poly1305_256.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20_Vec256.h" + +void +Hacl_Chacha20Poly1305_256_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +uint32_t +Hacl_Chacha20Poly1305_256_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20Poly1305_256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c new file mode 100644 index 0000000000..f8efb00377 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c @@ -0,0 +1,592 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20Poly1305_32.h" + +#include "internal/Hacl_Krmllib.h" + +static inline void +poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem = text + n * (uint32_t)16U; + uint64_t *pre0 = ctx + (uint32_t)5U; + uint64_t *acc0 = ctx; + uint32_t nb = n * (uint32_t)16U / (uint32_t)16U; + uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = blocks + i * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + 
uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t 
o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last = blocks + nb * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem1 * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + uint64_t mask = b; + uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * 
a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem, r * sizeof(uint8_t)); + if (r > (uint32_t)0U) { + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + 
uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; 
+ uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +static inline void +poly1305_do_32( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + uint64_t ctx[25U] = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_32_poly1305_init(ctx, k); + if (aadlen != (uint32_t)0U) { + poly1305_padded_32(ctx, aadlen, aad); + } + if (mlen != (uint32_t)0U) { + poly1305_padded_32(ctx, mlen, m); + } + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & 
(uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + 
uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_32_poly1305_finish(out, k, ctx); +} + +void +Hacl_Chacha20Poly1305_32_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_chacha20_encrypt(mlen, cipher, m, k, n, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_32(key, aadlen, aad, mlen, cipher, mac); +} + +uint32_t +Hacl_Chacha20Poly1305_32_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac); + uint8_t res = (uint8_t)255U; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + 
(uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); + uint8_t z = res; + if (z == (uint8_t)255U) { + Hacl_Chacha20_chacha20_encrypt(mlen, m, cipher, k, n, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h new file mode 100644 index 0000000000..a3d23d6d3a --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h @@ -0,0 +1,67 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Chacha20Poly1305_32_H +#define __Hacl_Chacha20Poly1305_32_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Poly1305_32.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20.h" + +void +Hacl_Chacha20Poly1305_32_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +uint32_t +Hacl_Chacha20Poly1305_32_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20Poly1305_32_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c new file mode 100644 index 0000000000..697a36bb31 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c @@ -0,0 +1,819 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20_Vec128.h" + +#include "internal/Hacl_Chacha20.h" +#include "libintvector.h" +static inline void +double_round_128(Lib_IntVector_Intrinsics_vec128 *st) +{ + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std0 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std0, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std1 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std1, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std2 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std2, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std3 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std3, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std4 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std4, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec128 
std5 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std5, (uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std6 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std6, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std7 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std7, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std8 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std8, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std9 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std9, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std10 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std10, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std11 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std11, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std12 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std12, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std13 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]); + 
st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std13, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std14 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std14, (uint32_t)7U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std15 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std15, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std16 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std16, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std17 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std17, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std18 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std18, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std19 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std19, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std20 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std20, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std21 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]); + st[12U] = 
Lib_IntVector_Intrinsics_vec128_rotate_left32(std21, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std22 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std22, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std23 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std23, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std24 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std24, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std25 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std25, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std26 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std26, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std27 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std27, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std28 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std28, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std29 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std29, 
(uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std30 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std30, (uint32_t)7U); +} + +static inline void +chacha20_core_128( + Lib_IntVector_Intrinsics_vec128 *k, + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t ctr) +{ + memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec128)); + uint32_t ctr_u32 = (uint32_t)4U * ctr; + Lib_IntVector_Intrinsics_vec128 cv = Lib_IntVector_Intrinsics_vec128_load32(ctr_u32); + k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 *os = k; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]); + os[i] = x;); + k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); +} + +static inline void +chacha20_init_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) +{ + uint32_t ctx1[16U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx1; + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + ctx1[12U] = ctr; + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + 
(uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 *os = ctx; + uint32_t x = ctx1[i]; + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x); + os[i] = x0;); + Lib_IntVector_Intrinsics_vec128 + ctr1 = + Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)0U, + (uint32_t)1U, + (uint32_t)2U, + (uint32_t)3U); + Lib_IntVector_Intrinsics_vec128 c12 = ctx[12U]; + ctx[12U] = Lib_IntVector_Intrinsics_vec128_add32(c12, ctr1); +} + +void +Hacl_Chacha20_Vec128_chacha20_encrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_init_128(ctx, key, n, ctr); + uint32_t rem = len % (uint32_t)256U; + uint32_t nb = len / (uint32_t)256U; + uint32_t rem1 = len % (uint32_t)256U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *uu____0 = out + i * (uint32_t)256U; + uint8_t *uu____1 = text + i * (uint32_t)256U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, i); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = 
Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + 
Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 = 
Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y);); + } + if (rem1 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)256U; + uint8_t *uu____3 = text + nb * (uint32_t)256U; + uint8_t plain[256U] = { 0U }; + memcpy(plain, uu____3, rem * sizeof(uint8_t)); + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, nb); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + 
Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); + 
Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = 
v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); + 
Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y);); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); + } +} +/* Hacl_Chacha20_Vec128_chacha20_decrypt_128: writes len bytes to out, each the XOR of the corresponding byte of cipher with a block computed by chacha20_core_128 from the state that chacha20_init_128(ctx, key, n, ctr) prepares. Input is handled in 256-byte chunks indexed from 0; a final chunk shorter than 256 bytes is staged through a zeroed stack buffer so that only len % 256 bytes are read from cipher and written to out. */ +void +Hacl_Chacha20_Vec128_chacha20_decrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_init_128(ctx, key, n, ctr); + uint32_t rem = len % (uint32_t)256U; + uint32_t nb = len / (uint32_t)256U; + uint32_t rem1 = len % (uint32_t)256U; /* same value as rem */ + for (uint32_t i = (uint32_t)0U; i < nb; i++) { /* one iteration per full 256-byte chunk */ + uint8_t *uu____0 = out + i * (uint32_t)256U; + uint8_t *uu____1 = cipher + i * (uint32_t)256U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, i); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; /* NOTE(review): the interleave_low/high32 then interleave_low/high64 steps that follow look like a 4x4 transpose of 32-bit lanes within each group of four vectors; the intrinsics are not defined here, confirm against libintvector.h */ + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); + 
Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 
= v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + 
Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; /* k[4*j + g] now holds result j of input group g (st[4*g] .. st[4*g + 3]), for g, j in 0..3; the loop after this XORs k[i0] with the i0-th 16-byte slice of the chunk */ + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y);); + } + if (rem1 > (uint32_t)0U) { /* trailing partial chunk: same steps as the loop body, applied to the zero-padded copy plain with block index nb; only rem bytes are copied in and out */ + uint8_t *uu____2 = out + nb * (uint32_t)256U; + uint8_t *uu____3 = cipher + nb * (uint32_t)256U; + uint8_t plain[256U] = { 0U }; + memcpy(plain, uu____3, rem * sizeof(uint8_t)); + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, nb); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + 
Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = 
Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + 
Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y);); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); + } +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h new file mode 100644 index 0000000000..52b8d249f2 --- /dev/null +++ 
b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h @@ -0,0 +1,61 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Chacha20_Vec128_H +#define __Hacl_Chacha20_Vec128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +/* Encrypt entry point (defined in Hacl_Chacha20_Vec128.c): processes len bytes of text into out, 256 bytes at a time, by XOR with generated blocks. NOTE(review): key, n and ctr are presumably consumed exactly as in the decrypt variant; the required buffer lengths for key and n are not stated in this header - confirm against the definition. */ +void +Hacl_Chacha20_Vec128_chacha20_encrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr); +/* Decrypt entry point (defined in Hacl_Chacha20_Vec128.c): processes len bytes of cipher into out; key, n and ctr are passed to the state initialisation, and chunks are numbered from 0 in units of 256 bytes. NOTE(review): required buffer lengths for key and n are not stated in this header - confirm against the definition. */ +void +Hacl_Chacha20_Vec128_chacha20_decrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20_Vec128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c new file mode 100644 index 0000000000..6c3e8488eb --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c @@ -0,0 +1,1207 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20_Vec256.h" + +#include "internal/Hacl_Chacha20.h" +#include "libintvector.h" +/* double_round_256: updates the 16 vectors of st in place with eight add32 / xor / rotate_left32 sequences, each using rotation amounts 16, 12, 8, 7 on an index quadruple (a, b, c, d): first (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15), then (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14). Statement order matters: every step reads values written by the previous one. */ static inline void +double_round_256(Lib_IntVector_Intrinsics_vec256 *st) +{ + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std0 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std0, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std1 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std1, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std2 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std2, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec256 std3 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std3, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std4 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std4, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec256 
std5 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std5, (uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std6 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std6, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std7 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std7, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std8 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std8, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std9 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std9, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std10 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std10, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std11 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]); + st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std11, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std12 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std12, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std13 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]); + 
st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std13, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std14 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std14, (uint32_t)7U); /* the four (j, j+4, j+8, j+12) quadruples are done; the quadruples (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14) follow */ + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec256 std15 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std15, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std16 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std16, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec256 std17 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std17, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std18 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std18, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std19 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]); + st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std19, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std20 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std20, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std21 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]); + st[12U] = 
Lib_IntVector_Intrinsics_vec256_rotate_left32(std21, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std22 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std22, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std23 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std23, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std24 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std24, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std25 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std25, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std26 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std26, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std27 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std27, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std28 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std28, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std29 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std29, 
(uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std30 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std30, (uint32_t)7U); +} + +static inline void +chacha20_core_256( + Lib_IntVector_Intrinsics_vec256 *k, + Lib_IntVector_Intrinsics_vec256 *ctx, + uint32_t ctr) +{ + memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec256)); + uint32_t ctr_u32 = (uint32_t)8U * ctr; + Lib_IntVector_Intrinsics_vec256 cv = Lib_IntVector_Intrinsics_vec256_load32(ctr_u32); + k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 *os = k; + Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]); + os[i] = x;); + k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); +} + +static inline void +chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) +{ + uint32_t ctx1[16U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx1; + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + ctx1[12U] = ctr; + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + 
(uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 *os = ctx; + uint32_t x = ctx1[i]; + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x); + os[i] = x0;); + Lib_IntVector_Intrinsics_vec256 + ctr1 = + Lib_IntVector_Intrinsics_vec256_load32s((uint32_t)0U, + (uint32_t)1U, + (uint32_t)2U, + (uint32_t)3U, + (uint32_t)4U, + (uint32_t)5U, + (uint32_t)6U, + (uint32_t)7U); + Lib_IntVector_Intrinsics_vec256 c12 = ctx[12U]; + ctx[12U] = Lib_IntVector_Intrinsics_vec256_add32(c12, ctr1); +} + +void +Hacl_Chacha20_Vec256_chacha20_encrypt_256( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_init_256(ctx, key, n, ctr); + uint32_t rem = len % (uint32_t)512U; + uint32_t nb = len / (uint32_t)512U; + uint32_t rem1 = len % (uint32_t)512U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *uu____0 = out + i * (uint32_t)512U; + uint8_t *uu____1 = text + i * (uint32_t)512U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_core_256(k, ctx, i); + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + 
Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); + Lib_IntVector_Intrinsics_vec256 + v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + 
Lib_IntVector_Intrinsics_vec256 + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 
= v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + 
Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; + k[0U] = v0; + k[1U] = v8; + k[2U] = v1; + k[3U] = v9; + k[4U] = v2; + k[5U] = v10; + k[6U] = v3; + k[7U] = v11; + k[8U] = v4; + k[9U] = v12; + k[10U] = v5; + k[11U] = v13; + k[12U] = v6; + k[13U] = v14; + k[14U] = v7; + k[15U] = v15; + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = 
Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y);); + } + if (rem1 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)512U; + uint8_t *uu____3 = text + nb * (uint32_t)512U; + uint8_t plain[512U] = { 0U }; + memcpy(plain, uu____3, rem * sizeof(uint8_t)); + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_core_256(k, ctx, nb); + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, 
v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); + Lib_IntVector_Intrinsics_vec256 + v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + 
Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = 
v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1_11 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 
v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; + k[0U] = v0; + k[1U] = v8; + k[2U] = v1; + k[3U] = v9; + k[4U] = v2; + k[5U] = v10; + k[6U] = v3; + k[7U] = v11; + k[8U] = v4; + k[9U] = v12; + k[10U] = v5; + k[11U] = v13; + k[12U] = v6; + k[13U] = v14; + k[14U] = v7; + k[15U] = v15; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y);); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); + } +} + +void +Hacl_Chacha20_Vec256_chacha20_decrypt_256( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_init_256(ctx, key, n, ctr); + uint32_t rem = len % (uint32_t)512U; + uint32_t nb = len / 
(uint32_t)512U; + uint32_t rem1 = len % (uint32_t)512U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *uu____0 = out + i * (uint32_t)512U; + uint8_t *uu____1 = cipher + i * (uint32_t)512U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_core_256(k, ctx, i); + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + 
v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); + Lib_IntVector_Intrinsics_vec256 + v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); + 
Lib_IntVector_Intrinsics_vec256 + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + 
Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + 
Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; + k[0U] = v0; + k[1U] = v8; + k[2U] = v1; + k[3U] = v9; + k[4U] = v2; + k[5U] = v10; + k[6U] = v3; + k[7U] = v11; + k[8U] = v4; + k[9U] = v12; + k[10U] = v5; + k[11U] = v13; + k[12U] = v6; + k[13U] = v14; + k[14U] = v7; + k[15U] = v15; + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y);); + } + if (rem1 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)512U; + uint8_t *uu____3 = cipher + nb * (uint32_t)512U; + uint8_t plain[512U] = { 0U }; + memcpy(plain, uu____3, rem * sizeof(uint8_t)); + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_core_256(k, ctx, nb); + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = 
k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); + Lib_IntVector_Intrinsics_vec256 + v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + 
Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v3_2 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + 
Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + 
Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + 
Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; + k[0U] = v0; + k[1U] = v8; + k[2U] = v1; + k[3U] = v9; + k[4U] = v2; + k[5U] = v10; + k[6U] = v3; + k[7U] = v11; + k[8U] = v4; + k[9U] = v12; + k[10U] = v5; + k[11U] = v13; + k[12U] = v6; + k[13U] = v14; + k[14U] = v7; + k[15U] = v15; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y);); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); + } +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h new file mode 100644 index 0000000000..5e9dccb9e2 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h @@ -0,0 +1,61 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Chacha20_Vec256_H +#define __Hacl_Chacha20_Vec256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include /* NOTE(review): the header name after this directive is missing in this copy of the patch, presumably an angle-bracket include lost in extraction; restore it from upstream. */ +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +/* Declaration only; the definition is not visible from this header. NOTE(review): from the parameter names, presumably writes len bytes to out by applying ChaCha20 with key, nonce n and initial block counter ctr to text; confirm the required key and nonce lengths against the implementation. */ +void +Hacl_Chacha20_Vec256_chacha20_encrypt_256( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr); +/* Declaration only. NOTE(review): same parameter list as the encrypt entry point except that the input buffer is named cipher; presumably reads len bytes from cipher and writes len bytes to out; confirm against the implementation. */ +void +Hacl_Chacha20_Vec256_chacha20_decrypt_256( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20_Vec256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c new file mode 100644 index 0000000000..09bfc4be83 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c @@ -0,0 +1,296 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "internal/Hacl_Curve25519_51.h" + +#include "internal/Hacl_Krmllib.h" +/* 32-byte constant whose first byte is 9 and whose remaining bytes are zero; Hacl_Curve25519_51_secret_to_public copies it into the point it passes to Hacl_Curve25519_51_scalarmult. */ +static const uint8_t g25519[32U] = { (uint8_t)9U }; +/* Combined add-and-double step on the two working points stored in p01_tmp1, updating both in place. Layout of p01_tmp1 in uint64_t limbs: [0,10) is nq as (x2, z2), [10,20) is nq_p1 as (x3, z3), [20,40) is scratch split into a, b, d, c of 5 limbs each. q supplies x1 (its first 5 limbs), which is multiplied into z3 by the last call. tmp2 is wide scratch handed to the Field51 multiply and square helpers. ab aliases a and dc aliases d, so the statement order is significant. */ +static void +point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, FStar_UInt128_uint128 *tmp2) +{ + uint64_t *nq = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + (uint32_t)10U; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; + uint64_t *x1 = q; + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)5U; + uint64_t *z3 = nq_p1 + (uint32_t)5U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)5U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + uint64_t *x3 = nq_p1; + uint64_t *z31 = nq_p1 + (uint32_t)5U; + uint64_t *d0 = dc; + uint64_t *c0 = dc + (uint32_t)5U; + Hacl_Impl_Curve25519_Field51_fadd(c0, x3, z31); + Hacl_Impl_Curve25519_Field51_fsub(d0, x3, z31); + Hacl_Impl_Curve25519_Field51_fmul2(dc, dc, ab, tmp2); + Hacl_Impl_Curve25519_Field51_fadd(x3, d0, c0); + Hacl_Impl_Curve25519_Field51_fsub(z31, d0, c0); + uint64_t *a1 = tmp1; + uint64_t *b1 = tmp1 + (uint32_t)5U; + uint64_t *d = tmp1 + (uint32_t)10U; + uint64_t *c = tmp1 + (uint32_t)15U; + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + a1[4U] = c[4U]; /* a1 now holds a copy of c, which the following fsub call overwrites */ + 
Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b1, c, (uint64_t)121665U); /* NOTE(review): 121665 equals (486662 - 2) / 4, presumably the Curve25519 doubling constant; confirm against the spec */ + Hacl_Impl_Curve25519_Field51_fadd(b1, b1, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fmul(z3, z3, x1, tmp2); +} +/* Doubling step on the point nq = (x2, z2), 5 limbs each, updated in place by the final fmul2 call. tmp1 provides 20 limbs of scratch laid out as a, b, d, c of 5 limbs each (ab aliases a, dc aliases d); tmp2 is wide scratch for the Field51 helpers. */ +static void +point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2) +{ + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)5U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)5U; + uint64_t *d = tmp1 + (uint32_t)10U; + uint64_t *c = tmp1 + (uint32_t)15U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + Hacl_Impl_Curve25519_Field51_fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + a[4U] = c[4U]; + Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b, c, (uint64_t)121665U); + Hacl_Impl_Curve25519_Field51_fadd(b, b, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc, ab, tmp2); +} +/* Scalar-multiplication ladder. The working buffer p01_tmp1_swap has 41 limbs: [0,10) starts as x = 1, z = 0; [10,20) starts as a copy of the first 10 limbs of init; [20,40) is scratch for the point helpers; limb 40 holds the previous swap bit. The first step calls cswap2 with the constant 1; the loop then reads key bits 253 down to 3; a final cswap2 and three point_double calls follow. Key bits 0..2, 254 and 255 are therefore never read. The first 10 limbs of the buffer are copied to out at the end. init is only read and out is written only by the final memcpy, so passing the same buffer for both, as Hacl_Curve25519_51_scalarmult does, is safe. */ +static void +montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) +{ + FStar_UInt128_uint128 tmp2[10U]; + for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) + tmp2[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + uint64_t p01_tmp1_swap[41U] = { 0U }; + uint64_t *p0 = p01_tmp1_swap; + uint64_t *p01 = p01_tmp1_swap; + uint64_t *p03 = p01; + uint64_t *p11 = p01 + (uint32_t)10U; + memcpy(p11, init, (uint32_t)10U * sizeof(uint64_t)); + uint64_t *x0 = p03; + uint64_t *z0 = p03 + (uint32_t)5U; + x0[0U] = (uint64_t)1U; + x0[1U] = (uint64_t)0U; + x0[2U] = (uint64_t)0U; + x0[3U] = (uint64_t)0U; + x0[4U] = (uint64_t)0U; + z0[0U] = (uint64_t)0U; + z0[1U] = (uint64_t)0U; + z0[2U] = (uint64_t)0U; + z0[3U] = (uint64_t)0U; + z0[4U] = (uint64_t)0U; + uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *p01_tmp11 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp1_swap; + uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)10U; + uint64_t 
*swap = p01_tmp1_swap + (uint32_t)40U; + Hacl_Impl_Curve25519_Field51_cswap2((uint64_t)1U, nq1, nq_p11); + point_add_and_double(init, p01_tmp11, tmp2); + swap[0U] = (uint64_t)1U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *swap1 = p01_tmp1_swap + (uint32_t)40U; + uint64_t *nq2 = p01_tmp12; + uint64_t *nq_p12 = p01_tmp12 + (uint32_t)10U; + uint64_t + bit = + (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); /* key bit number 253 - i */ + uint64_t sw = swap1[0U] ^ bit; /* swap argument is the XOR of the previous and current key bits */ + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq2, nq_p12); + point_add_and_double(init, p01_tmp12, tmp2); + swap1[0U] = bit; + } + uint64_t sw = swap[0U]; + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); + uint64_t *nq10 = p01_tmp1; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; + point_double(nq10, tmp1, tmp2); + point_double(nq10, tmp1, tmp2); + point_double(nq10, tmp1, tmp2); + memcpy(out, p0, (uint32_t)10U * sizeof(uint64_t)); +} +/* Calls fsqr once from inp into o, then n - 1 more times on o in place, i.e. n squarings in total. n must be at least 1: for n == 0 the uint32_t expression n - 1 wraps around and the loop runs 2^32 - 1 times. */ +void +Hacl_Curve25519_51_fsquare_times( + uint64_t *o, + uint64_t *inp, + FStar_UInt128_uint128 *tmp, + uint32_t n) +{ + Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); + for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { + Hacl_Impl_Curve25519_Field51_fsqr(o, o, tmp); + } +} +/* Fixed chain of fsquare_times and fmul calls on i, using the 20-limb local t1 as four 5-limb temporaries (a at 0, b at 5, c at 10, t0 at 15); the final product of t0 and a is written to o. tmp is wide scratch passed to every helper call. NOTE(review): from the name this is presumably the field inverse of i computed by exponentiation; confirm against the HACL* spec. */ +void +Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp) +{ + uint64_t t1[20U] = { 0U }; + uint64_t *a1 = t1; + uint64_t *b1 = t1 + (uint32_t)5U; + uint64_t *t010 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp10 = tmp; + Hacl_Curve25519_51_fsquare_times(a1, i, tmp10, (uint32_t)1U); + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)2U); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, i, tmp); + Hacl_Impl_Curve25519_Field51_fmul(a1, b1, a1, tmp); + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)1U); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + Hacl_Curve25519_51_fsquare_times(t010, b1, tmp10, (uint32_t)5U); + 
Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + uint64_t *b10 = t1 + (uint32_t)5U; + uint64_t *c10 = t1 + (uint32_t)10U; + uint64_t *t011 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp11 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); + Hacl_Curve25519_51_fsquare_times(t011, c10, tmp11, (uint32_t)20U); + Hacl_Impl_Curve25519_Field51_fmul(t011, t011, c10, tmp); + Hacl_Curve25519_51_fsquare_times(t011, t011, tmp11, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(b10, t011, b10, tmp); + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); + uint64_t *b11 = t1 + (uint32_t)5U; + uint64_t *c1 = t1 + (uint32_t)10U; + uint64_t *t01 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp1 = tmp; + Hacl_Curve25519_51_fsquare_times(t01, c1, tmp1, (uint32_t)100U); + Hacl_Impl_Curve25519_Field51_fmul(t01, t01, c1, tmp); + Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(t01, t01, b11, tmp); + Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)5U); + uint64_t *a = t1; + uint64_t *t0 = t1 + (uint32_t)15U; + Hacl_Impl_Curve25519_Field51_fmul(o, t0, a, tmp); +} +/* Treats i as (x, z) with 5 limbs each, computes finv(z) multiplied by x, converts the result to four uint64_t words with store_felem, and writes them to o as four little-endian 64-bit words (32 bytes). */ +static void +encode_point(uint8_t *o, uint64_t *i) +{ + uint64_t *x = i; + uint64_t *z = i + (uint32_t)5U; + uint64_t tmp[5U] = { 0U }; + uint64_t u64s[4U] = { 0U }; + FStar_UInt128_uint128 tmp_w[10U]; + for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) + tmp_w[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + Hacl_Curve25519_51_finv(tmp, z, tmp_w); + Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); + Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); + KRML_MAYBE_FOR4(i0, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); +} +/* Reads 32 bytes of pub as four little-endian uint64_t words, clears the top bit of the last word, repacks the value into five 51-bit limbs as x with z = 1, runs montgomery_ladder with priv, and writes the 32-byte encoding of the result to out. */ +void +Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint64_t init[10U] = { 0U }; 
+ uint64_t tmp[4U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint64_t *os = tmp; + uint8_t *bj = pub + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint64_t tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; /* drop bit 255 of the input point */ + uint64_t *x = init; + uint64_t *z = init + (uint32_t)5U; + z[0U] = (uint64_t)1U; + z[1U] = (uint64_t)0U; + z[2U] = (uint64_t)0U; + z[3U] = (uint64_t)0U; + z[4U] = (uint64_t)0U; + uint64_t f0l = tmp[0U] & (uint64_t)0x7ffffffffffffU; /* mask is 2^51 - 1 */ + uint64_t f0h = tmp[0U] >> (uint32_t)51U; + uint64_t f1l = (tmp[1U] & (uint64_t)0x3fffffffffU) << (uint32_t)13U; + uint64_t f1h = tmp[1U] >> (uint32_t)38U; + uint64_t f2l = (tmp[2U] & (uint64_t)0x1ffffffU) << (uint32_t)26U; + uint64_t f2h = tmp[2U] >> (uint32_t)25U; + uint64_t f3l = (tmp[3U] & (uint64_t)0xfffU) << (uint32_t)39U; + uint64_t f3h = tmp[3U] >> (uint32_t)12U; + x[0U] = f0l; + x[1U] = f0h | f1l; + x[2U] = f1h | f2l; + x[3U] = f2h | f3l; + x[4U] = f3h; + montgomery_ladder(init, priv, init); /* same buffer is used as ladder input and output */ + encode_point(out, init); +} +/* Copies g25519 into a local 32-byte buffer and calls Hacl_Curve25519_51_scalarmult(pub, priv, basepoint). */ +void +Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv) +{ + uint8_t basepoint[32U] = { 0U }; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t *os = basepoint; + uint8_t x = g25519[i]; + os[i] = x; + } + Hacl_Curve25519_51_scalarmult(pub, priv, basepoint); +} +/* Runs Hacl_Curve25519_51_scalarmult(out, priv, pub), then ANDs together FStar_UInt8_eq_mask(out[i], 0) over all 32 output bytes without an early exit. Returns false when the accumulated mask is 255 and true otherwise; out is written in both cases. NOTE(review): eq_mask presumably yields 255 on equality, which would make false mean an all-zero result; confirm. */ +bool +Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint8_t zeros[32U] = { 0U }; + Hacl_Curve25519_51_scalarmult(out, priv, pub); + uint8_t res = (uint8_t)255U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]); + res = uu____0 & res; + } + uint8_t z = res; + bool r = z == (uint8_t)255U; + return !r; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h new file mode 100644 index 0000000000..2a4494a7af --- /dev/null +++ 
b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h @@ -0,0 +1,50 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Curve25519_51_H +#define __Hacl_Curve25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include /* NOTE(review): the header name after this directive is missing in this copy of the patch, presumably an angle-bracket include lost in extraction; restore it from upstream. */ +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +#include "Hacl_Bignum25519_51.h" +/* Reads 32 bytes from pub and from priv and writes 32 bytes to out. The implementation clears the top bit of the last pub byte before use and never reads bits 0..2, 254 or 255 of priv. */ +void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub); +/* Calls Hacl_Curve25519_51_scalarmult with priv and a 32-byte point whose first byte is 9 and whose other bytes are zero, writing the 32-byte result to pub. */ +void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv); +/* Calls Hacl_Curve25519_51_scalarmult(out, priv, pub) and returns false when the equality mask accumulated over the 32 output bytes is 255, true otherwise; out is written in either case. NOTE(review): presumably false means the result is all zero; confirm FStar_UInt8_eq_mask semantics. */ +bool Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Curve25519_51_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c new file mode 100644 index 0000000000..40144d48dc --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c @@ -0,0 +1,400 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* Curve25519 over 4x64-bit limbs: a point is 8 limbs (x in [0..3], z in [4..7]), as the pointer offsets below show. */
+#include "Hacl_Curve25519_64.h"
+/* Each NAME0 wrapper below calls NAME when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise NAME_e (return value unused). */
+#include "internal/Vale.h"
+#include "internal/Hacl_Krmllib.h"
+#include "config.h"
+#include "curve25519-inline.h"
+static inline void
+add_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    add_scalar(out, f1, f2);
+#else
+    uint64_t uu____0 = add_scalar_e(out, f1, f2);
+#endif
+}
+/* fadd0: fadd(out, f1, f2) or fadd_e(out, f1, f2). */
+static inline void
+fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    fadd(out, f1, f2);
+#else
+    uint64_t uu____0 = fadd_e(out, f1, f2);
+#endif
+}
+/* fsub0: fsub(out, f1, f2) or fsub_e(out, f1, f2). */
+static inline void
+fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    fsub(out, f1, f2);
+#else
+    uint64_t uu____0 = fsub_e(out, f1, f2);
+#endif
+}
+/* fmul0: fmul(out, f1, f2, tmp) or fmul_e(tmp, f1, out, f2); note the different argument order of the _e form. */
+static inline void
+fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    fmul(out, f1, f2, tmp);
+#else
+    uint64_t uu____0 = fmul_e(tmp, f1, out, f2);
+#endif
+}
+/* fmul20: fmul2(out, f1, f2, tmp) or fmul2_e(tmp, f1, out, f2); same argument reordering as fmul0. */
+static inline void
+fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    fmul2(out, f1, f2, tmp);
+#else
+    uint64_t uu____0 = fmul2_e(tmp, f1, out, f2);
+#endif
+}
+/* fmul_scalar0: fmul_scalar(out, f1, f2) or fmul_scalar_e(out, f1, f2); f2 is a 64-bit value, not a limb array. */
+static inline void
+fmul_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    fmul_scalar(out, f1, f2);
+#else
+    uint64_t uu____0 = fmul_scalar_e(out, f1, f2);
+#endif
+}
+/* fsqr0: fsqr(out, f1, tmp) or fsqr_e(tmp, f1, out). */
+static inline void
+fsqr0(uint64_t *out, uint64_t *f1, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    fsqr(out, f1, tmp);
+#else
+    uint64_t uu____0 = fsqr_e(tmp, f1, out);
+#endif
+}
+/* fsqr20: fsqr2(out, f, tmp) or fsqr2_e(tmp, f, out). */
+static inline void
+fsqr20(uint64_t *out, uint64_t *f, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    fsqr2(out, f, tmp);
+#else
+    uint64_t uu____0 = fsqr2_e(tmp, f, out);
+#endif
+}
+/* cswap20: cswap2(bit, p1, p2) or cswap2_e(bit, p1, p2). */
+static inline void
+cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+    cswap2(bit, p1, p2);
+#else
+    uint64_t uu____0 = cswap2_e(bit, p1, p2);
+#endif
+}
+/* Basepoint bytes used by secret_to_public: first byte 9, the remaining 31 bytes zero-initialized. */
+static const uint8_t g25519[32U] = { (uint8_t)9U };
+/* One ladder step. q: x1 in q[0..3]. p01_tmp1: nq at [0..7], nq_p1 at [8..15], scratch tmp1 at [16..31]; tmp2 is scratch for mul/sqr. */
+static void
+point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2)
+{
+    uint64_t *nq = p01_tmp1;
+    uint64_t *nq_p1 = p01_tmp1 + (uint32_t)8U;
+    uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U;
+    uint64_t *x1 = q;
+    uint64_t *x2 = nq;
+    uint64_t *z2 = nq + (uint32_t)4U;
+    uint64_t *z3 = nq_p1 + (uint32_t)4U;
+    uint64_t *a = tmp1;
+    uint64_t *b = tmp1 + (uint32_t)4U;
+    uint64_t *ab = tmp1; /* ab aliases the pair (a, b); dc aliases the pair (d, c) */
+    uint64_t *dc = tmp1 + (uint32_t)8U;
+    fadd0(a, x2, z2);
+    fsub0(b, x2, z2);
+    uint64_t *x3 = nq_p1;
+    uint64_t *z31 = nq_p1 + (uint32_t)4U;
+    uint64_t *d0 = dc;
+    uint64_t *c0 = dc + (uint32_t)4U;
+    fadd0(c0, x3, z31);
+    fsub0(d0, x3, z31);
+    fmul20(dc, dc, ab, tmp2);
+    fadd0(x3, d0, c0);
+    fsub0(z31, d0, c0);
+    uint64_t *a1 = tmp1;
+    uint64_t *b1 = tmp1 + (uint32_t)4U;
+    uint64_t *d = tmp1 + (uint32_t)8U;
+    uint64_t *c = tmp1 + (uint32_t)12U;
+    uint64_t *ab1 = tmp1;
+    uint64_t *dc1 = tmp1 + (uint32_t)8U;
+    fsqr20(dc1, ab1, tmp2);
+    fsqr20(nq_p1, nq_p1, tmp2);
+    a1[0U] = c[0U]; /* save c into a1 before c is overwritten by the subtraction below */
+    a1[1U] = c[1U];
+    a1[2U] = c[2U];
+    a1[3U] = c[3U];
+    fsub0(c, d, c);
+    fmul_scalar0(b1, c, (uint64_t)121665U);
+    fadd0(b1, b1, d);
+    fmul20(nq, dc1, ab1, tmp2);
+    fmul0(z3, z3, x1, tmp2);
+}
+/* Updates nq (x in [0..3], z in [4..7]) in place; tmp1 supplies 16 limbs of scratch, tmp2 is scratch for mul/sqr. */
+static void
+point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2)
+{
+    uint64_t *x2 = nq;
+    uint64_t *z2 = nq + (uint32_t)4U;
+    uint64_t *a = tmp1;
+    uint64_t *b = tmp1 + (uint32_t)4U;
+    uint64_t *d = tmp1 + (uint32_t)8U;
+    uint64_t *c = tmp1 + (uint32_t)12U;
+    uint64_t *ab = tmp1;
+    uint64_t *dc = tmp1 + (uint32_t)8U;
+    fadd0(a, x2, z2);
+    fsub0(b, x2, z2);
+    fsqr20(dc, ab, tmp2);
+    a[0U] = c[0U];
+    a[1U] = c[1U];
+    a[2U] = c[2U];
+    a[3U] = c[3U];
+    fsub0(c, d, c);
+    fmul_scalar0(b, c, (uint64_t)121665U);
+    fadd0(b, b, d);
+    fmul20(nq, dc, ab, tmp2);
+}
+/* First step uses a constant swap of 1; the loop then reads key bits 253 down to 3; three point_double calls follow; 8 limbs go to out. */
+static void
+montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init)
+{
+    uint64_t tmp2[16U] = { 0U };
+    uint64_t p01_tmp1_swap[33U] = { 0U }; /* [0..7] p0, [8..15] p1, [16..31] scratch, [32] previous swap bit */
+    uint64_t *p0 = p01_tmp1_swap;
+    uint64_t *p01 = p01_tmp1_swap;
+    uint64_t *p03 = p01;
+    uint64_t *p11 = p01 + (uint32_t)8U;
+    memcpy(p11, init, (uint32_t)8U * sizeof(uint64_t));
+    uint64_t *x0 = p03;
+    uint64_t *z0 = p03 + (uint32_t)4U;
+    x0[0U] = (uint64_t)1U;
+    x0[1U] = (uint64_t)0U;
+    x0[2U] = (uint64_t)0U;
+    x0[3U] = (uint64_t)0U;
+    z0[0U] = (uint64_t)0U;
+    z0[1U] = (uint64_t)0U;
+    z0[2U] = (uint64_t)0U;
+    z0[3U] = (uint64_t)0U;
+    uint64_t *p01_tmp1 = p01_tmp1_swap;
+    uint64_t *p01_tmp11 = p01_tmp1_swap;
+    uint64_t *nq1 = p01_tmp1_swap;
+    uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)8U;
+    uint64_t *swap = p01_tmp1_swap + (uint32_t)32U;
+    cswap20((uint64_t)1U, nq1, nq_p11);
+    point_add_and_double(init, p01_tmp11, tmp2);
+    swap[0U] = (uint64_t)1U;
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) {
+        uint64_t *p01_tmp12 = p01_tmp1_swap;
+        uint64_t *swap1 = p01_tmp1_swap + (uint32_t)32U;
+        uint64_t *nq2 = p01_tmp12;
+        uint64_t *nq_p12 = p01_tmp12 + (uint32_t)8U;
+        uint64_t
+            bit =
+                (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U);
+        uint64_t sw = swap1[0U] ^ bit; /* swap only when this bit differs from the previous one */
+        cswap20(sw, nq2, nq_p12);
+        point_add_and_double(init, p01_tmp12, tmp2);
+        swap1[0U] = bit;
+    }
+    uint64_t sw = swap[0U];
+    cswap20(sw, nq1, nq_p11);
+    uint64_t *nq10 = p01_tmp1;
+    uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U;
+    point_double(nq10, tmp1, tmp2);
+    point_double(nq10, tmp1, tmp2);
+    point_double(nq10, tmp1, tmp2);
+    memcpy(out, p0, (uint32_t)8U * sizeof(uint64_t)); /* out is written only here, so out may alias init (as in scalarmult) */
+}
+/* Squares inp into o, then squares o in place n - 1 more times; n must be >= 1 because n - 1 is computed in uint32_t. */
+static void
+fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n)
+{
+    fsqr0(o, inp, tmp);
+    for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) {
+        fsqr0(o, o, tmp);
+    }
+}
+/* Fixed square-and-multiply chain raising i to 2^255 - 21, i.e. p - 2 for p = 2^255 - 19 (the value store_felem subtracts). */
+static void
+finv(uint64_t *o, uint64_t *i, uint64_t *tmp)
+{
+    uint64_t t1[16U] = { 0U }; /* four 4-limb temporaries: a at [0], b at [4], c at [8], t0 at [12] */
+    uint64_t *a1 = t1;
+    uint64_t *b1 = t1 + (uint32_t)4U;
+    uint64_t *t010 = t1 + (uint32_t)12U;
+    uint64_t *tmp10 = tmp;
+    fsquare_times(a1, i, tmp10, (uint32_t)1U);
+    fsquare_times(t010, a1, tmp10, (uint32_t)2U);
+    fmul0(b1, t010, i, tmp);
+    fmul0(a1, b1, a1, tmp); /* a1 = i^11, used in the final multiply */
+    fsquare_times(t010, a1, tmp10, (uint32_t)1U);
+    fmul0(b1, t010, b1, tmp);
+    fsquare_times(t010, b1, tmp10, (uint32_t)5U);
+    fmul0(b1, t010, b1, tmp);
+    uint64_t *b10 = t1 + (uint32_t)4U;
+    uint64_t *c10 = t1 + (uint32_t)8U;
+    uint64_t *t011 = t1 + (uint32_t)12U;
+    uint64_t *tmp11 = tmp;
+    fsquare_times(t011, b10, tmp11, (uint32_t)10U);
+    fmul0(c10, t011, b10, tmp);
+    fsquare_times(t011, c10, tmp11, (uint32_t)20U);
+    fmul0(t011, t011, c10, tmp);
+    fsquare_times(t011, t011, tmp11, (uint32_t)10U);
+    fmul0(b10, t011, b10, tmp);
+    fsquare_times(t011, b10, tmp11, (uint32_t)50U);
+    fmul0(c10, t011, b10, tmp);
+    uint64_t *b11 = t1 + (uint32_t)4U;
+    uint64_t *c1 = t1 + (uint32_t)8U;
+    uint64_t *t01 = t1 + (uint32_t)12U;
+    uint64_t *tmp1 = tmp;
+    fsquare_times(t01, c1, tmp1, (uint32_t)100U);
+    fmul0(t01, t01, c1, tmp);
+    fsquare_times(t01, t01, tmp1, (uint32_t)50U);
+    fmul0(t01, t01, b11, tmp);
+    fsquare_times(t01, t01, tmp1, (uint32_t)5U);
+    uint64_t *a = t1;
+    uint64_t *t0 = t1 + (uint32_t)12U;
+    fmul0(o, t0, a, tmp);
+}
+/* Folds bit 63 of f[3] back in as 19 (twice), then subtracts 2^255 - 19 when the combined mask is set; writes b[0..3]. Modifies f. */
+static void
+store_felem(uint64_t *b, uint64_t *f)
+{
+    uint64_t f30 = f[3U];
+    uint64_t top_bit0 = f30 >> (uint32_t)63U;
+    f[3U] = f30 & (uint64_t)0x7fffffffffffffffU;
+    add_scalar0(f, f, (uint64_t)19U * top_bit0);
+    uint64_t f31 = f[3U];
+    uint64_t top_bit = f31 >> (uint32_t)63U;
+    f[3U] = f31 & (uint64_t)0x7fffffffffffffffU;
+    add_scalar0(f, f, (uint64_t)19U * top_bit);
+    uint64_t f0 = f[0U];
+    uint64_t f1 = f[1U];
+    uint64_t f2 = f[2U];
+    uint64_t f3 = f[3U];
+    uint64_t m0 = FStar_UInt64_gte_mask(f0, (uint64_t)0xffffffffffffffedU);
+    uint64_t m1 = FStar_UInt64_eq_mask(f1, (uint64_t)0xffffffffffffffffU);
+    uint64_t m2 = FStar_UInt64_eq_mask(f2, (uint64_t)0xffffffffffffffffU);
+    uint64_t m3 = FStar_UInt64_eq_mask(f3, (uint64_t)0x7fffffffffffffffU);
+    uint64_t mask = ((m0 & m1) & m2) & m3; /* NOTE(review): presumably all-ones exactly when f >= 2^255 - 19 - confirm mask helper semantics */
+    uint64_t f0_ = f0 - (mask & (uint64_t)0xffffffffffffffedU);
+    uint64_t f1_ = f1 - (mask & (uint64_t)0xffffffffffffffffU);
+    uint64_t f2_ = f2 - (mask & (uint64_t)0xffffffffffffffffU);
+    uint64_t f3_ = f3 - (mask & (uint64_t)0x7fffffffffffffffU);
+    uint64_t o0 = f0_;
+    uint64_t o1 = f1_;
+    uint64_t o2 = f2_;
+    uint64_t o3 = f3_;
+    b[0U] = o0;
+    b[1U] = o1;
+    b[2U] = o2;
+    b[3U] = o3;
+}
+/* For i = (x, z): computes x * finv(z), reduces it with store_felem, and stores the four limbs to o as little-endian 64-bit words. */
+static void
+encode_point(uint8_t *o, uint64_t *i)
+{
+    uint64_t *x = i;
+    uint64_t *z = i + (uint32_t)4U;
+    uint64_t tmp[4U] = { 0U };
+    uint64_t u64s[4U] = { 0U };
+    uint64_t tmp_w[16U] = { 0U };
+    finv(tmp, z, tmp_w);
+    fmul0(tmp, tmp, x, tmp_w);
+    store_felem(u64s, tmp);
+    KRML_MAYBE_FOR4(i0,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    store64_le(o + i0 * (uint32_t)8U, u64s[i0]););
+}
+/* Loads pub as four little-endian 64-bit limbs, clears bit 63 of limb 3, runs the ladder from (x = pub, z = 1) with priv, encodes into out. */
+void
+Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+    uint64_t init[8U] = { 0U };
+    uint64_t tmp[4U] = { 0U };
+    KRML_MAYBE_FOR4(i,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    uint64_t *os = tmp;
+                    uint8_t *bj = pub + i * (uint32_t)8U;
+                    uint64_t u = load64_le(bj);
+                    uint64_t r = u;
+                    uint64_t x = r;
+                    os[i] = x;);
+    uint64_t tmp3 = tmp[3U];
+    tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU;
+    uint64_t *x = init;
+    uint64_t *z = init + (uint32_t)4U;
+    z[0U] = (uint64_t)1U;
+    z[1U] = (uint64_t)0U;
+    z[2U] = (uint64_t)0U;
+    z[3U] = (uint64_t)0U;
+    x[0U] = tmp[0U];
+    x[1U] = tmp[1U];
+    x[2U] = tmp[2U];
+    x[3U] = tmp[3U];
+    montgomery_ladder(init, priv, init);
+    encode_point(out, init);
+}
+/* Copies g25519 into a local buffer and calls Hacl_Curve25519_64_scalarmult(pub, priv, basepoint). */
+void
+Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv)
+{
+    uint8_t basepoint[32U] = { 0U };
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
+        uint8_t *os = basepoint;
+        uint8_t x = g25519[i];
+        os[i] = x;
+    }
+    Hacl_Curve25519_64_scalarmult(pub, priv, basepoint);
+}
+/* scalarmult, then ANDs FStar_UInt8_eq_mask(out[i], 0) over 32 bytes; returns false iff the result is 255 (presumably: out all zero). */
+bool
+Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+    uint8_t zeros[32U] = { 0U };
+    Hacl_Curve25519_64_scalarmult(out, priv, pub);
+    uint8_t res = (uint8_t)255U;
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
+        uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]);
+        res = uu____0 & res;
+    }
+    uint8_t z = res;
+    bool r = z == (uint8_t)255U;
+    return !r;
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h
new file mode 100644
index 0000000000..e9dec2b9a0
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h
@@ -0,0 +1,49 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#ifndef __Hacl_Curve25519_64_H +#define __Hacl_Curve25519_64_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub); + +void Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv); + +bool Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Curve25519_64_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Krmllib.h b/security/nss/lib/freebl/verified/Hacl_Krmllib.h new file mode 100644 index 0000000000..453492b5cc --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Krmllib.h @@ -0,0 +1,81 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Krmllib_H +#define __Hacl_Krmllib_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +static inline uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b); + +static inline uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b); + +static inline uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b); + +static inline uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b); + +static inline uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b); + +static inline uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); + +static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_be(uint8_t *x0); + +#if 
defined(__cplusplus) +} +#endif + +#define __Hacl_Krmllib_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c new file mode 100644 index 0000000000..ae8570c751 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c @@ -0,0 +1,1616 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "internal/Hacl_Poly1305_128.h"
+/* load_acc2: unpacks the loads at b and b + 16 into five limbs (mask 0x3ffffff; shifts 26, 52, 14, 40), ORs 0x1000000 into limb 4, adds them into acc[0..4]. */
+void
+Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b)
+{
+    KRML_PRE_ALIGN(16)
+    Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U };
+    Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(b);
+    Lib_IntVector_Intrinsics_vec128
+        b2 = Lib_IntVector_Intrinsics_vec128_load64_le(b + (uint32_t)16U);
+    Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2);
+    Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2);
+    Lib_IntVector_Intrinsics_vec128
+        f00 =
+            Lib_IntVector_Intrinsics_vec128_and(lo,
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        f10 =
+            Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+                                                                                              (uint32_t)26U),
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        f20 =
+            Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+                                                                                             (uint32_t)52U),
+                                               Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi,
+                                                                                                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+                                                                                            (uint32_t)12U));
+    Lib_IntVector_Intrinsics_vec128
+        f30 =
+            Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi,
+                                                                                              (uint32_t)14U),
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U);
+    Lib_IntVector_Intrinsics_vec128 f02 = f00;
+    Lib_IntVector_Intrinsics_vec128 f12 = f10;
+    Lib_IntVector_Intrinsics_vec128 f22 = f20;
+    Lib_IntVector_Intrinsics_vec128 f32 = f30;
+    Lib_IntVector_Intrinsics_vec128 f42 = f40;
+    e[0U] = f02;
+    e[1U] = f12;
+    e[2U] = f22;
+    e[3U] = f32;
+    e[4U] = f42;
+    uint64_t b10 = (uint64_t)0x1000000U;
+    Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b10);
+    Lib_IntVector_Intrinsics_vec128 f43 = e[4U];
+    e[4U] = Lib_IntVector_Intrinsics_vec128_or(f43, mask);
+    Lib_IntVector_Intrinsics_vec128 acc0 = acc[0U];
+    Lib_IntVector_Intrinsics_vec128 acc1 = acc[1U];
+    Lib_IntVector_Intrinsics_vec128 acc2 = acc[2U];
+    Lib_IntVector_Intrinsics_vec128 acc3 = acc[3U];
+    Lib_IntVector_Intrinsics_vec128 acc4 = acc[4U];
+    Lib_IntVector_Intrinsics_vec128 e0 = e[0U];
+    Lib_IntVector_Intrinsics_vec128 e1 = e[1U];
+    Lib_IntVector_Intrinsics_vec128 e2 = e[2U];
+    Lib_IntVector_Intrinsics_vec128 e3 = e[3U];
+    Lib_IntVector_Intrinsics_vec128 e4 = e[4U];
+    Lib_IntVector_Intrinsics_vec128
+        f0 = Lib_IntVector_Intrinsics_vec128_insert64(acc0, (uint64_t)0U, (uint32_t)1U); /* each acc limb gets 0 inserted at index 1 before the add */
+    Lib_IntVector_Intrinsics_vec128
+        f1 = Lib_IntVector_Intrinsics_vec128_insert64(acc1, (uint64_t)0U, (uint32_t)1U);
+    Lib_IntVector_Intrinsics_vec128
+        f2 = Lib_IntVector_Intrinsics_vec128_insert64(acc2, (uint64_t)0U, (uint32_t)1U);
+    Lib_IntVector_Intrinsics_vec128
+        f3 = Lib_IntVector_Intrinsics_vec128_insert64(acc3, (uint64_t)0U, (uint32_t)1U);
+    Lib_IntVector_Intrinsics_vec128
+        f4 = Lib_IntVector_Intrinsics_vec128_insert64(acc4, (uint64_t)0U, (uint32_t)1U);
+    Lib_IntVector_Intrinsics_vec128 f01 = Lib_IntVector_Intrinsics_vec128_add64(f0, e0);
+    Lib_IntVector_Intrinsics_vec128 f11 = Lib_IntVector_Intrinsics_vec128_add64(f1, e1);
+    Lib_IntVector_Intrinsics_vec128 f21 = Lib_IntVector_Intrinsics_vec128_add64(f2, e2);
+    Lib_IntVector_Intrinsics_vec128 f31 = Lib_IntVector_Intrinsics_vec128_add64(f3, e3);
+    Lib_IntVector_Intrinsics_vec128 f41 = Lib_IntVector_Intrinsics_vec128_add64(f4, e4);
+    Lib_IntVector_Intrinsics_vec128 acc01 = f01;
+    Lib_IntVector_Intrinsics_vec128 acc11 = f11;
+    Lib_IntVector_Intrinsics_vec128 acc21 = f21;
+    Lib_IntVector_Intrinsics_vec128 acc31 = f31;
+    Lib_IntVector_Intrinsics_vec128 acc41 = f41;
+    acc[0U] = acc01;
+    acc[1U] = acc11;
+    acc[2U] = acc21;
+    acc[3U] = acc31;
+    acc[4U] = acc41;
+}
+
+void
+Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(
+    Lib_IntVector_Intrinsics_vec128 *out,
+    Lib_IntVector_Intrinsics_vec128 *p)
+{ /* r = p[0..4], r2 = p[10..14]. Multiplies out by interleave_low64(r2[i], r[i]), carries with 26-bit masks, adds each limb to its interleave_high64, carries again, writes out[0..4]. */
+    Lib_IntVector_Intrinsics_vec128 *r = p;
+    Lib_IntVector_Intrinsics_vec128 *r2 = p + (uint32_t)10U;
+    Lib_IntVector_Intrinsics_vec128 a0 = out[0U];
+    Lib_IntVector_Intrinsics_vec128 a1 = out[1U];
+    Lib_IntVector_Intrinsics_vec128 a2 = out[2U];
+    Lib_IntVector_Intrinsics_vec128 a3 = out[3U];
+    Lib_IntVector_Intrinsics_vec128 a4 = out[4U];
+    Lib_IntVector_Intrinsics_vec128 r10 = r[0U];
+    Lib_IntVector_Intrinsics_vec128 r11 = r[1U];
+    Lib_IntVector_Intrinsics_vec128 r12 = r[2U];
+    Lib_IntVector_Intrinsics_vec128 r13 = r[3U];
+    Lib_IntVector_Intrinsics_vec128 r14 = r[4U];
+    Lib_IntVector_Intrinsics_vec128 r20 = r2[0U];
+    Lib_IntVector_Intrinsics_vec128 r21 = r2[1U];
+    Lib_IntVector_Intrinsics_vec128 r22 = r2[2U];
+    Lib_IntVector_Intrinsics_vec128 r23 = r2[3U];
+    Lib_IntVector_Intrinsics_vec128 r24 = r2[4U];
+    Lib_IntVector_Intrinsics_vec128
+        r201 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r20, r10);
+    Lib_IntVector_Intrinsics_vec128
+        r211 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r21, r11);
+    Lib_IntVector_Intrinsics_vec128
+        r221 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r22, r12);
+    Lib_IntVector_Intrinsics_vec128
+        r231 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r23, r13);
+    Lib_IntVector_Intrinsics_vec128
+        r241 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r24, r14);
+    Lib_IntVector_Intrinsics_vec128
+        r251 = Lib_IntVector_Intrinsics_vec128_smul64(r211, (uint64_t)5U); /* r251..r254 are limbs 1..4 of the combined multiplier times 5 */
+    Lib_IntVector_Intrinsics_vec128
+        r252 = Lib_IntVector_Intrinsics_vec128_smul64(r221, (uint64_t)5U);
+    Lib_IntVector_Intrinsics_vec128
+        r253 = Lib_IntVector_Intrinsics_vec128_smul64(r231, (uint64_t)5U);
+    Lib_IntVector_Intrinsics_vec128
+        r254 = Lib_IntVector_Intrinsics_vec128_smul64(r241, (uint64_t)5U);
+    Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_mul64(r201, a0);
+    Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_mul64(r211, a0);
+    Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_mul64(r221, a0);
+    Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_mul64(r231, a0);
+    Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_mul64(r241, a0);
+    Lib_IntVector_Intrinsics_vec128
+        a02 =
+            Lib_IntVector_Intrinsics_vec128_add64(a01,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r254, a1));
+    Lib_IntVector_Intrinsics_vec128
+        a12 =
+            Lib_IntVector_Intrinsics_vec128_add64(a11,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r201, a1));
+    Lib_IntVector_Intrinsics_vec128
+        a22 =
+            Lib_IntVector_Intrinsics_vec128_add64(a21,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r211, a1));
+    Lib_IntVector_Intrinsics_vec128
+        a32 =
+            Lib_IntVector_Intrinsics_vec128_add64(a31,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r221, a1));
+    Lib_IntVector_Intrinsics_vec128
+        a42 =
+            Lib_IntVector_Intrinsics_vec128_add64(a41,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r231, a1));
+    Lib_IntVector_Intrinsics_vec128
+        a03 =
+            Lib_IntVector_Intrinsics_vec128_add64(a02,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r253, a2));
+    Lib_IntVector_Intrinsics_vec128
+        a13 =
+            Lib_IntVector_Intrinsics_vec128_add64(a12,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r254, a2));
+    Lib_IntVector_Intrinsics_vec128
+        a23 =
+            Lib_IntVector_Intrinsics_vec128_add64(a22,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r201, a2));
+    Lib_IntVector_Intrinsics_vec128
+        a33 =
+            Lib_IntVector_Intrinsics_vec128_add64(a32,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r211, a2));
+    Lib_IntVector_Intrinsics_vec128
+        a43 =
+            Lib_IntVector_Intrinsics_vec128_add64(a42,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r221, a2));
+    Lib_IntVector_Intrinsics_vec128
+        a04 =
+            Lib_IntVector_Intrinsics_vec128_add64(a03,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r252, a3));
+    Lib_IntVector_Intrinsics_vec128
+        a14 =
+            Lib_IntVector_Intrinsics_vec128_add64(a13,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r253, a3));
+    Lib_IntVector_Intrinsics_vec128
+        a24 =
+            Lib_IntVector_Intrinsics_vec128_add64(a23,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r254, a3));
+    Lib_IntVector_Intrinsics_vec128
+        a34 =
+            Lib_IntVector_Intrinsics_vec128_add64(a33,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r201, a3));
+    Lib_IntVector_Intrinsics_vec128
+        a44 =
+            Lib_IntVector_Intrinsics_vec128_add64(a43,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r211, a3));
+    Lib_IntVector_Intrinsics_vec128
+        a05 =
+            Lib_IntVector_Intrinsics_vec128_add64(a04,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r251, a4));
+    Lib_IntVector_Intrinsics_vec128
+        a15 =
+            Lib_IntVector_Intrinsics_vec128_add64(a14,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r252, a4));
+    Lib_IntVector_Intrinsics_vec128
+        a25 =
+            Lib_IntVector_Intrinsics_vec128_add64(a24,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r253, a4));
+    Lib_IntVector_Intrinsics_vec128
+        a35 =
+            Lib_IntVector_Intrinsics_vec128_add64(a34,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r254, a4));
+    Lib_IntVector_Intrinsics_vec128
+        a45 =
+            Lib_IntVector_Intrinsics_vec128_add64(a44,
+                                                  Lib_IntVector_Intrinsics_vec128_mul64(r201, a4));
+    Lib_IntVector_Intrinsics_vec128 t0 = a05;
+    Lib_IntVector_Intrinsics_vec128 t1 = a15;
+    Lib_IntVector_Intrinsics_vec128 t2 = a25;
+    Lib_IntVector_Intrinsics_vec128 t3 = a35;
+    Lib_IntVector_Intrinsics_vec128 t4 = a45;
+    Lib_IntVector_Intrinsics_vec128
+        mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* first carry pass over t0..t4 starts here */
+    Lib_IntVector_Intrinsics_vec128
+        z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128
+        z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+    Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+    Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+    Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+    Lib_IntVector_Intrinsics_vec128
+        z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128
+        z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128
+        t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+    Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z12 = z11 + (z11 << 2), i.e. the limb-4 carry times 5 */
+    Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+    Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+    Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+    Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+    Lib_IntVector_Intrinsics_vec128
+        z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128
+        z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+    Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+    Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+    Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+    Lib_IntVector_Intrinsics_vec128
+        z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+    Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+    Lib_IntVector_Intrinsics_vec128 o0 = x02;
+    Lib_IntVector_Intrinsics_vec128 o10 = x12;
+    Lib_IntVector_Intrinsics_vec128 o20 = x21;
+    Lib_IntVector_Intrinsics_vec128 o30 = x32;
+    Lib_IntVector_Intrinsics_vec128 o40 = x42;
+    Lib_IntVector_Intrinsics_vec128
+        o01 =
+            Lib_IntVector_Intrinsics_vec128_add64(o0,
+                                                  Lib_IntVector_Intrinsics_vec128_interleave_high64(o0, o0));
+    Lib_IntVector_Intrinsics_vec128
+        o11 =
+            Lib_IntVector_Intrinsics_vec128_add64(o10,
+                                                  Lib_IntVector_Intrinsics_vec128_interleave_high64(o10, o10));
+    Lib_IntVector_Intrinsics_vec128
+        o21 =
+            Lib_IntVector_Intrinsics_vec128_add64(o20,
+                                                  Lib_IntVector_Intrinsics_vec128_interleave_high64(o20, o20));
+    Lib_IntVector_Intrinsics_vec128
+        o31 =
+            Lib_IntVector_Intrinsics_vec128_add64(o30,
+                                                  Lib_IntVector_Intrinsics_vec128_interleave_high64(o30, o30));
+    Lib_IntVector_Intrinsics_vec128
+        o41 =
+            Lib_IntVector_Intrinsics_vec128_add64(o40,
+                                                  Lib_IntVector_Intrinsics_vec128_interleave_high64(o40, o40));
+    Lib_IntVector_Intrinsics_vec128
+        l = Lib_IntVector_Intrinsics_vec128_add64(o01, Lib_IntVector_Intrinsics_vec128_zero); /* second carry pass: limb 0 up to limb 4 */
+    Lib_IntVector_Intrinsics_vec128
+        tmp0 =
+            Lib_IntVector_Intrinsics_vec128_and(l,
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(o11, c0);
+    Lib_IntVector_Intrinsics_vec128
+        tmp1 =
+            Lib_IntVector_Intrinsics_vec128_and(l0,
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(o21, c1);
+    Lib_IntVector_Intrinsics_vec128
+        tmp2 =
+            Lib_IntVector_Intrinsics_vec128_and(l1,
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(o31, c2);
+    Lib_IntVector_Intrinsics_vec128
+        tmp3 =
+            Lib_IntVector_Intrinsics_vec128_and(l2,
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(o41, c3);
+    Lib_IntVector_Intrinsics_vec128
+        tmp4 =
+            Lib_IntVector_Intrinsics_vec128_and(l3,
+                                                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128
+        o00 =
+            Lib_IntVector_Intrinsics_vec128_add64(tmp0,
+                                                  Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); /* the carry out of limb 4 re-enters limb 0 multiplied by 5 */
+    Lib_IntVector_Intrinsics_vec128 o1 = tmp1;
+    Lib_IntVector_Intrinsics_vec128 o2 = tmp2;
+    Lib_IntVector_Intrinsics_vec128 o3 = tmp3;
+    Lib_IntVector_Intrinsics_vec128 o4 = tmp4;
+    out[0U] = o00;
+    out[1U] = o1;
+    out[2U] = o2;
+    out[3U] = o3;
+    out[4U] = o4;
+}
+
+void
+Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key)
+{
+    Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+    Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U;
+    uint8_t *kr = key;
+    acc[0U] = Lib_IntVector_Intrinsics_vec128_zero;
+    acc[1U] = Lib_IntVector_Intrinsics_vec128_zero;
+    acc[2U] = Lib_IntVector_Intrinsics_vec128_zero;
+    acc[3U] = Lib_IntVector_Intrinsics_vec128_zero;
+    acc[4U] = Lib_IntVector_Intrinsics_vec128_zero;
+    uint64_t u0 = load64_le(kr);
+    uint64_t lo = u0;
+    uint64_t u = load64_le(kr + (uint32_t)8U);
+    uint64_t hi = u;
+    uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU;
+    uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU;
+    uint64_t lo1 = lo & mask0;
+    uint64_t hi1 = hi & mask1;
+    Lib_IntVector_Intrinsics_vec128 *r = pre;
+    Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U;
+    Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U;
+    Lib_IntVector_Intrinsics_vec128 *rn_5 = pre + (uint32_t)15U;
+    Lib_IntVector_Intrinsics_vec128 r_vec0 = Lib_IntVector_Intrinsics_vec128_load64(lo1);
+    Lib_IntVector_Intrinsics_vec128 r_vec1 = Lib_IntVector_Intrinsics_vec128_load64(hi1);
+    Lib_IntVector_Intrinsics_vec128
+        f00 =
+
Lib_IntVector_Intrinsics_vec128_and(r_vec0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(r_vec1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + Lib_IntVector_Intrinsics_vec128 f200 = r[0U]; + Lib_IntVector_Intrinsics_vec128 f210 = r[1U]; + Lib_IntVector_Intrinsics_vec128 f220 = r[2U]; + Lib_IntVector_Intrinsics_vec128 f230 = r[3U]; + Lib_IntVector_Intrinsics_vec128 f240 = r[4U]; + r5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f200, (uint64_t)5U); + r5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f210, (uint64_t)5U); + r5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f220, (uint64_t)5U); + r5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f230, (uint64_t)5U); + r5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f240, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = 
r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = r[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = r[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = r[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = r[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = r[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f11)); + Lib_IntVector_Intrinsics_vec128 + a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, Lib_IntVector_Intrinsics_vec128_mul64(r0, f11)); + Lib_IntVector_Intrinsics_vec128 + a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, Lib_IntVector_Intrinsics_vec128_mul64(r1, f11)); + Lib_IntVector_Intrinsics_vec128 + a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, Lib_IntVector_Intrinsics_vec128_mul64(r2, f11)); + Lib_IntVector_Intrinsics_vec128 + a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, Lib_IntVector_Intrinsics_vec128_mul64(r3, f11)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f12)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f12)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + 
Lib_IntVector_Intrinsics_vec128_mul64(r0, f12)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f12)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f12)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f13)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f13)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f13)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f13)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f13)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f14)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f14)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f14)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f14)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f14)); + Lib_IntVector_Intrinsics_vec128 t0 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + mask26 = 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + rn[0U] = o0; + rn[1U] = o1; + rn[2U] = o2; + rn[3U] = o3; + rn[4U] = o4; + Lib_IntVector_Intrinsics_vec128 f201 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = rn[4U]; + rn_5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f201, (uint64_t)5U); + rn_5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f21, (uint64_t)5U); + rn_5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f22, (uint64_t)5U); + rn_5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f23, (uint64_t)5U); + rn_5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f24, (uint64_t)5U); +} + +void +Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(text); + uint64_t lo = u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + 
Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + 
Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + 
Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + 
Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = 
Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; +} + +void +Hacl_Poly1305_128_poly1305_update( + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t len, + uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + uint32_t sz_block = (uint32_t)32U; + uint32_t len0 = len / sz_block * sz_block; + uint8_t *t0 = text; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)32U; + uint8_t *text0 = t0; + Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t0 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + f00 = + 
Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f25 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f25; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f0; + e[1U] = f1; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 *rn5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U]; + 
Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f110 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f120 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f130 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f140 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec128 + a11 = + Lib_IntVector_Intrinsics_vec128_add64(a1, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec128 + a21 = + Lib_IntVector_Intrinsics_vec128_add64(a2, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec128 + a31 = + Lib_IntVector_Intrinsics_vec128_add64(a3, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec128 + a41 = + Lib_IntVector_Intrinsics_vec128_add64(a4, + Lib_IntVector_Intrinsics_vec128_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f120)); + 
Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec128 t01 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + 
Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + 
Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o00 = x02; + Lib_IntVector_Intrinsics_vec128 o10 = x12; + Lib_IntVector_Intrinsics_vec128 o20 = x21; + Lib_IntVector_Intrinsics_vec128 o30 = x32; + Lib_IntVector_Intrinsics_vec128 o40 = x42; + acc[0U] = o00; + acc[1U] = o10; + acc[2U] = o20; + acc[3U] = o30; + acc[4U] = o40; + Lib_IntVector_Intrinsics_vec128 f100 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23); + Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24); + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc, pre); + } + uint32_t len1 = len - len0; + uint8_t *t1 = text + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t1 + i * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + 
Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + 
Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, 
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + 
Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + if (rem > (uint32_t)0U) { + uint8_t *last = t1 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 fi = e[rem * (uint32_t)8U / (uint32_t)26U]; + e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + 
Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + 
Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + 
Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = 
Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +void +Hacl_Poly1305_128_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec128 *ctx) +{ /* Finalises the MAC. Fully carries the five 26-bit accumulator limbs in ctx[0..4], conditionally subtracts 2^130 - 5, packs lane 0 into a 128-bit value, adds the 16 key bytes at key + 16 modulo 2^128 and writes the 16-byte little-endian result to tag. ctx[0..4] is overwritten with the reduced limbs. */ + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + uint8_t *ks = key + (uint32_t)16U; /* ks: key bytes 16..31, only used for the final addition */ + Lib_IntVector_Intrinsics_vec128 f0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f13 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f23 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f33 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f40 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 + l0 = Lib_IntVector_Intrinsics_vec128_add64(f0, Lib_IntVector_Intrinsics_vec128_zero); /* first carry pass: keep the low 26 bits of each limb (tmpN0) and push the rest (cN0) into the next limb */ + Lib_IntVector_Intrinsics_vec128 + tmp00 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c00 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(f13, c00); + Lib_IntVector_Intrinsics_vec128 + tmp10 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c10 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1,
(uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(f23, c10); + Lib_IntVector_Intrinsics_vec128 + tmp20 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c20 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(f33, c20); + Lib_IntVector_Intrinsics_vec128 + tmp30 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c30 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l4 = Lib_IntVector_Intrinsics_vec128_add64(f40, c30); + Lib_IntVector_Intrinsics_vec128 + tmp40 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c40 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_add64(tmp00, + Lib_IntVector_Intrinsics_vec128_smul64(c40, (uint64_t)5U)); /* the carry out of limb 4 has weight 2^130 (5 limbs of 26 bits); it is folded back into limb 0 as 5 * c40 */ + Lib_IntVector_Intrinsics_vec128 f110 = tmp10; + Lib_IntVector_Intrinsics_vec128 f210 = tmp20; + Lib_IntVector_Intrinsics_vec128 f310 = tmp30; + Lib_IntVector_Intrinsics_vec128 f410 = tmp40; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(f010, Lib_IntVector_Intrinsics_vec128_zero); /* second carry pass, same shape as the first, absorbing the 5 * c40 just added to limb 0 */ + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l5 = Lib_IntVector_Intrinsics_vec128_add64(f110, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l5, +
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l5, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l6 = Lib_IntVector_Intrinsics_vec128_add64(f210, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l6, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l6, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l7 = Lib_IntVector_Intrinsics_vec128_add64(f310, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l7, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l7, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l8 = Lib_IntVector_Intrinsics_vec128_add64(f410, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l8, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l8, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + f02 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 f12 = tmp1; + Lib_IntVector_Intrinsics_vec128 f22 = tmp2; + Lib_IntVector_Intrinsics_vec128 f32 = tmp3; + Lib_IntVector_Intrinsics_vec128 f42 = tmp4; + Lib_IntVector_Intrinsics_vec128 + mh = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + ml = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffffbU); /* (ml, mh, mh, mh, mh) are the 26-bit limbs of 2^130 - 5 */ + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_eq64(f42, mh); + Lib_IntVector_Intrinsics_vec128 + mask1 = + Lib_IntVector_Intrinsics_vec128_and(mask, + Lib_IntVector_Intrinsics_vec128_eq64(f32, mh)); +
Lib_IntVector_Intrinsics_vec128 + mask2 = + Lib_IntVector_Intrinsics_vec128_and(mask1, + Lib_IntVector_Intrinsics_vec128_eq64(f22, mh)); + Lib_IntVector_Intrinsics_vec128 + mask3 = + Lib_IntVector_Intrinsics_vec128_and(mask2, + Lib_IntVector_Intrinsics_vec128_eq64(f12, mh)); + Lib_IntVector_Intrinsics_vec128 + mask4 = + Lib_IntVector_Intrinsics_vec128_and(mask3, + Lib_IntVector_Intrinsics_vec128_lognot(Lib_IntVector_Intrinsics_vec128_gt64(ml, f02))); /* mask4 is set where limbs 1..4 all equal mh and limb 0 is not below ml, i.e. the value is >= 2^130 - 5. NOTE(review): assumes eq64/gt64 return all-ones or all-zero 64-bit lanes - confirm in libintvector.h */ + Lib_IntVector_Intrinsics_vec128 ph = Lib_IntVector_Intrinsics_vec128_and(mask4, mh); + Lib_IntVector_Intrinsics_vec128 pl = Lib_IntVector_Intrinsics_vec128_and(mask4, ml); /* (pl, ph, ph, ph, ph) is 2^130 - 5 where mask4 is set and 0 elsewhere, so the subtraction below needs no branch */ + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_sub64(f02, pl); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_sub64(f12, ph); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_sub64(f22, ph); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_sub64(f32, ph); + Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_sub64(f42, ph); + Lib_IntVector_Intrinsics_vec128 f011 = o0; + Lib_IntVector_Intrinsics_vec128 f111 = o1; + Lib_IntVector_Intrinsics_vec128 f211 = o2; + Lib_IntVector_Intrinsics_vec128 f311 = o3; + Lib_IntVector_Intrinsics_vec128 f411 = o4; + acc[0U] = f011; + acc[1U] = f111; + acc[2U] = f211; + acc[3U] = f311; + acc[4U] = f411; + Lib_IntVector_Intrinsics_vec128 f00 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f4 = acc[4U]; + uint64_t f01 = Lib_IntVector_Intrinsics_vec128_extract64(f00, (uint32_t)0U); /* from here on only lane 0 of each limb is used */ + uint64_t f112 = Lib_IntVector_Intrinsics_vec128_extract64(f1, (uint32_t)0U); + uint64_t f212 = Lib_IntVector_Intrinsics_vec128_extract64(f2, (uint32_t)0U); + uint64_t f312 = Lib_IntVector_Intrinsics_vec128_extract64(f3, (uint32_t)0U); + uint64_t f41 = Lib_IntVector_Intrinsics_vec128_extract64(f4,
(uint32_t)0U); + uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; + uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* lo/hi: the 26-bit limbs packed into bits 0..63 and 64..127; limb 2 straddles the two words, bits from 128 upwards are dropped */ + uint64_t f10 = lo; + uint64_t f11 = hi; + uint64_t u0 = load64_le(ks); + uint64_t lo0 = u0; + uint64_t u = load64_le(ks + (uint32_t)8U); + uint64_t hi0 = u; + uint64_t f20 = lo0; + uint64_t f21 = hi0; + uint64_t r0 = f10 + f20; + uint64_t r1 = f11 + f21; + uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* c is 1 exactly when r0 < f20 (unsigned), i.e. the low-word addition wrapped; computed with bit operations only, no comparison or branch */ + uint64_t r11 = r1 + c; + uint64_t f30 = r0; + uint64_t f31 = r11; + store64_le(tag, f30); + store64_le(tag + (uint32_t)8U, f31); /* tag = (packed accumulator + key bytes 16..31) mod 2^128, stored as two little-endian 64-bit words */ +} + +void +Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) +{ /* One-shot MAC: runs init, update over len bytes of text, then finish, on a local zero-initialised context of 25 vectors; the result is written to tag by finish. */ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U }; + Hacl_Poly1305_128_poly1305_init(ctx, key); + Hacl_Poly1305_128_poly1305_update(ctx, len, text); + Hacl_Poly1305_128_poly1305_finish(tag, key, ctx); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h new file mode 100644 index 0000000000..03069fdb4e --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h @@ -0,0 +1,63 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Poly1305_128_H +#define __Hacl_Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> /* restored header name: the directive had lost its operand; memcpy is used by the implementation */ +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +#include "libintvector.h" +typedef Lib_IntVector_Intrinsics_vec128 *Hacl_Poly1305_128_poly1305_ctx; /* context handle: pointer to an array of 128-bit vectors (the one-shot mac allocates 25 of them) */ + +void Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key); /* NOTE(review): presumably derives the initial ctx state from key - confirm against the definition */ + +void Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text); /* NOTE(review): presumably absorbs one block from text - confirm against the definition */ + +void +Hacl_Poly1305_128_poly1305_update( + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t len, + uint8_t *text); /* NOTE(review): presumably absorbs len bytes of text into the accumulator in ctx - confirm against the definition */ + +void +Hacl_Poly1305_128_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec128 *ctx); /* reduces the accumulator in ctx, adds key bytes 16..31 and writes the 16-byte tag */ + +void Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); /* one-shot: init, update, finish on a local context */ + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Poly1305_128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c new file mode 100644 index 0000000000..c5a2a5908e --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c @@ -0,0 +1,2087 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the
Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "internal/Hacl_Poly1305_256.h" + +void +Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b) +{ /* Reads input from b as two vectors (at b and at b + 32), splits it into five limb vectors of 26 bits per 64-bit lane, sets the 2^128 marker bit in limb 4, then adds lane 0 of the previous accumulator limbs. The result replaces acc[0..4]. NOTE(review): presumably each load covers 32 bytes, i.e. 64 input bytes in total - confirm in libintvector.h */ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(b); + Lib_IntVector_Intrinsics_vec256 + hi = Lib_IntVector_Intrinsics_vec256_load64_le(b + (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); /* low 26 bits; NOTE(review): presumably broadcast to every 64-bit lane - confirm */ + Lib_IntVector_Intrinsics_vec256 m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); + Lib_IntVector_Intrinsics_vec256 t0 =
Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); + Lib_IntVector_Intrinsics_vec256 t3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); + Lib_IntVector_Intrinsics_vec256 + t2 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)4U); + Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t2, mask26); + Lib_IntVector_Intrinsics_vec256 + t1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t1, mask26); + Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 + t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)30U); + Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask26); + Lib_IntVector_Intrinsics_vec256 + o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); /* o5, o10, o20, o30, o40 become limbs 0..4; the first four are masked to 26 bits, the top one is a plain shift by 40 */ + Lib_IntVector_Intrinsics_vec256 o0 = o5; + Lib_IntVector_Intrinsics_vec256 o1 = o10; + Lib_IntVector_Intrinsics_vec256 o2 = o20; + Lib_IntVector_Intrinsics_vec256 o3 = o30; + Lib_IntVector_Intrinsics_vec256 o4 = o40; + e[0U] = o0; + e[1U] = o1; + e[2U] = o2; + e[3U] = o3; + e[4U] = o4; + uint64_t b1 = (uint64_t)0x1000000U; /* bit 24 of limb 4; with 26-bit limbs that has weight 2^(4*26 + 24) = 2^128 */ + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b1); + Lib_IntVector_Intrinsics_vec256 f40 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f40, mask); + Lib_IntVector_Intrinsics_vec256 acc0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 acc1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 acc2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 acc3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 acc4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 e0 = e[0U]; + Lib_IntVector_Intrinsics_vec256 e1 = e[1U]; + Lib_IntVector_Intrinsics_vec256 e2 = e[2U]; + Lib_IntVector_Intrinsics_vec256 e3 = e[3U]; + Lib_IntVector_Intrinsics_vec256 e4 = e[4U]; + Lib_IntVector_Intrinsics_vec256 r0 =
Lib_IntVector_Intrinsics_vec256_zero; + Lib_IntVector_Intrinsics_vec256 r1 = Lib_IntVector_Intrinsics_vec256_zero; + Lib_IntVector_Intrinsics_vec256 r2 = Lib_IntVector_Intrinsics_vec256_zero; + Lib_IntVector_Intrinsics_vec256 r3 = Lib_IntVector_Intrinsics_vec256_zero; + Lib_IntVector_Intrinsics_vec256 r4 = Lib_IntVector_Intrinsics_vec256_zero; /* r0..r4 start as zero; below, only lane 0 is filled with lane 0 of the old accumulator limb */ + Lib_IntVector_Intrinsics_vec256 + r01 = + Lib_IntVector_Intrinsics_vec256_insert64(r0, + Lib_IntVector_Intrinsics_vec256_extract64(acc0, (uint32_t)0U), + (uint32_t)0U); + Lib_IntVector_Intrinsics_vec256 + r11 = + Lib_IntVector_Intrinsics_vec256_insert64(r1, + Lib_IntVector_Intrinsics_vec256_extract64(acc1, (uint32_t)0U), + (uint32_t)0U); + Lib_IntVector_Intrinsics_vec256 + r21 = + Lib_IntVector_Intrinsics_vec256_insert64(r2, + Lib_IntVector_Intrinsics_vec256_extract64(acc2, (uint32_t)0U), + (uint32_t)0U); + Lib_IntVector_Intrinsics_vec256 + r31 = + Lib_IntVector_Intrinsics_vec256_insert64(r3, + Lib_IntVector_Intrinsics_vec256_extract64(acc3, (uint32_t)0U), + (uint32_t)0U); + Lib_IntVector_Intrinsics_vec256 + r41 = + Lib_IntVector_Intrinsics_vec256_insert64(r4, + Lib_IntVector_Intrinsics_vec256_extract64(acc4, (uint32_t)0U), + (uint32_t)0U); + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_add64(r01, e0); /* new limb = freshly loaded limb, plus the old accumulator in lane 0 only; other lanes of the old accumulator are discarded */ + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_add64(r11, e1); + Lib_IntVector_Intrinsics_vec256 f2 = Lib_IntVector_Intrinsics_vec256_add64(r21, e2); + Lib_IntVector_Intrinsics_vec256 f3 = Lib_IntVector_Intrinsics_vec256_add64(r31, e3); + Lib_IntVector_Intrinsics_vec256 f4 = Lib_IntVector_Intrinsics_vec256_add64(r41, e4); + Lib_IntVector_Intrinsics_vec256 acc01 = f0; + Lib_IntVector_Intrinsics_vec256 acc11 = f1; + Lib_IntVector_Intrinsics_vec256 acc21 = f2; + Lib_IntVector_Intrinsics_vec256 acc31 = f3; + Lib_IntVector_Intrinsics_vec256 acc41 = f4; + acc[0U] = acc01; + acc[1U] = acc11; + acc[2U] = acc21; + acc[3U] = acc31; + acc[4U] = acc41; +} + +void
+Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( + Lib_IntVector_Intrinsics_vec256 *out, + Lib_IntVector_Intrinsics_vec256 *p) +{ /* Reads the five limb vectors out[0..4] and the vectors p[0..4] (r), p[6..9] (r_5[1..4]) and p[10..14] (r4), then overwrites out[0..4] with the result. Limbs are 26 bits wide (mask 0x3ffffff, shifts by 26); the carry out of the top limb is multiplied by 5 and added to limb 0. NOTE(review): names suggest p holds r, 5r and r^4 and that this multiplies the parallel accumulator lanes by r^4..r^1 and folds them into one value; confirm against the precomputation done by the caller. */ + Lib_IntVector_Intrinsics_vec256 *r = p; + Lib_IntVector_Intrinsics_vec256 *r_5 = p + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *r4 = p + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec256 a0 = out[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = out[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = out[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = out[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = out[4U]; + Lib_IntVector_Intrinsics_vec256 r10 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r11 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r12 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r13 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r14 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r151 = r_5[1U]; + Lib_IntVector_Intrinsics_vec256 r152 = r_5[2U]; + Lib_IntVector_Intrinsics_vec256 r153 = r_5[3U]; + Lib_IntVector_Intrinsics_vec256 r154 = r_5[4U]; + Lib_IntVector_Intrinsics_vec256 r40 = r4[0U]; + Lib_IntVector_Intrinsics_vec256 r41 = r4[1U]; + Lib_IntVector_Intrinsics_vec256 r42 = r4[2U]; + Lib_IntVector_Intrinsics_vec256 r43 = r4[3U]; + Lib_IntVector_Intrinsics_vec256 r44 = r4[4U]; + /* Limb-by-limb product of r10..r14 with themselves; r151..r154 supply the wrapped-around terms. */ Lib_IntVector_Intrinsics_vec256 a010 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r10); + Lib_IntVector_Intrinsics_vec256 a110 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r10); + Lib_IntVector_Intrinsics_vec256 a210 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r10); + Lib_IntVector_Intrinsics_vec256 a310 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r10); + Lib_IntVector_Intrinsics_vec256 a410 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r10); + Lib_IntVector_Intrinsics_vec256 + a020 = + Lib_IntVector_Intrinsics_vec256_add64(a010, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r11)); + Lib_IntVector_Intrinsics_vec256 + a120 = + Lib_IntVector_Intrinsics_vec256_add64(a110, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r11)); + Lib_IntVector_Intrinsics_vec256 + a220 = +
Lib_IntVector_Intrinsics_vec256_add64(a210, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r11)); + Lib_IntVector_Intrinsics_vec256 + a320 = + Lib_IntVector_Intrinsics_vec256_add64(a310, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r11)); + Lib_IntVector_Intrinsics_vec256 + a420 = + Lib_IntVector_Intrinsics_vec256_add64(a410, + Lib_IntVector_Intrinsics_vec256_mul64(r13, r11)); + Lib_IntVector_Intrinsics_vec256 + a030 = + Lib_IntVector_Intrinsics_vec256_add64(a020, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r12)); + Lib_IntVector_Intrinsics_vec256 + a130 = + Lib_IntVector_Intrinsics_vec256_add64(a120, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r12)); + Lib_IntVector_Intrinsics_vec256 + a230 = + Lib_IntVector_Intrinsics_vec256_add64(a220, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r12)); + Lib_IntVector_Intrinsics_vec256 + a330 = + Lib_IntVector_Intrinsics_vec256_add64(a320, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r12)); + Lib_IntVector_Intrinsics_vec256 + a430 = + Lib_IntVector_Intrinsics_vec256_add64(a420, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r12)); + Lib_IntVector_Intrinsics_vec256 + a040 = + Lib_IntVector_Intrinsics_vec256_add64(a030, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r13)); + Lib_IntVector_Intrinsics_vec256 + a140 = + Lib_IntVector_Intrinsics_vec256_add64(a130, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r13)); + Lib_IntVector_Intrinsics_vec256 + a240 = + Lib_IntVector_Intrinsics_vec256_add64(a230, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r13)); + Lib_IntVector_Intrinsics_vec256 + a340 = + Lib_IntVector_Intrinsics_vec256_add64(a330, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r13)); + Lib_IntVector_Intrinsics_vec256 + a440 = + Lib_IntVector_Intrinsics_vec256_add64(a430, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r13)); + Lib_IntVector_Intrinsics_vec256 + a050 = + Lib_IntVector_Intrinsics_vec256_add64(a040, + Lib_IntVector_Intrinsics_vec256_mul64(r151, r14)); + Lib_IntVector_Intrinsics_vec256 + a150 = +
Lib_IntVector_Intrinsics_vec256_add64(a140, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r14)); + Lib_IntVector_Intrinsics_vec256 + a250 = + Lib_IntVector_Intrinsics_vec256_add64(a240, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r14)); + Lib_IntVector_Intrinsics_vec256 + a350 = + Lib_IntVector_Intrinsics_vec256_add64(a340, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r14)); + Lib_IntVector_Intrinsics_vec256 + a450 = + Lib_IntVector_Intrinsics_vec256_add64(a440, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r14)); + Lib_IntVector_Intrinsics_vec256 t00 = a050; + Lib_IntVector_Intrinsics_vec256 t10 = a150; + Lib_IntVector_Intrinsics_vec256 t20 = a250; + Lib_IntVector_Intrinsics_vec256 t30 = a350; + Lib_IntVector_Intrinsics_vec256 t40 = a450; + /* Carry pass over t00..t40: keep the low 26 bits of each limb, add the bits above them to the next limb, and add 5 times the carry out of limb 4 (z110 plus z110 shifted left by 2) to limb 0. */ Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260); + Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260); + Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00); + Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10); + Lib_IntVector_Intrinsics_vec256 + z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5); + Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260); + Lib_IntVector_Intrinsics_vec256 x410 =
Lib_IntVector_Intrinsics_vec256_and(x40, mask260); + Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010); + Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12); + Lib_IntVector_Intrinsics_vec256 + z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260); + Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260); + Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020); + Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130); + Lib_IntVector_Intrinsics_vec256 + z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260); + Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030); + /* r20..r24: the carried limbs of the product above (presumably r squared; confirm). */ Lib_IntVector_Intrinsics_vec256 r20 = x020; + Lib_IntVector_Intrinsics_vec256 r21 = x120; + Lib_IntVector_Intrinsics_vec256 r22 = x210; + Lib_IntVector_Intrinsics_vec256 r23 = x320; + Lib_IntVector_Intrinsics_vec256 r24 = x420; + /* Same product pattern, now r10..r14 times r20..r24. */ Lib_IntVector_Intrinsics_vec256 a011 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r20); + Lib_IntVector_Intrinsics_vec256 a111 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r20); + Lib_IntVector_Intrinsics_vec256 a211 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r20); + Lib_IntVector_Intrinsics_vec256 a311 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r20); + Lib_IntVector_Intrinsics_vec256 a411 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r20); + Lib_IntVector_Intrinsics_vec256 + a021 = + Lib_IntVector_Intrinsics_vec256_add64(a011, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r21)); + Lib_IntVector_Intrinsics_vec256 + a121 = +
Lib_IntVector_Intrinsics_vec256_add64(a111, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r21)); + Lib_IntVector_Intrinsics_vec256 + a221 = + Lib_IntVector_Intrinsics_vec256_add64(a211, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r21)); + Lib_IntVector_Intrinsics_vec256 + a321 = + Lib_IntVector_Intrinsics_vec256_add64(a311, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r21)); + Lib_IntVector_Intrinsics_vec256 + a421 = + Lib_IntVector_Intrinsics_vec256_add64(a411, + Lib_IntVector_Intrinsics_vec256_mul64(r13, r21)); + Lib_IntVector_Intrinsics_vec256 + a031 = + Lib_IntVector_Intrinsics_vec256_add64(a021, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r22)); + Lib_IntVector_Intrinsics_vec256 + a131 = + Lib_IntVector_Intrinsics_vec256_add64(a121, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r22)); + Lib_IntVector_Intrinsics_vec256 + a231 = + Lib_IntVector_Intrinsics_vec256_add64(a221, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r22)); + Lib_IntVector_Intrinsics_vec256 + a331 = + Lib_IntVector_Intrinsics_vec256_add64(a321, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r22)); + Lib_IntVector_Intrinsics_vec256 + a431 = + Lib_IntVector_Intrinsics_vec256_add64(a421, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r22)); + Lib_IntVector_Intrinsics_vec256 + a041 = + Lib_IntVector_Intrinsics_vec256_add64(a031, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r23)); + Lib_IntVector_Intrinsics_vec256 + a141 = + Lib_IntVector_Intrinsics_vec256_add64(a131, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r23)); + Lib_IntVector_Intrinsics_vec256 + a241 = + Lib_IntVector_Intrinsics_vec256_add64(a231, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r23)); + Lib_IntVector_Intrinsics_vec256 + a341 = + Lib_IntVector_Intrinsics_vec256_add64(a331, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r23)); + Lib_IntVector_Intrinsics_vec256 + a441 = + Lib_IntVector_Intrinsics_vec256_add64(a431, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r23)); + Lib_IntVector_Intrinsics_vec256 + a051 = +
Lib_IntVector_Intrinsics_vec256_add64(a041, + Lib_IntVector_Intrinsics_vec256_mul64(r151, r24)); + Lib_IntVector_Intrinsics_vec256 + a151 = + Lib_IntVector_Intrinsics_vec256_add64(a141, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r24)); + Lib_IntVector_Intrinsics_vec256 + a251 = + Lib_IntVector_Intrinsics_vec256_add64(a241, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r24)); + Lib_IntVector_Intrinsics_vec256 + a351 = + Lib_IntVector_Intrinsics_vec256_add64(a341, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r24)); + Lib_IntVector_Intrinsics_vec256 + a451 = + Lib_IntVector_Intrinsics_vec256_add64(a441, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r24)); + Lib_IntVector_Intrinsics_vec256 t01 = a051; + Lib_IntVector_Intrinsics_vec256 t11 = a151; + Lib_IntVector_Intrinsics_vec256 t21 = a251; + Lib_IntVector_Intrinsics_vec256 t31 = a351; + Lib_IntVector_Intrinsics_vec256 t41 = a451; + Lib_IntVector_Intrinsics_vec256 + mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z04 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z14 = Lib_IntVector_Intrinsics_vec256_shift_right64(t31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x03 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); + Lib_IntVector_Intrinsics_vec256 x33 = Lib_IntVector_Intrinsics_vec256_and(t31, mask261); + Lib_IntVector_Intrinsics_vec256 x13 = Lib_IntVector_Intrinsics_vec256_add64(t11, z04); + Lib_IntVector_Intrinsics_vec256 x43 = Lib_IntVector_Intrinsics_vec256_add64(t41, z14); + Lib_IntVector_Intrinsics_vec256 + z011 = Lib_IntVector_Intrinsics_vec256_shift_right64(x13, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z111 = Lib_IntVector_Intrinsics_vec256_shift_right64(x43, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t6 = Lib_IntVector_Intrinsics_vec256_shift_left64(z111, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z111, t6); +
Lib_IntVector_Intrinsics_vec256 x111 = Lib_IntVector_Intrinsics_vec256_and(x13, mask261); + Lib_IntVector_Intrinsics_vec256 x411 = Lib_IntVector_Intrinsics_vec256_and(x43, mask261); + Lib_IntVector_Intrinsics_vec256 x22 = Lib_IntVector_Intrinsics_vec256_add64(t21, z011); + Lib_IntVector_Intrinsics_vec256 x011 = Lib_IntVector_Intrinsics_vec256_add64(x03, z120); + Lib_IntVector_Intrinsics_vec256 + z021 = Lib_IntVector_Intrinsics_vec256_shift_right64(x22, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z131 = Lib_IntVector_Intrinsics_vec256_shift_right64(x011, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x211 = Lib_IntVector_Intrinsics_vec256_and(x22, mask261); + Lib_IntVector_Intrinsics_vec256 x021 = Lib_IntVector_Intrinsics_vec256_and(x011, mask261); + Lib_IntVector_Intrinsics_vec256 x311 = Lib_IntVector_Intrinsics_vec256_add64(x33, z021); + Lib_IntVector_Intrinsics_vec256 x121 = Lib_IntVector_Intrinsics_vec256_add64(x111, z131); + Lib_IntVector_Intrinsics_vec256 + z031 = Lib_IntVector_Intrinsics_vec256_shift_right64(x311, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x321 = Lib_IntVector_Intrinsics_vec256_and(x311, mask261); + Lib_IntVector_Intrinsics_vec256 x421 = Lib_IntVector_Intrinsics_vec256_add64(x411, z031); + /* r30..r34: the carried limbs of the second product (presumably r cubed; confirm). */ Lib_IntVector_Intrinsics_vec256 r30 = x021; + Lib_IntVector_Intrinsics_vec256 r31 = x121; + Lib_IntVector_Intrinsics_vec256 r32 = x211; + Lib_IntVector_Intrinsics_vec256 r33 = x321; + Lib_IntVector_Intrinsics_vec256 r34 = x421; + /* For each limb index k, combine r1k, r2k, r3k and r4k into one vector r1234k using interleave_low64 and interleave_low128. NOTE(review): the resulting lane order depends on the intrinsic definitions, which are not visible here. */ Lib_IntVector_Intrinsics_vec256 + v12120 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r20, r10); + Lib_IntVector_Intrinsics_vec256 + v34340 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r40, r30); + Lib_IntVector_Intrinsics_vec256 + r12340 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34340, v12120); + Lib_IntVector_Intrinsics_vec256 + v12121 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r21, r11); + Lib_IntVector_Intrinsics_vec256 + v34341 =
Lib_IntVector_Intrinsics_vec256_interleave_low64(r41, r31); + Lib_IntVector_Intrinsics_vec256 + r12341 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34341, v12121); + Lib_IntVector_Intrinsics_vec256 + v12122 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r22, r12); + Lib_IntVector_Intrinsics_vec256 + v34342 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r42, r32); + Lib_IntVector_Intrinsics_vec256 + r12342 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34342, v12122); + Lib_IntVector_Intrinsics_vec256 + v12123 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r23, r13); + Lib_IntVector_Intrinsics_vec256 + v34343 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r43, r33); + Lib_IntVector_Intrinsics_vec256 + r12343 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34343, v12123); + Lib_IntVector_Intrinsics_vec256 + v12124 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r24, r14); + Lib_IntVector_Intrinsics_vec256 + v34344 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r44, r34); + Lib_IntVector_Intrinsics_vec256 + r12344 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34344, v12124); + /* Limbs 1..4 of the combined vectors scaled by 5, used for the wrapped-around terms of the product below. */ Lib_IntVector_Intrinsics_vec256 + r123451 = Lib_IntVector_Intrinsics_vec256_smul64(r12341, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 + r123452 = Lib_IntVector_Intrinsics_vec256_smul64(r12342, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 + r123453 = Lib_IntVector_Intrinsics_vec256_smul64(r12343, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 + r123454 = Lib_IntVector_Intrinsics_vec256_smul64(r12344, (uint64_t)5U); + /* Product of the combined vectors r12340..r12344 with the limbs a0..a4 that were loaded from out. */ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_mul64(r12340, a0); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_mul64(r12341, a0); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_mul64(r12342, a0); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_mul64(r12343, a0); + Lib_IntVector_Intrinsics_vec256 a41 =
Lib_IntVector_Intrinsics_vec256_mul64(r12344, a0); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a1)); + Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r12340, a1)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + Lib_IntVector_Intrinsics_vec256_mul64(r12341, a1)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r12342, a1)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r12343, a1)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r123453, a2)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a2)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r12340, a2)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r12341, a2)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r12342, a2)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r123452, a3)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r123453, a3)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a3)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, +
Lib_IntVector_Intrinsics_vec256_mul64(r12340, a3)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r12341, a3)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r123451, a4)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r123452, a4)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r123453, a4)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a4)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r12340, a4)); + Lib_IntVector_Intrinsics_vec256 t0 = a05; + Lib_IntVector_Intrinsics_vec256 t1 = a15; + Lib_IntVector_Intrinsics_vec256 t2 = a25; + Lib_IntVector_Intrinsics_vec256 t3 = a35; + Lib_IntVector_Intrinsics_vec256 t4 = a45; + /* Carry pass over t0..t4, same shape as the earlier ones: 26-bit mask per limb, and 5 times the carry out of limb 4 added to limb 0. */ Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4,
(uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z121 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z121); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o10 = x12; + Lib_IntVector_Intrinsics_vec256 o20 = x21; + Lib_IntVector_Intrinsics_vec256 o30 = x32; + Lib_IntVector_Intrinsics_vec256 o40 = x42; + /* For each limb: add the vector to its interleave_high128 with itself, then add that sum to its own interleave_high64. NOTE(review): this looks like a horizontal sum of the 64-bit lanes; confirm against the intrinsic semantics. */ Lib_IntVector_Intrinsics_vec256 + v00 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o0, o0); + Lib_IntVector_Intrinsics_vec256 v10 = Lib_IntVector_Intrinsics_vec256_add64(o0, v00); + Lib_IntVector_Intrinsics_vec256 + v10h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v10, v10); + Lib_IntVector_Intrinsics_vec256 v20 =
Lib_IntVector_Intrinsics_vec256_add64(v10, v10h); + Lib_IntVector_Intrinsics_vec256 + v01 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o10, o10); + Lib_IntVector_Intrinsics_vec256 v11 = Lib_IntVector_Intrinsics_vec256_add64(o10, v01); + Lib_IntVector_Intrinsics_vec256 + v11h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v11, v11); + Lib_IntVector_Intrinsics_vec256 v21 = Lib_IntVector_Intrinsics_vec256_add64(v11, v11h); + Lib_IntVector_Intrinsics_vec256 + v02 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o20, o20); + Lib_IntVector_Intrinsics_vec256 v12 = Lib_IntVector_Intrinsics_vec256_add64(o20, v02); + Lib_IntVector_Intrinsics_vec256 + v12h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v12, v12); + Lib_IntVector_Intrinsics_vec256 v22 = Lib_IntVector_Intrinsics_vec256_add64(v12, v12h); + Lib_IntVector_Intrinsics_vec256 + v03 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o30, o30); + Lib_IntVector_Intrinsics_vec256 v13 = Lib_IntVector_Intrinsics_vec256_add64(o30, v03); + Lib_IntVector_Intrinsics_vec256 + v13h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v13, v13); + Lib_IntVector_Intrinsics_vec256 v23 = Lib_IntVector_Intrinsics_vec256_add64(v13, v13h); + Lib_IntVector_Intrinsics_vec256 + v04 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o40, o40); + Lib_IntVector_Intrinsics_vec256 v14 = Lib_IntVector_Intrinsics_vec256_add64(o40, v04); + Lib_IntVector_Intrinsics_vec256 + v14h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v14, v14); + Lib_IntVector_Intrinsics_vec256 v24 = Lib_IntVector_Intrinsics_vec256_add64(v14, v14h); + /* Sequential carry chain over v20..v24: each step masks the low 26 bits into tmpN and adds the bits above them to the next limb. */ Lib_IntVector_Intrinsics_vec256 + l = Lib_IntVector_Intrinsics_vec256_add64(v20, Lib_IntVector_Intrinsics_vec256_zero); + Lib_IntVector_Intrinsics_vec256 + tmp0 = + Lib_IntVector_Intrinsics_vec256_and(l, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U); +
Lib_IntVector_Intrinsics_vec256 l0 = Lib_IntVector_Intrinsics_vec256_add64(v21, c0); + Lib_IntVector_Intrinsics_vec256 + tmp1 = + Lib_IntVector_Intrinsics_vec256_and(l0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(v22, c1); + Lib_IntVector_Intrinsics_vec256 + tmp2 = + Lib_IntVector_Intrinsics_vec256_and(l1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(v23, c2); + Lib_IntVector_Intrinsics_vec256 + tmp3 = + Lib_IntVector_Intrinsics_vec256_and(l2, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(v24, c3); + Lib_IntVector_Intrinsics_vec256 + tmp4 = + Lib_IntVector_Intrinsics_vec256_and(l3, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + o00 = + Lib_IntVector_Intrinsics_vec256_add64(tmp0, + Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec256 o1 = tmp1; + Lib_IntVector_Intrinsics_vec256 o2 = tmp2; + Lib_IntVector_Intrinsics_vec256 o3 = tmp3; + Lib_IntVector_Intrinsics_vec256 o4 = tmp4; + out[0U] = o00; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +void +Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key) +{ + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + uint8_t 
*kr = key; + acc[0U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[1U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[2U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[3U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[4U] = Lib_IntVector_Intrinsics_vec256_zero; + uint64_t u0 = load64_le(kr); + uint64_t lo = u0; + uint64_t u = load64_le(kr + (uint32_t)8U); + uint64_t hi = u; + uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; + uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; + uint64_t lo1 = lo & mask0; + uint64_t hi1 = hi & mask1; + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec256 *rn_5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec256 r_vec0 = Lib_IntVector_Intrinsics_vec256_load64(lo1); + Lib_IntVector_Intrinsics_vec256 r_vec1 = Lib_IntVector_Intrinsics_vec256_load64(hi1); + Lib_IntVector_Intrinsics_vec256 + f00 = + Lib_IntVector_Intrinsics_vec256_and(r_vec0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f15 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(r_vec1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f0 = 
f00; + Lib_IntVector_Intrinsics_vec256 f1 = f15; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + Lib_IntVector_Intrinsics_vec256 f200 = r[0U]; + Lib_IntVector_Intrinsics_vec256 f210 = r[1U]; + Lib_IntVector_Intrinsics_vec256 f220 = r[2U]; + Lib_IntVector_Intrinsics_vec256 f230 = r[3U]; + Lib_IntVector_Intrinsics_vec256 f240 = r[4U]; + r5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f200, (uint64_t)5U); + r5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f210, (uint64_t)5U); + r5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f220, (uint64_t)5U); + r5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f230, (uint64_t)5U); + r5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f240, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r10 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r20 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r30 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r40 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r510 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r520 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r530 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r540 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f100 = r[0U]; + Lib_IntVector_Intrinsics_vec256 f110 = r[1U]; + Lib_IntVector_Intrinsics_vec256 f120 = r[2U]; + Lib_IntVector_Intrinsics_vec256 f130 = r[3U]; + Lib_IntVector_Intrinsics_vec256 f140 = r[4U]; + Lib_IntVector_Intrinsics_vec256 a00 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f100); + Lib_IntVector_Intrinsics_vec256 a10 = Lib_IntVector_Intrinsics_vec256_mul64(r10, f100); + Lib_IntVector_Intrinsics_vec256 a20 = Lib_IntVector_Intrinsics_vec256_mul64(r20, f100); + Lib_IntVector_Intrinsics_vec256 a30 = Lib_IntVector_Intrinsics_vec256_mul64(r30, f100); + Lib_IntVector_Intrinsics_vec256 a40 = Lib_IntVector_Intrinsics_vec256_mul64(r40, f100); + Lib_IntVector_Intrinsics_vec256 
+ a010 = + Lib_IntVector_Intrinsics_vec256_add64(a00, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f110)); + Lib_IntVector_Intrinsics_vec256 + a110 = + Lib_IntVector_Intrinsics_vec256_add64(a10, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec256 + a210 = + Lib_IntVector_Intrinsics_vec256_add64(a20, + Lib_IntVector_Intrinsics_vec256_mul64(r10, f110)); + Lib_IntVector_Intrinsics_vec256 + a310 = + Lib_IntVector_Intrinsics_vec256_add64(a30, + Lib_IntVector_Intrinsics_vec256_mul64(r20, f110)); + Lib_IntVector_Intrinsics_vec256 + a410 = + Lib_IntVector_Intrinsics_vec256_add64(a40, + Lib_IntVector_Intrinsics_vec256_mul64(r30, f110)); + Lib_IntVector_Intrinsics_vec256 + a020 = + Lib_IntVector_Intrinsics_vec256_add64(a010, + Lib_IntVector_Intrinsics_vec256_mul64(r530, f120)); + Lib_IntVector_Intrinsics_vec256 + a120 = + Lib_IntVector_Intrinsics_vec256_add64(a110, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f120)); + Lib_IntVector_Intrinsics_vec256 + a220 = + Lib_IntVector_Intrinsics_vec256_add64(a210, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec256 + a320 = + Lib_IntVector_Intrinsics_vec256_add64(a310, + Lib_IntVector_Intrinsics_vec256_mul64(r10, f120)); + Lib_IntVector_Intrinsics_vec256 + a420 = + Lib_IntVector_Intrinsics_vec256_add64(a410, + Lib_IntVector_Intrinsics_vec256_mul64(r20, f120)); + Lib_IntVector_Intrinsics_vec256 + a030 = + Lib_IntVector_Intrinsics_vec256_add64(a020, + Lib_IntVector_Intrinsics_vec256_mul64(r520, f130)); + Lib_IntVector_Intrinsics_vec256 + a130 = + Lib_IntVector_Intrinsics_vec256_add64(a120, + Lib_IntVector_Intrinsics_vec256_mul64(r530, f130)); + Lib_IntVector_Intrinsics_vec256 + a230 = + Lib_IntVector_Intrinsics_vec256_add64(a220, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f130)); + Lib_IntVector_Intrinsics_vec256 + a330 = + Lib_IntVector_Intrinsics_vec256_add64(a320, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec256 + a430 = + 
Lib_IntVector_Intrinsics_vec256_add64(a420, + Lib_IntVector_Intrinsics_vec256_mul64(r10, f130)); + Lib_IntVector_Intrinsics_vec256 + a040 = + Lib_IntVector_Intrinsics_vec256_add64(a030, + Lib_IntVector_Intrinsics_vec256_mul64(r510, f140)); + Lib_IntVector_Intrinsics_vec256 + a140 = + Lib_IntVector_Intrinsics_vec256_add64(a130, + Lib_IntVector_Intrinsics_vec256_mul64(r520, f140)); + Lib_IntVector_Intrinsics_vec256 + a240 = + Lib_IntVector_Intrinsics_vec256_add64(a230, + Lib_IntVector_Intrinsics_vec256_mul64(r530, f140)); + Lib_IntVector_Intrinsics_vec256 + a340 = + Lib_IntVector_Intrinsics_vec256_add64(a330, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f140)); + Lib_IntVector_Intrinsics_vec256 + a440 = + Lib_IntVector_Intrinsics_vec256_add64(a430, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec256 t00 = a040; + Lib_IntVector_Intrinsics_vec256 t10 = a140; + Lib_IntVector_Intrinsics_vec256 t20 = a240; + Lib_IntVector_Intrinsics_vec256 t30 = a340; + Lib_IntVector_Intrinsics_vec256 t40 = a440; + Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260); + Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260); + Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00); + Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10); + Lib_IntVector_Intrinsics_vec256 + z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t5 = 
Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5); + Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260); + Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask260); + Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010); + Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12); + Lib_IntVector_Intrinsics_vec256 + z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260); + Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260); + Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020); + Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130); + Lib_IntVector_Intrinsics_vec256 + z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260); + Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030); + Lib_IntVector_Intrinsics_vec256 o00 = x020; + Lib_IntVector_Intrinsics_vec256 o10 = x120; + Lib_IntVector_Intrinsics_vec256 o20 = x210; + Lib_IntVector_Intrinsics_vec256 o30 = x320; + Lib_IntVector_Intrinsics_vec256 o40 = x420; + rn[0U] = o00; + rn[1U] = o10; + rn[2U] = o20; + rn[3U] = o30; + rn[4U] = o40; + Lib_IntVector_Intrinsics_vec256 f201 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 f211 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 f221 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 f231 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 f241 = rn[4U]; + rn_5[0U] = 
Lib_IntVector_Intrinsics_vec256_smul64(f201, (uint64_t)5U); + rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f211, (uint64_t)5U); + rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f221, (uint64_t)5U); + rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f231, (uint64_t)5U); + rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f241, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 r00 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = rn_5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = rn_5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = rn_5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = rn_5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r00, f10); + Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec256 + a01 = + Lib_IntVector_Intrinsics_vec256_add64(a0, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f11)); + Lib_IntVector_Intrinsics_vec256 + a11 = + Lib_IntVector_Intrinsics_vec256_add64(a1, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f11)); + Lib_IntVector_Intrinsics_vec256 + a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, Lib_IntVector_Intrinsics_vec256_mul64(r1, f11)); + Lib_IntVector_Intrinsics_vec256 + a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, Lib_IntVector_Intrinsics_vec256_mul64(r2, 
f11)); + Lib_IntVector_Intrinsics_vec256 + a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, Lib_IntVector_Intrinsics_vec256_mul64(r3, f11)); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f12)); + Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f12)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f12)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f12)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f12)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f13)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f13)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f13)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f13)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f13)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r51, f14)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f14)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f14)); + Lib_IntVector_Intrinsics_vec256 + a34 = + 
Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f14)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f14)); + Lib_IntVector_Intrinsics_vec256 t0 = a04; + Lib_IntVector_Intrinsics_vec256 t1 = a14; + Lib_IntVector_Intrinsics_vec256 t2 = a24; + Lib_IntVector_Intrinsics_vec256 t3 = a34; + Lib_IntVector_Intrinsics_vec256 t4 = a44; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z120); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + 
Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + rn[0U] = o0; + rn[1U] = o1; + rn[2U] = o2; + rn[3U] = o3; + rn[4U] = o4; + Lib_IntVector_Intrinsics_vec256 f202 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 f21 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 f22 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 f23 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 f24 = rn[4U]; + rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f202, (uint64_t)5U); + rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f21, (uint64_t)5U); + rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f22, (uint64_t)5U); + rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f23, (uint64_t)5U); + rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f24, (uint64_t)5U); +} +
/*
 * Hacl_Poly1305_256_poly1305_update1: absorb one 16-byte block into the
 * accumulator held in ctx.
 *
 * ctx:  vector array. ctx[0..4] (acc) is read and overwritten with the new
 *       accumulator limbs. ctx[5..9] (pre[0..4]) and ctx[11..14]
 *       ((pre + 5)[1..4]) are only read, as precomputed key limbs.
 * text: input bytes; read at offsets 0 and 8 through load64_le, i.e. bytes
 *       text[0..15] assuming load64_le consumes 8 bytes per call.
 *
 * In radix-2^26 limbs the body computes (acc + block + 2^128) multiplied by
 * the limbs in pre[0..4], then runs one carry pass in which the carry out of
 * the top limb is multiplied by 5 and added to limb 0.
 * NOTE(review): this is presumably the Poly1305 step
 * acc = (acc + m) * r mod 2^130 - 5; that reading relies on
 * (pre + 5)[i] == 5 * pre[i], which is established outside this function --
 * confirm against the code that fills pre.
 */
+void +Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text) +{
/* acc aliases ctx[0..4]; pre points at ctx[5], the start of the key-derived limbs read below. */
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx;
/* e[] receives the block as five limbs. lo and hi are the two 64-bit words returned by load64_le for text and text + 8. */
+ KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(text); + uint64_t lo = 
u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
/*
 * Split the 128-bit value hi:lo into five limbs:
 *   f010 = bits 0-25, f110 = bits 26-51,
 *   f20  = bits 52-77 (top 12 bits of lo, low 14 bits of hi),
 *   f30  = bits 78-103, f40 = bits 104-127 (24 bits).
 * NOTE(review): vec256_load64 presumably replicates its argument into every
 * 64-bit lane, making all lanes identical here -- confirm in the intrinsics header.
 */
+ Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41;
/* Set bit 24 of the top limb. That limb starts at bit 104, so this adds 2^128 to the block value. */
+ uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
/* r = pre[0..4]; r5 = pre[5..9], of which only r5[1..4] are read. NOTE(review): r5[i] is presumably 5 * r[i] -- confirm where pre is filled. */
+ Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = 
r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
/* Limb-wise sum of the accumulator and the block limbs (a01..a41 = acc[i] + e[i]). */
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
/*
 * 5x5 limb product of (acc + e) with r, accumulated one input limb at a time.
 * A partial product r[i] * a[j] with i + j >= 5 is replaced by r5[i] * a[j]
 * and added to output limb i + j - 5. The finished columns are:
 *   a06 = r0*a01 + r54*a11 + r53*a21 + r52*a31 + r51*a41
 *   a16 = r1*a01 + r0*a11  + r54*a21 + r53*a31 + r52*a41
 *   a26 = r2*a01 + r1*a11  + r0*a21  + r54*a31 + r53*a41
 *   a36 = r3*a01 + r2*a11  + r1*a21  + r0*a31  + r54*a41
 *   a46 = r4*a01 + r3*a11  + r2*a21  + r1*a31  + r0*a41
 */
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + 
Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + 
Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t0 = a06; + Lib_IntVector_Intrinsics_vec256 t1 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46;
/*
 * Carry pass over 26-bit limbs, in this order:
 *   limb 0 -> 1 and limb 3 -> 4,
 *   limb 1 -> 2 and limb 4 -> 0,
 *   limb 2 -> 3 and limb 0 -> 1,
 *   limb 3 -> 4.
 * The carry out of limb 4 (z11) is multiplied by 5, as z11 + (z11 << 2),
 * before it is added to limb 0. Limbs 0, 2 and 3 are masked to 26 bits;
 * limbs 1 and 4 take their last carry without a further mask.
 */
+ Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = 
Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42;
/* Write the carried limbs back into the accumulator (ctx[0..4]). */
+ acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; +} + +void +Hacl_Poly1305_256_poly1305_update( + Lib_IntVector_Intrinsics_vec256 *ctx, + uint32_t len, + uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + uint32_t sz_block = (uint32_t)64U; + uint32_t len0 = len / sz_block * sz_block; + uint8_t *t0 = text; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)64U; + uint8_t *text0 = t0; + Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t0 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 
e[5U] KRML_POST_ALIGN(32) = { 0U }; + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); + Lib_IntVector_Intrinsics_vec256 + hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); + Lib_IntVector_Intrinsics_vec256 + t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); + Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260); + Lib_IntVector_Intrinsics_vec256 + t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260); + Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260); + Lib_IntVector_Intrinsics_vec256 + t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); + Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260); + Lib_IntVector_Intrinsics_vec256 + o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 o00 = o5; + Lib_IntVector_Intrinsics_vec256 o11 = o10; + Lib_IntVector_Intrinsics_vec256 o21 = o20; + 
Lib_IntVector_Intrinsics_vec256 o31 = o30; + Lib_IntVector_Intrinsics_vec256 o41 = o40; + e[0U] = o00; + e[1U] = o11; + e[2U] = o21; + e[3U] = o31; + e[4U] = o41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec256 *rn5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f110 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 f120 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f130 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f140 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec256 + a01 = + Lib_IntVector_Intrinsics_vec256_add64(a0, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec256 + a11 = + Lib_IntVector_Intrinsics_vec256_add64(a1, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec256 + a21 = + Lib_IntVector_Intrinsics_vec256_add64(a2, + 
Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec256 + a31 = + Lib_IntVector_Intrinsics_vec256_add64(a3, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec256 + a41 = + Lib_IntVector_Intrinsics_vec256_add64(a4, + Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r52, 
f140)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec256 t01 = a04; + Lib_IntVector_Intrinsics_vec256 t1 = a14; + Lib_IntVector_Intrinsics_vec256 t2 = a24; + Lib_IntVector_Intrinsics_vec256 t3 = a34; + Lib_IntVector_Intrinsics_vec256 t4 = a44; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + 
Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o01 = x02; + Lib_IntVector_Intrinsics_vec256 o12 = x12; + Lib_IntVector_Intrinsics_vec256 o22 = x21; + Lib_IntVector_Intrinsics_vec256 o32 = x32; + Lib_IntVector_Intrinsics_vec256 o42 = x42; + acc[0U] = o01; + acc[1U] = o12; + acc[2U] = o22; + acc[3U] = o32; + acc[4U] = o42; + Lib_IntVector_Intrinsics_vec256 f100 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); + Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); + Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, 
f22); + Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); + Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc, pre); + } + uint32_t len1 = len - len0; + uint8_t *t1 = text + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t1 + i * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + 
Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, 
a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + 
Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + 
Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] 
= o4; + } + if (rem > (uint32_t)0U) { + uint8_t *last = t1 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + 
Lib_IntVector_Intrinsics_vec256 fi = e[rem * (uint32_t)8U / (uint32_t)26U]; + e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = 
Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + 
Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = 
Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + /* Produces the 16-byte tag from the accumulator held in ctx[0..4]. The five accumulator limbs are carry-normalised twice, conditionally reduced, packed into 128 bits, added to the 16 bytes at key + 16, and written little-endian to tag. key must therefore be at least 32 bytes long and tag at least 16 bytes. ctx[0..4] is overwritten with the reduced limbs. */ +void +Hacl_Poly1305_256_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec256 *ctx) +{ + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + uint8_t *ks = key + (uint32_t)16U; /* second 16-byte half of key; added to the packed accumulator at the end */ + Lib_IntVector_Intrinsics_vec256 f0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f13 = acc[1U]; +
Lib_IntVector_Intrinsics_vec256 f23 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f33 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f40 = acc[4U]; /* First carry pass: each limb keeps its low 26 bits (mask 0x3ffffff) and the bits above are added to the next limb. */ + Lib_IntVector_Intrinsics_vec256 + l0 = Lib_IntVector_Intrinsics_vec256_add64(f0, Lib_IntVector_Intrinsics_vec256_zero); + Lib_IntVector_Intrinsics_vec256 + tmp00 = + Lib_IntVector_Intrinsics_vec256_and(l0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c00 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(f13, c00); + Lib_IntVector_Intrinsics_vec256 + tmp10 = + Lib_IntVector_Intrinsics_vec256_and(l1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c10 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(f23, c10); + Lib_IntVector_Intrinsics_vec256 + tmp20 = + Lib_IntVector_Intrinsics_vec256_and(l2, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c20 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(f33, c20); + Lib_IntVector_Intrinsics_vec256 + tmp30 = + Lib_IntVector_Intrinsics_vec256_and(l3, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c30 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l4 = Lib_IntVector_Intrinsics_vec256_add64(f40, c30); + Lib_IntVector_Intrinsics_vec256 + tmp40 = + Lib_IntVector_Intrinsics_vec256_and(l4, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c40 = Lib_IntVector_Intrinsics_vec256_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + f010 = +
Lib_IntVector_Intrinsics_vec256_add64(tmp00, + Lib_IntVector_Intrinsics_vec256_smul64(c40, (uint64_t)5U)); /* the carry out of limb 4 is multiplied by 5 and folded back into limb 0 */ + Lib_IntVector_Intrinsics_vec256 f110 = tmp10; + Lib_IntVector_Intrinsics_vec256 f210 = tmp20; + Lib_IntVector_Intrinsics_vec256 f310 = tmp30; + Lib_IntVector_Intrinsics_vec256 f410 = tmp40; /* Second carry pass: same mask-and-propagate sequence, applied to the result of the first pass. */ + Lib_IntVector_Intrinsics_vec256 + l = Lib_IntVector_Intrinsics_vec256_add64(f010, Lib_IntVector_Intrinsics_vec256_zero); + Lib_IntVector_Intrinsics_vec256 + tmp0 = + Lib_IntVector_Intrinsics_vec256_and(l, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l5 = Lib_IntVector_Intrinsics_vec256_add64(f110, c0); + Lib_IntVector_Intrinsics_vec256 + tmp1 = + Lib_IntVector_Intrinsics_vec256_and(l5, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l5, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l6 = Lib_IntVector_Intrinsics_vec256_add64(f210, c1); + Lib_IntVector_Intrinsics_vec256 + tmp2 = + Lib_IntVector_Intrinsics_vec256_and(l6, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l6, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l7 = Lib_IntVector_Intrinsics_vec256_add64(f310, c2); + Lib_IntVector_Intrinsics_vec256 + tmp3 = + Lib_IntVector_Intrinsics_vec256_and(l7, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l7, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l8 = Lib_IntVector_Intrinsics_vec256_add64(f410, c3); + Lib_IntVector_Intrinsics_vec256 + tmp4 = + Lib_IntVector_Intrinsics_vec256_and(l8, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c4 =
Lib_IntVector_Intrinsics_vec256_shift_right64(l8, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + f02 = + Lib_IntVector_Intrinsics_vec256_add64(tmp0, + Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec256 f12 = tmp1; + Lib_IntVector_Intrinsics_vec256 f22 = tmp2; + Lib_IntVector_Intrinsics_vec256 f32 = tmp3; + Lib_IntVector_Intrinsics_vec256 f42 = tmp4; /* Branch-free conditional subtraction. ml = 0x3fffffb and mh = 0x3ffffff are the radix-2^26 limbs of 2^130 - 5. mask4 is built from eq64 on limbs 1..4 against mh and from lognot(gt64(ml, f02)) on limb 0; the limbs (ml, mh, mh, mh, mh) ANDed with mask4 are then subtracted. NOTE(review): eq64/gt64 presumably return all-ones/all-zero lanes - confirm in libintvector.h. */ + Lib_IntVector_Intrinsics_vec256 + mh = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + ml = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffffbU); + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_eq64(f42, mh); + Lib_IntVector_Intrinsics_vec256 + mask1 = + Lib_IntVector_Intrinsics_vec256_and(mask, + Lib_IntVector_Intrinsics_vec256_eq64(f32, mh)); + Lib_IntVector_Intrinsics_vec256 + mask2 = + Lib_IntVector_Intrinsics_vec256_and(mask1, + Lib_IntVector_Intrinsics_vec256_eq64(f22, mh)); + Lib_IntVector_Intrinsics_vec256 + mask3 = + Lib_IntVector_Intrinsics_vec256_and(mask2, + Lib_IntVector_Intrinsics_vec256_eq64(f12, mh)); + Lib_IntVector_Intrinsics_vec256 + mask4 = + Lib_IntVector_Intrinsics_vec256_and(mask3, + Lib_IntVector_Intrinsics_vec256_lognot(Lib_IntVector_Intrinsics_vec256_gt64(ml, f02))); + Lib_IntVector_Intrinsics_vec256 ph = Lib_IntVector_Intrinsics_vec256_and(mask4, mh); + Lib_IntVector_Intrinsics_vec256 pl = Lib_IntVector_Intrinsics_vec256_and(mask4, ml); + Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_sub64(f02, pl); + Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_sub64(f12, ph); + Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_sub64(f22, ph); + Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_sub64(f32, ph); + Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_sub64(f42, ph); + Lib_IntVector_Intrinsics_vec256 f011 = o0; + Lib_IntVector_Intrinsics_vec256 f111 = o1; +
Lib_IntVector_Intrinsics_vec256 f211 = o2; + Lib_IntVector_Intrinsics_vec256 f311 = o3; + Lib_IntVector_Intrinsics_vec256 f411 = o4; + acc[0U] = f011; + acc[1U] = f111; + acc[2U] = f211; + acc[3U] = f311; + acc[4U] = f411; + Lib_IntVector_Intrinsics_vec256 f00 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 f2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f4 = acc[4U]; /* Only element 0 of each limb vector is used for the tag (extract64 with index 0U). */ + uint64_t f01 = Lib_IntVector_Intrinsics_vec256_extract64(f00, (uint32_t)0U); + uint64_t f112 = Lib_IntVector_Intrinsics_vec256_extract64(f1, (uint32_t)0U); + uint64_t f212 = Lib_IntVector_Intrinsics_vec256_extract64(f2, (uint32_t)0U); + uint64_t f312 = Lib_IntVector_Intrinsics_vec256_extract64(f3, (uint32_t)0U); + uint64_t f41 = Lib_IntVector_Intrinsics_vec256_extract64(f4, (uint32_t)0U); /* Pack the five 26-bit limbs into two 64-bit words; limb 2 straddles the boundary (its low 12 bits go to lo, the rest to hi). */ + uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; + uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; + uint64_t f10 = lo; + uint64_t f11 = hi; + uint64_t u0 = load64_le(ks); + uint64_t lo0 = u0; + uint64_t u = load64_le(ks + (uint32_t)8U); + uint64_t hi0 = u; + uint64_t f20 = lo0; + uint64_t f21 = hi0; /* 128-bit addition of the packed accumulator and the key half, wrapping modulo 2^128. */ + uint64_t r0 = f10 + f20; + uint64_t r1 = f11 + f21; /* c is the top bit of a branch-free "r0 < f20" test, i.e. the carry out of the low-word addition */ + uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; + uint64_t r11 = r1 + c; + uint64_t f30 = r0; + uint64_t f31 = r11; + store64_le(tag, f30); + store64_le(tag + (uint32_t)8U, f31); +} + /* One-shot MAC: zero-initialises a 32-byte-aligned context of 25 vectors on the stack, then calls init(ctx, key), update(ctx, len, text) and finish(tag, key, ctx) in that order. The context is not cleared before returning. */ +void +Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) +{ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; + Hacl_Poly1305_256_poly1305_init(ctx, key); + Hacl_Poly1305_256_poly1305_update(ctx, len, text); + Hacl_Poly1305_256_poly1305_finish(tag, key, ctx); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h new file mode 100644 index 0000000000..d9bf5fd831
--- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h @@ -0,0 +1,63 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Poly1305_256_H +#define __Hacl_Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +#include "libintvector.h" +typedef Lib_IntVector_Intrinsics_vec256 *Hacl_Poly1305_256_poly1305_ctx; + +void Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key); + +void Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text); + +void +Hacl_Poly1305_256_poly1305_update( + Lib_IntVector_Intrinsics_vec256 *ctx, + uint32_t len, + uint8_t *text); + /* Writes a 16-byte tag computed from the accumulator in ctx and the 16 bytes at key + 16. */ +void +Hacl_Poly1305_256_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec256 *ctx); + /* One-shot helper: runs init, update over len bytes of text, then finish, on a stack context of 25 vectors. */ +void Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Poly1305_256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c new file mode 100644 index 0000000000..8de2eca7f1 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c @@ -0,0 +1,574 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Poly1305_32.h" + +#include "internal/Hacl_Krmllib.h" + /* Initialises a Poly1305 context from the first 16 bytes of key. Layout written here: ctx[0..4] accumulator (zeroed), ctx[5..9] r as five 26-bit limbs, ctx[10..14] 5*r limb-wise, ctx[15..19] and ctx[20..24] copies of r and 5*r. ctx must therefore hold at least 25 uint64_t. */ +void +Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key) +{ + uint64_t *acc = ctx; + uint64_t *pre = ctx + (uint32_t)5U; + uint8_t *kr = key; + acc[0U] = (uint64_t)0U; + acc[1U] = (uint64_t)0U; + acc[2U] = (uint64_t)0U; + acc[3U] = (uint64_t)0U; + acc[4U] = (uint64_t)0U; + uint64_t u0 = load64_le(kr); + uint64_t lo = u0; + uint64_t u = load64_le(kr + (uint32_t)8U); + uint64_t hi = u; /* mask0/mask1 clear selected bits of the two key words before they are used as r */ + uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; + uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; + uint64_t lo1 = lo & mask0; + uint64_t hi1 = hi & mask1; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t *rn = pre + (uint32_t)10U; + uint64_t *rn_5 = pre + (uint32_t)15U; + uint64_t r_vec0 = lo1; + uint64_t r_vec1 = hi1; /* split the 128-bit value (r_vec1:r_vec0) into five 26-bit limbs; limb 2 straddles the two words */ + uint64_t f00 = r_vec0 & (uint64_t)0x3ffffffU; + uint64_t f10 = r_vec0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = r_vec0 >> (uint32_t)52U | (r_vec1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = r_vec1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = r_vec1 >> (uint32_t)40U; + uint64_t f0 = f00; + uint64_t f1 = f10; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + uint64_t f200 = r[0U]; + uint64_t f21 = r[1U]; + uint64_t f22 = r[2U]; + uint64_t f23 = r[3U]; + uint64_t f24 = r[4U]; + r5[0U] = f200 * (uint64_t)5U; + r5[1U] = f21 * (uint64_t)5U; + r5[2U] = f22 *
(uint64_t)5U; + r5[3U] = f23 * (uint64_t)5U; + r5[4U] = f24 * (uint64_t)5U; + rn[0U] = r[0U]; + rn[1U] = r[1U]; + rn[2U] = r[2U]; + rn[3U] = r[3U]; + rn[4U] = r[4U]; + rn_5[0U] = r5[0U]; + rn_5[1U] = r5[1U]; + rn_5[2U] = r5[2U]; + rn_5[3U] = r5[3U]; + rn_5[4U] = r5[4U]; +} + /* Absorbs exactly one 16-byte block read from text: the block is split into five 26-bit limbs, bit 24 of the top limb is set (value 2^128), the limbs are added to the accumulator ctx[0..4], the sum is multiplied by r (ctx[5..9], with ctx[11..14] = 5*r[1..4] used for the wrapped terms), and the result is carry-reduced back into ctx[0..4]. */ +void +Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text) +{ + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(text); + uint64_t lo = u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; /* 0x1000000 is bit 24 of limb 4, i.e. bit 4*26 + 24 = 128 of the block value */ + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; /* schoolbook 5x5 limb product; terms that would land at limb 5 or above use the 5*r limbs instead */ + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; +
uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; /* carry chain: bits above 26 move to the next limb; the carry out of limb 4 re-enters limb 0 as z11 + (z11 << 2), i.e. multiplied by 5 */ + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; +} + +void +Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text) +{ + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint32_t nb = len / (uint32_t)16U; + uint32_t rem = len % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text + i *
(uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * 
a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + if (rem > (uint32_t)0U) { + uint8_t *last = text + nb * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 
= f30; + uint64_t f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; + uint64_t mask = b; + uint64_t fi = e[rem * (uint32_t)8U / (uint32_t)26U]; + e[rem * (uint32_t)8U / (uint32_t)26U] = fi | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & 
mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +void +Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx) +{ + uint64_t *acc = ctx; + uint8_t *ks = key + (uint32_t)16U; + uint64_t f0 = acc[0U]; + uint64_t f13 = acc[1U]; + uint64_t f23 = acc[2U]; + uint64_t f33 = acc[3U]; + uint64_t f40 = acc[4U]; + uint64_t l0 = f0 + (uint64_t)0U; + uint64_t tmp00 = l0 & (uint64_t)0x3ffffffU; + uint64_t c00 = l0 >> (uint32_t)26U; + uint64_t l1 = f13 + c00; + uint64_t tmp10 = l1 & (uint64_t)0x3ffffffU; + uint64_t c10 = l1 >> (uint32_t)26U; + uint64_t l2 = f23 + c10; + uint64_t tmp20 = l2 & (uint64_t)0x3ffffffU; + uint64_t c20 = l2 >> (uint32_t)26U; + uint64_t l3 = f33 + c20; + uint64_t tmp30 = l3 & (uint64_t)0x3ffffffU; + uint64_t c30 = l3 >> (uint32_t)26U; + uint64_t l4 = f40 + c30; + uint64_t tmp40 = l4 & (uint64_t)0x3ffffffU; + uint64_t c40 = l4 >> (uint32_t)26U; + uint64_t f010 = tmp00 + c40 * (uint64_t)5U; + uint64_t f110 = tmp10; + uint64_t f210 = tmp20; + uint64_t f310 = tmp30; + uint64_t f410 = tmp40; + uint64_t l = f010 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c0 = l >> (uint32_t)26U; + uint64_t l5 = f110 + c0; + uint64_t tmp1 = l5 & 
(uint64_t)0x3ffffffU; + uint64_t c1 = l5 >> (uint32_t)26U; + uint64_t l6 = f210 + c1; + uint64_t tmp2 = l6 & (uint64_t)0x3ffffffU; + uint64_t c2 = l6 >> (uint32_t)26U; + uint64_t l7 = f310 + c2; + uint64_t tmp3 = l7 & (uint64_t)0x3ffffffU; + uint64_t c3 = l7 >> (uint32_t)26U; + uint64_t l8 = f410 + c3; + uint64_t tmp4 = l8 & (uint64_t)0x3ffffffU; + uint64_t c4 = l8 >> (uint32_t)26U; + uint64_t f02 = tmp0 + c4 * (uint64_t)5U; + uint64_t f12 = tmp1; + uint64_t f22 = tmp2; + uint64_t f32 = tmp3; + uint64_t f42 = tmp4; + uint64_t mh = (uint64_t)0x3ffffffU; + uint64_t ml = (uint64_t)0x3fffffbU; + uint64_t mask = FStar_UInt64_eq_mask(f42, mh); + uint64_t mask1 = mask & FStar_UInt64_eq_mask(f32, mh); + uint64_t mask2 = mask1 & FStar_UInt64_eq_mask(f22, mh); + uint64_t mask3 = mask2 & FStar_UInt64_eq_mask(f12, mh); + uint64_t mask4 = mask3 & ~~FStar_UInt64_gte_mask(f02, ml); + uint64_t ph = mask4 & mh; + uint64_t pl = mask4 & ml; + uint64_t o0 = f02 - pl; + uint64_t o1 = f12 - ph; + uint64_t o2 = f22 - ph; + uint64_t o3 = f32 - ph; + uint64_t o4 = f42 - ph; + uint64_t f011 = o0; + uint64_t f111 = o1; + uint64_t f211 = o2; + uint64_t f311 = o3; + uint64_t f411 = o4; + acc[0U] = f011; + acc[1U] = f111; + acc[2U] = f211; + acc[3U] = f311; + acc[4U] = f411; + uint64_t f00 = acc[0U]; + uint64_t f1 = acc[1U]; + uint64_t f2 = acc[2U]; + uint64_t f3 = acc[3U]; + uint64_t f4 = acc[4U]; + uint64_t f01 = f00; + uint64_t f112 = f1; + uint64_t f212 = f2; + uint64_t f312 = f3; + uint64_t f41 = f4; + uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; + uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; + uint64_t f10 = lo; + uint64_t f11 = hi; + uint64_t u0 = load64_le(ks); + uint64_t lo0 = u0; + uint64_t u = load64_le(ks + (uint32_t)8U); + uint64_t hi0 = u; + uint64_t f20 = lo0; + uint64_t f21 = hi0; + uint64_t r0 = f10 + f20; + uint64_t r1 = f11 + f21; + uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; + 
uint64_t r11 = r1 + c; + uint64_t f30 = r0; + uint64_t f31 = r11; + store64_le(tag, f30); + store64_le(tag + (uint32_t)8U, f31); +} +/* One-shot MAC: runs init(ctx, key), update(ctx, len, text) and finish(tag, key, ctx) in that order on a zero-initialised 25-word stack context. finish() stores 16 bytes to tag and reads key bytes 16..31. */ +void +Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) +{ + uint64_t ctx[25U] = { 0U }; /* NOTE(review): ctx is not cleared before returning; confirm whether callers need the state wiped */ + Hacl_Poly1305_32_poly1305_init(ctx, key); + Hacl_Poly1305_32_poly1305_update(ctx, len, text); + Hacl_Poly1305_32_poly1305_finish(tag, key, ctx); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h new file mode 100644 index 0000000000..84a2f606b1 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h @@ -0,0 +1,55 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Poly1305_32_H +#define __Hacl_Poly1305_32_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +typedef uint64_t *Hacl_Poly1305_32_poly1305_ctx; + +void Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key); + +void Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text); + +void Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text); + +void Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx); + +void Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Poly1305_32_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/config.h b/security/nss/lib/freebl/verified/config.h new file mode 100644 index 0000000000..e69de29bb2 diff --git a/security/nss/lib/freebl/verified/curve25519-inline.h b/security/nss/lib/freebl/verified/curve25519-inline.h new file mode 100644 index 0000000000..690e75a1b9 --- /dev/null +++ b/security/nss/lib/freebl/verified/curve25519-inline.h @@ -0,0 +1,942 @@ +#ifdef __GNUC__ +#if defined(__x86_64__) || defined(_M_X64) +#pragma once +#include + +// Computes the addition of four-element f1 with value in f2 +// and writes the four-limb sum to out; the final carry is accumulated in rax but not returned (the function is void) +static inline void +add_scalar(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + __asm__ volatile( + // Clear registers to propagate the carry bit + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %%eax, %%eax;" + + // Begin addition chain + " addq 0(%2), %0;" + " movq %0, 0(%1);" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%1);" + " adcxq 16(%2), %%r9;" + " movq %%r9, 16(%1);" + " adcxq 24(%2), %%r10;" + " movq %%r10, 24(%1);" + + // Collect the final carry bit in rax (rax is only a clobber, so the value does not leave the asm block) + " adcx %%r11, %%rax;" + : "+&r"(f2) + : "r"(out), "r"(f1) + : 
"%rax", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes the field addition of two field elements: out <- sum of f1 and f2, with the carry out of the four-limb sum folded back in as carry*38 +static inline void +fadd(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + __asm__ volatile( + // Compute the raw addition of f1 + f2 + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + // Step 2: Add carry*38 to the original sum + " xor %%ecx, %%ecx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes the field subtraction of two field elements: out <- f1 minus f2, with the borrow out of the four-limb difference folded back in as borrow*38 +static inline void +fsub(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + __asm__ volatile( + // Compute the raw subtraction of f1-f2 + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + // Step 2: Subtract carry*38 from the original difference + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + // Store the result + " movq %%r8, 0(%0);" + " movq %%r9, 
8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes a field multiplication: out <- f1 * f2 +// Uses the 8-element buffer tmp for intermediate results +static inline void +fmul(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ + __asm__ volatile( + + /////// Compute the raw multiplication: tmp <- src1 * src2 ////// + + // Compute src1[0] * src2 + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" 
+ " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + // Line up pointers + " mov %2, %0;" + " mov %3, %2;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc"); +} + +// Computes two field multiplications: +// out[0] <- f1[0] * f2[0] +// out[1] <- f1[1] * f2[1] +// Uses the 16-element buffer tmp for intermediate results: +static inline void +fmul2(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ + __asm__ volatile( + + /////// Compute the raw multiplication tmp[0] <- f1[0] * f2[0] ////// + + // Compute src1[0] * src2 + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " 
mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /////// Compute the raw multiplication tmp[1] <- f1[1] * f2[1] ////// + + // Compute src1[0] * src2 + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + 
" xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + + // Line up pointers + " mov %2, %0;" + " mov %3, %2;" + + /////// Wrap the results back into 
the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc"); +} + +// Computes the field multiplication of four-element f1 with value in f2 +// Requires f2 to be smaller than 2^17 +static inline void 
+fmul_scalar(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + register uint64_t f2_r __asm__("rdx") = f2; + + __asm__ volatile( + // Compute the raw multiplication of f1*f2 (f2 is pinned to rdx above, the implicit source operand of mulx) + " mulxq 0(%2), %%r8, %%rcx;" // f1[0]*f2 + " mulxq 8(%2), %%r9, %%rbx;" // f1[1]*f2 + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" // f1[2]*f2 + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" // f1[3]*f2 + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", "memory", "cc"); +} + +// Conditionally swaps the eight-limb buffers p1 and p2 (both are written) when bit is nonzero: p1 <- bit ? 
p2 : p1 in constant time +static inline void +cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ + __asm__ volatile( + // Transfer bit into CF flag + " add $18446744073709551615, %0;" + + // cswap p1[0], p2[0] + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + // cswap p1[1], p2[1] + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + // cswap p1[2], p2[2] + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + // cswap p1[3], p2[3] + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + // cswap p1[4], p2[4] + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + // cswap p1[5], p2[5] + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + // cswap p1[6], p2[6] + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + // cswap p1[7], p2[7] + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); +} + +// Computes the square of a field element: out <- f * f +// Uses the 8-element buffer tmp for intermediate results +static inline void +fsqr(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ + __asm__ volatile( + 
+ /////// Compute the raw multiplication: tmp <- f * f ////// + + // Step 1: Compute all partial products + " movq 0(%0), %%rdx;" // f[0] + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 24(%0), %%rdx;" // f[3] + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + // Line up pointers + " mov %1, %0;" + " mov %2, %1;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 
8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc"); +} + +// Computes two field squarings: +// out[0] <- f[0] * f[0] +// out[1] <- f[1] * f[1] +// Uses the 16-element buffer tmp for intermediate results +static inline void +fsqr2(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ + __asm__ volatile( + // Step 1: Compute all partial products + " movq 0(%0), %%rdx;" // f[0] + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 24(%0), %%rdx;" // f[3] + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + 
" movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + // Step 1: Compute all partial products + " movq 32(%0), %%rdx;" // f[0] + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 48(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 56(%0), %%rdx;" // f[3] + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq 
%%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" + + // Line up pointers + " mov %1, %0;" + " mov %2, %1;" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq 
%%r8, 32(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc"); +} + +#endif /* defined(__x86_64__) || defined(_M_X64) */ +#endif /* __GNUC__ */ diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h b/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h new file mode 100644 index 0000000000..6080d37873 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h @@ -0,0 +1,312 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __internal_Hacl_Bignum_H +#define __internal_Hacl_Bignum_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Krmllib.h" +#include "../Hacl_Bignum.h" +#include "lib_intrinsics.h" +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *b, + uint32_t *tmp, + uint32_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *b, + uint64_t *tmp, + uint64_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *tmp, + uint32_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *tmp, + uint64_t *res); + +void +Hacl_Bignum_bn_add_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_bn_add_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_bn_sub_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_bn_sub_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res); + +uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0); + +uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0); + +uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *res); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *r2, + uint32_t *a, + uint32_t *aM); + +void
+Hacl_Bignum_Montgomery_bn_from_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *aM, + uint32_t *a); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *aM, + uint32_t *resM); + +uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *res); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *r2, + uint64_t *a, + uint64_t *aM); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *a); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM); + +uint32_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( + uint32_t len, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t
bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +uint64_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( + uint32_t len, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Bignum_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h new file mode 100644 index 0000000000..51ecfeef39 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h @@ -0,0 +1,50 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the
following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Chacha20_H +#define __internal_Hacl_Chacha20_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Chacha20.h" + +extern const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U]; + +void Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr); + +void +Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Chacha20_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h b/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h new file mode 100644 index 0000000000..d7d05e89f5 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h @@ -0,0 +1,53 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Curve25519_51_H +#define __internal_Hacl_Curve25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Krmllib.h" +#include "../Hacl_Curve25519_51.h" + +void +Hacl_Curve25519_51_fsquare_times( + uint64_t *o, + uint64_t *inp, + FStar_UInt128_uint128 *tmp, + uint32_t n); + +void Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Curve25519_51_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA1.h b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA1.h new file mode 100644 index 0000000000..02ee03247a --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA1.h @@ -0,0 +1,49 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without
limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Hash_SHA1_H +#define __internal_Hacl_Hash_SHA1_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Hash_SHA1.h" + +void Hacl_Hash_Core_SHA1_legacy_init(uint32_t *s); + +void Hacl_Hash_Core_SHA1_legacy_update(uint32_t *h, uint8_t *l); + +void Hacl_Hash_Core_SHA1_legacy_finish(uint32_t *s, uint8_t *dst); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_SHA1_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA2.h b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA2.h new file mode 100644 index 0000000000..ed9894e717 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA2.h @@ -0,0 +1,65 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without
restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Hash_SHA2_H +#define __internal_Hacl_Hash_SHA2_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Hash_SHA2.h" + +void Hacl_Hash_Core_SHA2_init_224(uint32_t *s); + +void Hacl_Hash_Core_SHA2_init_256(uint32_t *s); + +void Hacl_Hash_Core_SHA2_init_384(uint64_t *s); + +void Hacl_Hash_Core_SHA2_init_512(uint64_t *s); + +void Hacl_Hash_Core_SHA2_update_384(uint64_t *hash, uint8_t *block); + +void Hacl_Hash_Core_SHA2_update_512(uint64_t *hash, uint8_t *block); + +void Hacl_Hash_Core_SHA2_pad_256(uint64_t len, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_224(uint32_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_256(uint32_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_384(uint64_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_512(uint64_t *s, uint8_t *dst); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_SHA2_H_DEFINED +#endif diff --git
a/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h b/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h new file mode 100644 index 0000000000..3778437448 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h @@ -0,0 +1,45 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __internal_Hacl_Krmllib_H +#define __internal_Hacl_Krmllib_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Krmllib.h" + +uint32_t LowStar_Vector_new_capacity(uint32_t cap); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Krmllib_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h new file mode 100644 index 0000000000..d5f257302d --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h @@ -0,0 +1,51 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#ifndef __internal_Hacl_Poly1305_128_H +#define __internal_Hacl_Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Poly1305_128.h" +#include "libintvector.h" +void +Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b); + +void +Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( + Lib_IntVector_Intrinsics_vec128 *out, + Lib_IntVector_Intrinsics_vec128 *p); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Poly1305_128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h new file mode 100644 index 0000000000..9b10379237 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h @@ -0,0 +1,51 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Poly1305_256_H +#define __internal_Hacl_Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Poly1305_256.h" +#include "libintvector.h" +void +Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b); + +void +Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( + Lib_IntVector_Intrinsics_vec256 *out, + Lib_IntVector_Intrinsics_vec256 *p); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Poly1305_256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Spec.h b/security/nss/lib/freebl/verified/internal/Hacl_Spec.h new file mode 100644 index 0000000000..cf5376abab --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Spec.h @@ -0,0 +1,59 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Spec_H +#define __internal_Hacl_Spec_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Spec.h" + +#define Spec_ECDSA_NoHash 0 +#define Spec_ECDSA_Hash 1 + +typedef uint8_t Spec_ECDSA_hash_alg_ecdsa_tags; + +typedef struct Spec_ECDSA_hash_alg_ecdsa_s { + Spec_ECDSA_hash_alg_ecdsa_tags tag; + Spec_Hash_Definitions_hash_alg _0; +} Spec_ECDSA_hash_alg_ecdsa; + +#define Spec_Cipher_Expansion_Hacl_CHACHA20 0 +#define Spec_Cipher_Expansion_Vale_AES128 1 +#define Spec_Cipher_Expansion_Vale_AES256 2 + +typedef uint8_t Spec_Cipher_Expansion_impl; + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Spec_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Vale.h b/security/nss/lib/freebl/verified/internal/Vale.h new file mode 100644 index 0000000000..400650e95f --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Vale.h @@ -0,0 +1,184 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to
permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Vale_H +#define __internal_Vale_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +extern uint64_t add_scalar_e(uint64_t *x0, uint64_t *x1, uint64_t x2); + +extern uint64_t fadd_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t sha256_update(uint32_t *x0, uint8_t *x1, uint64_t x2, uint32_t *x3); + +extern uint64_t x64_poly1305(uint8_t *x0, uint8_t *x1, uint64_t x2, uint64_t x3); + +extern uint64_t check_aesni(void); /* (void) makes this a real zero-parameter prototype in C */ + +extern uint64_t check_sha(void); + +extern uint64_t check_adx_bmi2(void); + +extern uint64_t check_avx(void); + +extern uint64_t check_avx2(void); + +extern uint64_t check_movbe(void); + +extern uint64_t check_sse(void); + +extern uint64_t check_rdrand(void); + +extern uint64_t check_avx512(void); + +extern uint64_t check_osxsave(void); + +extern uint64_t check_avx_xcr0(void); + +extern uint64_t check_avx512_xcr0(void); + +extern uint64_t +gcm128_decrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, + uint8_t *x16); +
+extern uint64_t +gcm256_decrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, + uint8_t *x16); + +extern uint64_t aes128_key_expansion(uint8_t *x0, uint8_t *x1); + +extern uint64_t aes256_key_expansion(uint8_t *x0, uint8_t *x1); + +extern uint64_t +compute_iv_stdcall( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5); + +extern uint64_t +gcm128_encrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, + uint8_t *x16); + +extern uint64_t +gcm256_encrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, + uint8_t *x16); + +extern uint64_t aes128_keyhash_init(uint8_t *x0, uint8_t *x1); + +extern uint64_t aes256_keyhash_init(uint8_t *x0, uint8_t *x1); + +extern uint64_t cswap2_e(uint64_t x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t fsqr_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t fsqr2_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t fmul_e(uint64_t *x0, uint64_t *x1, uint64_t *x2, uint64_t *x3); + +extern uint64_t fmul2_e(uint64_t *x0, uint64_t *x1, uint64_t *x2, uint64_t *x3); + +extern uint64_t fmul_scalar_e(uint64_t *x0, uint64_t *x1, uint64_t x2); + +extern uint64_t fsub_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Vale_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h
b/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h new file mode 100644 index 0000000000..21d7e1b4f9 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h @@ -0,0 +1,13 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_ENDIAN_H +#define __KRML_ENDIAN_H + +#ifdef __GNUC__ +#warning "c_endianness.h is deprecated, include lowstar_endianness.h instead" +#endif + +#include "lowstar_endianness.h" + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h b/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h new file mode 100644 index 0000000000..c7a5afb50a --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h @@ -0,0 +1,89 @@ +#ifndef __FSTAR_INT_H +#define __FSTAR_INT_H + +#include "internal/types.h" + +/* + * Arithmetic Shift Right operator + * + * In all C standards, a >> b is implementation-defined when a has a signed + * type and a negative value. See e.g. 6.5.7 in + * http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2310.pdf + * + * GCC, MSVC, and Clang implement a >> b as an arithmetic shift. + * + * GCC: https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Integers-implementation.html#Integers-implementation + * MSVC: https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=vs-2019#right-shifts + * Clang: tested that Clang 7, 8 and 9 compile this to an arithmetic shift + * + * We implement arithmetic shift right simply as >> in these compilers + * and bail out in others. 
+ */ + +#if !(defined(_MSC_VER) || defined(__GNUC__) || (defined(__clang__) && (__clang_major__ >= 7))) +/* Unrecognised compiler: each helper below reports the problem through KRML_HOST_EPRINTF and terminates through KRML_HOST_EXIT(255) instead of shifting. */ +static inline int8_t +FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int16_t +FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int32_t +FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int64_t +FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +#else +/* _MSC_VER or __GNUC__ is defined, or this is Clang >= 7: every helper is a plain a >> b on the signed operand. NOTE(review): presumably callers keep b below the bit width of a -- confirm against the F* precondition. */ +static inline int8_t +FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int16_t +FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int32_t +FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int64_t +FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b) +{ + return (a >> b); +} + +#endif /* !(defined(_MSC_VER) ... 
) */ + +#endif /* __FSTAR_INT_H */ diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h new file mode 100644 index 0000000000..f55e5f824e --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h @@ -0,0 +1,16 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_BUILTIN_H +#define __KRML_BUILTIN_H + +/* For alloca, when using KaRaMeL's -falloca */ +#if (defined(_WIN32) || defined(_WIN64)) +#include +#endif + +/* If some globals need to be initialized before the main, then karamel will + * generate and try to link last a function with this type: */ +void krmlinit_globals(void); + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h new file mode 100644 index 0000000000..0d250c4450 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h @@ -0,0 +1,46 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_CALLCONV_H +#define __KRML_CALLCONV_H + +/******************************************************************************/ +/* Some macros to ease compatibility */ +/******************************************************************************/ + +/* We want to generate __cdecl safely without worrying about it being undefined. + * When using MSVC, these are always defined. When using MinGW, these are + * defined too. They have no meaning for other platforms, so we define them to + * be empty macros in other situations. 
*/ +#ifndef _MSC_VER +#ifndef __cdecl +#define __cdecl +#endif +#ifndef __stdcall +#define __stdcall +#endif +#ifndef __fastcall +#define __fastcall +#endif +#endif + +/* Since KaRaMeL emits the inline keyword unconditionally, we follow the + * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this + * __inline__ to ensure the code compiles with -std=c90 and earlier. */ +#ifdef __GNUC__ +#define inline __inline__ +#endif + +/* GCC-specific attribute syntax; everyone else gets the standard C inline + * attribute. The outer test has to be __GNUC__, the macro GCC really defines: + * the former spelling __GNU_C__ is never set, so the always_inline branch was + * unreachable. Clang defines __GNUC__ as well, hence the inner __clang__ test + * that keeps it on plain inline. */ +#ifdef __GNUC__ +#ifndef __clang__ +#define force_inline inline __attribute__((always_inline)) +#else +#define force_inline inline +#endif +#else +#define force_inline inline +#endif + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h new file mode 100644 index 0000000000..964d1c52aa --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h @@ -0,0 +1,32 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef KRML_COMPAT_H +#define KRML_COMPAT_H + +#include + +/* A series of macros that define C implementations of types that are not Low*, + * to facilitate porting programs to Low*. 
*/ + +typedef struct { + uint32_t length; + const char *data; +} FStar_Bytes_bytes; + +typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int, + krml_checked_int_t; + +#define RETURN_OR(x) \ + do { \ + int64_t __ret = x; \ + if (__ret < INT32_MIN || INT32_MAX < __ret) { \ + KRML_HOST_PRINTF( \ + "Prims.{int,nat,pos} integer overflow at %s:%d\n", __FILE__, \ + __LINE__); \ + KRML_HOST_EXIT(252); \ + } \ + return (int32_t)__ret; \ + } while (0) + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h new file mode 100644 index 0000000000..f70006bd3f --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h @@ -0,0 +1,57 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_DEBUG_H +#define __KRML_DEBUG_H + +#include + +#include "krml/internal/target.h" + +/******************************************************************************/ +/* Debugging helpers - intended only for KaRaMeL developers */ +/******************************************************************************/ + +/* In support of "-wasm -d force-c": we might need this function to be + * forward-declared, because the dependency on WasmSupport appears very late, + * after SimplifyWasm, and sadly, after the topological order has been done. */ +void WasmSupport_check_buffer_size(uint32_t s); + +/* A series of GCC atrocities to trace function calls (karamel's [-d c-calls] + * option). Useful when trying to debug, say, Wasm, to compare traces. 
*/ +/* clang-format off */ +#ifdef __GNUC__ +#define KRML_FORMAT(X) _Generic((X), \ + uint8_t : "0x%08" PRIx8, \ + uint16_t: "0x%08" PRIx16, \ + uint32_t: "0x%08" PRIx32, \ + uint64_t: "0x%08" PRIx64, \ + int8_t : "0x%08" PRIx8, \ + int16_t : "0x%08" PRIx16, \ + int32_t : "0x%08" PRIx32, \ + int64_t : "0x%08" PRIx64, \ + default : "%s") + +#define KRML_FORMAT_ARG(X) _Generic((X), \ + uint8_t : X, \ + uint16_t: X, \ + uint32_t: X, \ + uint64_t: X, \ + int8_t : X, \ + int16_t : X, \ + int32_t : X, \ + int64_t : X, \ + default : "unknown") +/* clang-format on */ + +#define KRML_DEBUG_RETURN(X) \ + ({ \ + __auto_type _ret = (X); \ + KRML_HOST_PRINTF("returning: "); \ + KRML_HOST_PRINTF(KRML_FORMAT(_ret), KRML_FORMAT_ARG(_ret)); \ + KRML_HOST_PRINTF(" \n"); \ + _ret; \ + }) +#endif + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h new file mode 100644 index 0000000000..929abe8081 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h @@ -0,0 +1,333 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_TARGET_H +#define __KRML_TARGET_H + +#include +#include +#include +#include +#include + +#include "krml/internal/callconv.h" + +/******************************************************************************/ +/* Macros that KaRaMeL will generate. */ +/******************************************************************************/ + +/* For "bare" targets that do not have a C stdlib, the user might want to use + * [-add-early-include '"mydefinitions.h"'] and override these. */ +#ifndef KRML_HOST_PRINTF +#define KRML_HOST_PRINTF printf +#endif + +#if ( \ + (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + (!(defined KRML_HOST_EPRINTF))) +#define KRML_HOST_EPRINTF(...) 
fprintf(stderr, __VA_ARGS__) +#elif !(defined KRML_HOST_EPRINTF) && defined(_MSC_VER) +#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) +#endif + +#ifndef KRML_HOST_EXIT +#define KRML_HOST_EXIT exit +#endif + +#ifndef KRML_HOST_MALLOC +#define KRML_HOST_MALLOC malloc +#endif + +#ifndef KRML_HOST_CALLOC +#define KRML_HOST_CALLOC calloc +#endif + +#ifndef KRML_HOST_FREE +#define KRML_HOST_FREE free +#endif + +#ifndef KRML_PRE_ALIGN +#ifdef _MSC_VER +#define KRML_PRE_ALIGN(X) __declspec(align(X)) +#else +#define KRML_PRE_ALIGN(X) +#endif +#endif + +#ifndef KRML_POST_ALIGN +#ifdef _MSC_VER +#define KRML_POST_ALIGN(X) +#else +#define KRML_POST_ALIGN(X) __attribute__((aligned(X))) +#endif +#endif + +/* KRML_ALIGNED_MALLOC(X, Y): X is the alignment, Y the size; note that + * _aligned_malloc takes them in the opposite order. */ +#ifndef KRML_ALIGNED_MALLOC +#ifdef _MSC_VER +#define KRML_ALIGNED_MALLOC(X, Y) _aligned_malloc(Y, X) +#else +#define KRML_ALIGNED_MALLOC(X, Y) aligned_alloc(X, Y) +#endif +#endif + +#ifndef KRML_ALIGNED_FREE +#ifdef _MSC_VER +#define KRML_ALIGNED_FREE(X) _aligned_free(X) +#else +#define KRML_ALIGNED_FREE(X) free(X) +#endif +#endif + +#ifndef KRML_HOST_TIME + +#include + +/* Default KRML_HOST_TIME: the result of time(NULL) cast to int32_t. + * Prims_nat not yet in scope. Declared with (void) so that this is a real + * prototype rather than an old-style unspecified-argument declaration. */ +inline static int32_t +krml_time(void) +{ + return (int32_t)time(NULL); +} + +#define KRML_HOST_TIME krml_time +#endif + +/* In statement position, exiting is easy. */ +#define KRML_EXIT \ + do { \ + KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ + KRML_HOST_EXIT(254); \ + } while (0) + +/* In expression position, use the comma-operator and a malloc to return an + * expression of the right size. KaRaMeL passes t as the parameter to the macro. + */ +#define KRML_EABORT(t, msg) \ + (KRML_HOST_PRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \ + KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t)))) + +/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of + * *elements*. Do an ugly, run-time check (some of which KaRaMeL can eliminate). 
+ */ + +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) +#define _KRML_CHECK_SIZE_PRAGMA \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") +#else +#define _KRML_CHECK_SIZE_PRAGMA +#endif + +/* Exits with status 253 when sz elements of size_elt bytes each would not fit + * in a size_t. */ +#define KRML_CHECK_SIZE(size_elt, sz) \ + do { \ + _KRML_CHECK_SIZE_PRAGMA \ + if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } \ + } while (0) + +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) _snprintf_s(buf, sz, _TRUNCATE, fmt, arg) +#else +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg) +#endif + +/* KRML_DEPRECATED(x): attach deprecation message x to a declaration where the + * compiler has a syntax for it, and expand to nothing everywhere else. */ +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) +#define KRML_DEPRECATED(x) __attribute__((deprecated(x))) +#elif defined(__GNUC__) +/* deprecated attribute is not defined in GCC < 4.5. */ +#define KRML_DEPRECATED(x) +#elif defined(_MSC_VER) +#define KRML_DEPRECATED(x) __declspec(deprecated(x)) +#elif !defined(KRML_DEPRECATED) +/* Neither GCC-like nor MSVC: keep annotated declarations compiling. */ +#define KRML_DEPRECATED(x) +#endif + +/* Macros for prettier unrolling of loops */ +#define KRML_LOOP1(i, n, x) \ + { \ + x \ + i += n; \ + } + +#define KRML_LOOP2(i, n, x) \ + KRML_LOOP1(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP3(i, n, x) \ + KRML_LOOP2(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP4(i, n, x) \ + KRML_LOOP2(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP5(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP6(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP7(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP3(i, n, x) + +#define KRML_LOOP8(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP4(i, n, x) + +#define KRML_LOOP9(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP10(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP11(i, n, x) \ + KRML_LOOP8(i, n, x) \ + 
KRML_LOOP3(i, n, x) + +#define KRML_LOOP12(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP4(i, n, x) + +#define KRML_LOOP13(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP5(i, n, x) + +#define KRML_LOOP14(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP6(i, n, x) + +#define KRML_LOOP15(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP7(i, n, x) + +#define KRML_LOOP16(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP8(i, n, x) +/* Unrolled form: declares uint32_t i = z, then pastes KRML_LOOP##n, so n must be a literal that names one of the KRML_LOOP macros; k is the step added to i after each copy of x. */ +#define KRML_UNROLL_FOR(i, z, n, k, x) \ + do { \ + uint32_t i = z; \ + KRML_LOOP##n(i, k, x) \ + } while (0) +/* Loop form: an ordinary for (uint32_t i = z; i < n; i += k) around the body x. */ +#define KRML_ACTUAL_FOR(i, z, n, k, x) \ + do { \ + for (uint32_t i = z; i < n; i += k) { \ + x \ + } \ + } while (0) + +#ifndef KRML_UNROLL_MAX +#define KRML_UNROLL_MAX 16 +#endif + +/* The numeric suffix N of KRML_MAYBE_FORN is the number of loop iterations, + * i.e. (n - z)/k as evaluated by krml. When N <= KRML_UNROLL_MAX the loop is + * expanded through KRML_UNROLL_FOR (nothing at all for N = 0); otherwise + * KRML_ACTUAL_FOR emits a real loop. */ +#if 0 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR0(i, z, n, k, x) +#else +#define KRML_MAYBE_FOR0(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 1 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 1, k, x) +#else +#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 2 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 2, k, x) +#else +#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 3 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 3, k, x) +#else +#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 4 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 4, k, x) +#else +#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 5 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 5, k, x) +#else +#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 6 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR6(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 6, k, x) +#else +#define 
KRML_MAYBE_FOR6(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 7 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 7, k, x) +#else +#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 8 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 8, k, x) +#else +#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 9 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 9, k, x) +#else +#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 10 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR10(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 10, k, x) +#else +#define KRML_MAYBE_FOR10(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 11 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 11, k, x) +#else +#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 12 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 12, k, x) +#else +#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 13 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 13, k, x) +#else +#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 14 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 14, k, x) +#else +#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 15 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 15, k, x) +#else +#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 16 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 16, k, x) +#else +#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif +#endif diff --git 
a/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h new file mode 100644 index 0000000000..2cf1887adf --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h @@ -0,0 +1,105 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef KRML_TYPES_H +#define KRML_TYPES_H + +#include +#include +#include +#include + +/* Types which are either abstract, meaning that have to be implemented in C, or + * which are models, meaning that they are swapped out at compile-time for + * hand-written C types (in which case they're marked as noextract). */ + +typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_; +typedef int64_t FStar_Int64_t, FStar_Int64_t_; +typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_; +typedef int32_t FStar_Int32_t, FStar_Int32_t_; +typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_; +typedef int16_t FStar_Int16_t, FStar_Int16_t_; +typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_; +typedef int8_t FStar_Int8_t, FStar_Int8_t_; + +/* Only useful when building krmllib, because it's in the dependency graph of + * FStar.Int.Cast. */ +typedef uint64_t FStar_UInt63_t, FStar_UInt63_t_; +typedef int64_t FStar_Int63_t, FStar_Int63_t_; + +typedef double FStar_Float_float; +typedef uint32_t FStar_Char_char; +typedef FILE *FStar_IO_fd_read, *FStar_IO_fd_write; + +typedef void *FStar_Dyn_dyn; + +typedef const char *C_String_t, *C_String_t_, *C_Compat_String_t, *C_Compat_String_t_; + +typedef int exit_code; +typedef FILE *channel; + +typedef unsigned long long TestLib_cycles; + +typedef uint64_t FStar_Date_dateTime, FStar_Date_timeSpan; + +/* Now Prims.string is no longer illegal with the new model in LowStar.Printf; + * it's operations that produce Prims_string which are illegal. Bring the + * definition into scope by default. 
*/ +typedef const char *Prims_string; +/* IS_MSVC64: MSVC itself (not a Clang front end that defines _MSC_VER) targeting x64. */ +#if (defined(_MSC_VER) && defined(_M_X64) && !defined(__clang__)) +#define IS_MSVC64 1 +#endif + +/* This code makes a number of assumptions and should be refined. In particular, + * it assumes that any non-MSVC amd64 compiler supports int128. Maybe it would + * be easier to just test for defined(__SIZEOF_INT128__) only? */ +#if (defined(__x86_64__) || \ + defined(__x86_64) || \ + defined(__aarch64__) || \ + (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \ + defined(__s390x__) || \ + (defined(_MSC_VER) && defined(_M_X64) && defined(__clang__)) || \ + (defined(__mips__) && defined(__LP64__)) || \ + (defined(__riscv) && __riscv_xlen == 64) || \ + defined(__SIZEOF_INT128__)) +#define HAS_INT128 1 +#endif + +/* The uint128 type is a special case since we offer several implementations of + * it, depending on the compiler and whether the user wants the verified + * implementation or not: __m128i under IS_MSVC64, unsigned __int128 under + * HAS_INT128, and a {low, high} pair of uint64_t otherwise or whenever + * KRML_VERIFIED_UINT128 is defined. */ +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +#include +typedef __m128i FStar_UInt128_uint128; +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +typedef unsigned __int128 FStar_UInt128_uint128; +#else +typedef struct FStar_UInt128_uint128_s { + uint64_t low; + uint64_t high; +} FStar_UInt128_uint128; +#endif + +/* The former is defined once, here (otherwise, conflicts for test-c89). The + * latter is for internal use. */ +typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; + +#include "krml/lowstar_endianness.h" + +#endif + +/* Avoid a circular loop: if this header is included via FStar_UInt8_16_32_64, + * then don't bring the uint128 definitions into scope. 
*/ +#ifndef __FStar_UInt_8_16_32_64_H + +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +#include "fstar_uint128_msvc.h" +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +#include "fstar_uint128_gcc64.h" +#else +#include "FStar_UInt128_Verified.h" +#include "fstar_uint128_struct_endianness.h" +#endif + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h new file mode 100644 index 0000000000..b44fa3f75d --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h @@ -0,0 +1,5 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +/* This file is automatically included when compiling with -wasm -d force-c */ +#define WasmSupport_check_buffer_size(X) diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h b/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h new file mode 100644 index 0000000000..48e9fd5795 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h @@ -0,0 +1,242 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __LOWSTAR_ENDIANNESS_H +#define __LOWSTAR_ENDIANNESS_H + +#include +#include + +/******************************************************************************/ +/* Implementing C.fst (part 2: endian-ness macros) */ +/******************************************************************************/ + +/* ... for Linux */ +#if defined(__linux__) || defined(__CYGWIN__) || defined(__USE_SYSTEM_ENDIAN_H__) || defined(__GLIBC__) +#include + +/* ... 
for OSX */ +#elif defined(__APPLE__) +#include +#define htole64(x) OSSwapHostToLittleInt64(x) +#define le64toh(x) OSSwapLittleToHostInt64(x) +#define htobe64(x) OSSwapHostToBigInt64(x) +#define be64toh(x) OSSwapBigToHostInt64(x) + +#define htole16(x) OSSwapHostToLittleInt16(x) +#define le16toh(x) OSSwapLittleToHostInt16(x) +#define htobe16(x) OSSwapHostToBigInt16(x) +#define be16toh(x) OSSwapBigToHostInt16(x) + +#define htole32(x) OSSwapHostToLittleInt32(x) +#define le32toh(x) OSSwapLittleToHostInt32(x) +#define htobe32(x) OSSwapHostToBigInt32(x) +#define be32toh(x) OSSwapBigToHostInt32(x) + +/* ... for Solaris */ +#elif defined(__sun__) +#include +#define htole64(x) LE_64(x) +#define le64toh(x) LE_64(x) +#define htobe64(x) BE_64(x) +#define be64toh(x) BE_64(x) + +#define htole16(x) LE_16(x) +#define le16toh(x) LE_16(x) +#define htobe16(x) BE_16(x) +#define be16toh(x) BE_16(x) + +#define htole32(x) LE_32(x) +#define le32toh(x) LE_32(x) +#define htobe32(x) BE_32(x) +#define be32toh(x) BE_32(x) + +/* ... for the BSDs */ +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +#include +#elif defined(__OpenBSD__) +#include + +/* ... for Windows (MSVC)... not targeting XBOX 360! */ +#elif defined(_MSC_VER) + +#include +#define htobe16(x) _byteswap_ushort(x) +#define htole16(x) (x) +#define be16toh(x) _byteswap_ushort(x) +#define le16toh(x) (x) + +#define htobe32(x) _byteswap_ulong(x) +#define htole32(x) (x) +#define be32toh(x) _byteswap_ulong(x) +#define le32toh(x) (x) + +#define htobe64(x) _byteswap_uint64(x) +#define htole64(x) (x) +#define be64toh(x) _byteswap_uint64(x) +#define le64toh(x) (x) + +/* ... for Windows (GCC-like, e.g. 
mingw or clang) */ +#elif (defined(_WIN32) || defined(_WIN64)) && \ + (defined(__GNUC__) || defined(__clang__)) + +#define htobe16(x) __builtin_bswap16(x) +#define htole16(x) (x) +#define be16toh(x) __builtin_bswap16(x) +#define le16toh(x) (x) + +#define htobe32(x) __builtin_bswap32(x) +#define htole32(x) (x) +#define be32toh(x) __builtin_bswap32(x) +#define le32toh(x) (x) + +#define htobe64(x) __builtin_bswap64(x) +#define htole64(x) (x) +#define be64toh(x) __builtin_bswap64(x) +#define le64toh(x) (x) + +/* ... generic big-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* byte swapping code inspired by: + * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h + * */ + +/* 16-bit variants, needed by the load16_xx / store16_xx accessors: identity + * for big-endian, a two-byte swap for little-endian. */ +#define htobe16(x) (x) +#define be16toh(x) (x) +#define htole16(x) \ + (__extension__({ \ + uint16_t _temp16 = (x); \ + (uint16_t)((_temp16 >> 8) | (_temp16 << 8)); \ + })) +#define le16toh(x) (htole16((x))) + +#define htobe32(x) (x) +#define be32toh(x) (x) +#define htole32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define le32toh(x) (htole32((x))) + +#define htobe64(x) (x) +#define be64toh(x) (x) +/* Reverse all eight bytes: byte-swap each 32-bit half with htole32 and + * exchange the halves. htobe32 must not be used here because it is the + * identity in this branch, which would leave the bytes inside each half + * unswapped. */ +#define htole64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htole32((uint32_t)__temp); \ + uint32_t __high = htole32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define le64toh(x) (htole64((x))) + +/* ... 
generic little-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +#define htole32(x) (x) +#define le32toh(x) (x) +#define htobe32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define be32toh(x) (htobe32((x))) + +#define htole64(x) (x) +#define le64toh(x) (x) +#define htobe64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define be64toh(x) (htobe64((x))) + +/* ... couldn't determine endian-ness of the target platform */ +#else +#error "Please define __BYTE_ORDER__!" + +#endif /* defined(__linux__) || ... */ + +/* Loads and stores. These avoid undefined behavior due to unaligned memory + * accesses, via memcpy. */ + +inline static uint16_t +load16(uint8_t *b) +{ + uint16_t x; + memcpy(&x, b, 2); + return x; +} + +inline static uint32_t +load32(uint8_t *b) +{ + uint32_t x; + memcpy(&x, b, 4); + return x; +} + +inline static uint64_t +load64(uint8_t *b) +{ + uint64_t x; + memcpy(&x, b, 8); + return x; +} + +inline static void +store16(uint8_t *b, uint16_t i) +{ + memcpy(b, &i, 2); +} + +inline static void +store32(uint8_t *b, uint32_t i) +{ + memcpy(b, &i, 4); +} + +inline static void +store64(uint8_t *b, uint64_t i) +{ + memcpy(b, &i, 8); +} + +/* Legacy accessors so that this header can serve as an implementation of + * C.Endianness */ +#define load16_le(b) (le16toh(load16(b))) +#define store16_le(b, i) (store16(b, htole16(i))) +#define load16_be(b) (be16toh(load16(b))) +#define store16_be(b, i) (store16(b, htobe16(i))) + +#define load32_le(b) (le32toh(load32(b))) +#define store32_le(b, i) (store32(b, htole32(i))) +#define load32_be(b) (be32toh(load32(b))) +#define store32_be(b, i) (store32(b, htobe32(i))) + +#define 
load64_le(b) (le64toh(load64(b))) +#define store64_le(b, i) (store64(b, htole64(i))) +#define load64_be(b) (be64toh(load64(b))) +#define store64_be(b, i) (store64(b, htobe64(i))) + +/* Co-existence of LowStar.Endianness and FStar.Endianness generates name + * conflicts, because of course both insist on having no prefixes. Until a + * prefix is added, or until we truly retire FStar.Endianness, solve this issue + * in an elegant way. */ +#define load16_le0 load16_le +#define store16_le0 store16_le +#define load16_be0 load16_be +#define store16_be0 store16_be + +#define load32_le0 load32_le +#define store32_le0 store32_le +#define load32_be0 load32_be +#define store32_be0 store32_be + +#define load64_le0 load64_le +#define store64_le0 store64_le +#define load64_be0 load64_be +#define store64_be0 store64_be + +#define load128_le0 load128_le +#define store128_le0 store128_le +#define load128_be0 load128_be +#define store128_be0 store128_be + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krmllib.h b/security/nss/lib/freebl/verified/karamel/include/krmllib.h new file mode 100644 index 0000000000..1f461f351c --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krmllib.h @@ -0,0 +1,28 @@ +#ifndef __KRMLLIB_H +#define __KRMLLIB_H + +/******************************************************************************/ +/* The all-in-one krmllib.h header */ +/******************************************************************************/ + +/* This is a meta-header that is included by default in KaRaMeL generated + * programs. If you wish to have a more lightweight set of headers, or are + * targeting an environment where controlling these macros yourself is + * important, consider using: + * + * krml -minimal + * + * to disable the inclusion of this file (note: this also disables the default + * argument "-bundle FStar.*"). You can then include the headers of your choice + * one by one, using -add-early-include. 
*/ + +#include "krml/internal/target.h" +#include "krml/internal/callconv.h" +#include "krml/internal/builtin.h" +#include "krml/internal/debug.h" +#include "krml/internal/types.h" + +#include "krml/lowstar_endianness.h" +#include "krml/fstar_int.h" + +#endif /* __KRMLLIB_H */ diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h new file mode 100644 index 0000000000..4affcee353 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h @@ -0,0 +1,75 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. +*/ + +#ifndef __FStar_UInt128_H +#define __FStar_UInt128_H + +#include +#include +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline 
FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); + +#define __FStar_UInt128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h new file mode 100644 index 0000000000..8f235c3146 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h @@ -0,0 +1,327 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + +#ifndef __FStar_UInt128_Verified_H +#define __FStar_UInt128_Verified_H + +#include "FStar_UInt_8_16_32_64.h" +#include <inttypes.h> +#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/internal/target.h" +/* Branch-free unsigned comparison: yields 1 when a < b and 0 otherwise (top bit of the borrow expression shifted down to bit 0). */ static inline uint64_t +FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U; +} + +/* Thin alias for FStar_UInt128_constant_time_carry. */ static inline uint64_t +FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +/* 128-bit addition on the {low, high} pair; the result wraps modulo 2^128. */ static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + /* the low sum overflowed exactly when it is smaller than b.low */ lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +/* Same computation as FStar_UInt128_add. */ static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +/* Same computation as FStar_UInt128_add. */ static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +/* 128-bit subtraction on the {low, high} pair; the result wraps modulo 2^128. */ static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + /* a borrow occurred exactly when a.low is smaller than the wrapped difference */ lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +/* Same computation as FStar_UInt128_sub. */ static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high
- FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return FStar_UInt128_sub_mod_impl(a, b); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +} + +static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +static inline uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) { + return a; + } else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << (s - FStar_UInt128_u32_64); + return lit; +} + +static inline 
FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ + /* dispatch on whether the shift stays below 64 bits */ if (s < FStar_UInt128_u32_64) { + return FStar_UInt128_shift_left_small(a, s); + } else { + return FStar_UInt128_shift_left_large(a, s); + } +} + +/* Combines lo >> s with the bits of hi that cross into the low word. Evaluates hi << (64 - s), so s must not be 0 here; the caller below guards that case. */ static inline uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +/* Thin alias for FStar_UInt128_add_u64_shift_right. */ static inline uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +/* Right shift for s below 64 (that range is what FStar_UInt128_shift_right passes in). */ static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + /* s == 0 is returned early so the helper never shifts by a full 64 bits */ if (s == (uint32_t)0U) { + return a; + } else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +/* Right shift for s of 64 or more: only bits of a.high survive, moved into the low word. */ static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = a.high >> (s - FStar_UInt128_u32_64); + lit.high = (uint64_t)0U; + return lit; +} + +/* Logical right shift of a 128-bit value; picks the small or large helper by comparing s with 64. NOTE(review): no check that s is below 128 is visible here - confirm callers guarantee it. */ static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) { + return FStar_UInt128_shift_right_small(a, s); + } else { + return FStar_UInt128_shift_right_large(a, s); + } +} + +/* Boolean comparisons below use short-circuit && and ||; the *_mask functions later in this header are the bitwise counterparts. */ static inline bool +FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +/* a > b: the high words decide, the low words break ties. */ static inline bool +FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low > b.low); +} + +/* a < b: the high words decide, the low words break ties. */ static inline bool +FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low < b.low); +} + +/* a >= b: the high words decide, the low words break ties. */ static inline bool +FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low >= b.low); +} + +static
inline bool +FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low <= b.low); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + lit.high = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_uint64_to_uint128(uint64_t a) +{ + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +} + +static inline uint64_t +FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +static inline uint64_t +FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +static inline uint64_t +FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) * (uint64_t)y + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = + ((x >> FStar_UInt128_u32_32) * (uint64_t)y + 
(FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32; + return lit; +} + +static inline uint64_t +FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); + lit.high = + (x >> FStar_UInt128_u32_32) * (y >> FStar_UInt128_u32_32) + + (((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32) + + ((FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) >> + FStar_UInt128_u32_32); + return lit; +} + +#define __FStar_UInt128_Verified_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h new file mode 100644 index 0000000000..51f3eead1e --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h @@ -0,0 +1,218 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + +#ifndef __FStar_UInt_8_16_32_64_H +#define __FStar_UInt_8_16_32_64_H + +#include <inttypes.h> +#include <stdbool.h> +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" +extern Prims_int FStar_UInt64_n; + +extern bool FStar_UInt64_uu___is_Mk(uint64_t projectee); + +extern Prims_int FStar_UInt64___proj__Mk__item__v(uint64_t projectee); + +extern Prims_int FStar_UInt64_v(uint64_t x); + +extern uint64_t FStar_UInt64_uint_to_t(Prims_int x); + +extern uint64_t FStar_UInt64_zero; + +extern uint64_t FStar_UInt64_one; + +extern uint64_t FStar_UInt64_minus(uint64_t a); + +extern uint32_t FStar_UInt64_n_minus_one; + +/* Equality mask without branches: returns all ones (0xFFFFFFFFFFFFFFFF) when a == b and 0 otherwise. */ static inline uint64_t +FStar_UInt64_eq_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a ^ b; + /* two's-complement negation of x */ uint64_t minus_x = ~x + (uint64_t)1U; + /* the top bit of x | -x is set exactly when x is non-zero */ uint64_t x_or_minus_x = x | minus_x; + uint64_t xnx = x_or_minus_x >> (uint32_t)63U; + /* 0 - 1 wraps to all ones, 1 - 1 gives 0 */ return xnx - (uint64_t)1U; +} + +/* Greater-or-equal mask without branches: returns all ones when a >= b (unsigned) and 0 otherwise. */ static inline uint64_t +FStar_UInt64_gte_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a; + uint64_t y = b; + uint64_t x_xor_y = x ^ y; + uint64_t x_sub_y = x - y; + uint64_t x_sub_y_xor_y = x_sub_y ^ y; + uint64_t q = x_xor_y | x_sub_y_xor_y; + uint64_t x_xor_q = x ^ q; + /* the top bit of x ^ q is 1 exactly when x < y */ uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; + return x_xor_q_ - (uint64_t)1U; +} + +extern Prims_string FStar_UInt64_to_string(uint64_t uu___); + +extern Prims_string FStar_UInt64_to_string_hex(uint64_t uu___); + +extern Prims_string FStar_UInt64_to_string_hex_pad(uint64_t uu___); + +extern uint64_t FStar_UInt64_of_string(Prims_string uu___); + +extern Prims_int FStar_UInt32_n; + +extern bool FStar_UInt32_uu___is_Mk(uint32_t projectee); + +extern Prims_int FStar_UInt32___proj__Mk__item__v(uint32_t projectee); + +extern Prims_int FStar_UInt32_v(uint32_t x); + +extern uint32_t FStar_UInt32_uint_to_t(Prims_int x); + +extern uint32_t FStar_UInt32_zero; + +extern uint32_t FStar_UInt32_one; + +extern uint32_t FStar_UInt32_minus(uint32_t a); + +extern uint32_t
FStar_UInt32_n_minus_one; + +static inline uint32_t +FStar_UInt32_eq_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a ^ b; + uint32_t minus_x = ~x + (uint32_t)1U; + uint32_t x_or_minus_x = x | minus_x; + uint32_t xnx = x_or_minus_x >> (uint32_t)31U; + return xnx - (uint32_t)1U; +} + +static inline uint32_t +FStar_UInt32_gte_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a; + uint32_t y = b; + uint32_t x_xor_y = x ^ y; + uint32_t x_sub_y = x - y; + uint32_t x_sub_y_xor_y = x_sub_y ^ y; + uint32_t q = x_xor_y | x_sub_y_xor_y; + uint32_t x_xor_q = x ^ q; + uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; + return x_xor_q_ - (uint32_t)1U; +} + +extern Prims_string FStar_UInt32_to_string(uint32_t uu___); + +extern Prims_string FStar_UInt32_to_string_hex(uint32_t uu___); + +extern Prims_string FStar_UInt32_to_string_hex_pad(uint32_t uu___); + +extern uint32_t FStar_UInt32_of_string(Prims_string uu___); + +extern Prims_int FStar_UInt16_n; + +extern bool FStar_UInt16_uu___is_Mk(uint16_t projectee); + +extern Prims_int FStar_UInt16___proj__Mk__item__v(uint16_t projectee); + +extern Prims_int FStar_UInt16_v(uint16_t x); + +extern uint16_t FStar_UInt16_uint_to_t(Prims_int x); + +extern uint16_t FStar_UInt16_zero; + +extern uint16_t FStar_UInt16_one; + +extern uint16_t FStar_UInt16_minus(uint16_t a); + +extern uint32_t FStar_UInt16_n_minus_one; + +static inline uint16_t +FStar_UInt16_eq_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a ^ b; + uint16_t minus_x = ~x + (uint16_t)1U; + uint16_t x_or_minus_x = x | minus_x; + uint16_t xnx = x_or_minus_x >> (uint32_t)15U; + return xnx - (uint16_t)1U; +} + +static inline uint16_t +FStar_UInt16_gte_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a; + uint16_t y = b; + uint16_t x_xor_y = x ^ y; + uint16_t x_sub_y = x - y; + uint16_t x_sub_y_xor_y = x_sub_y ^ y; + uint16_t q = x_xor_y | x_sub_y_xor_y; + uint16_t x_xor_q = x ^ q; + uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; + return x_xor_q_ - (uint16_t)1U; +} + +extern Prims_string 
FStar_UInt16_to_string(uint16_t uu___); + +extern Prims_string FStar_UInt16_to_string_hex(uint16_t uu___); + +extern Prims_string FStar_UInt16_to_string_hex_pad(uint16_t uu___); + +extern uint16_t FStar_UInt16_of_string(Prims_string uu___); + +extern Prims_int FStar_UInt8_n; + +extern bool FStar_UInt8_uu___is_Mk(uint8_t projectee); + +extern Prims_int FStar_UInt8___proj__Mk__item__v(uint8_t projectee); + +extern Prims_int FStar_UInt8_v(uint8_t x); + +extern uint8_t FStar_UInt8_uint_to_t(Prims_int x); + +extern uint8_t FStar_UInt8_zero; + +extern uint8_t FStar_UInt8_one; + +extern uint8_t FStar_UInt8_minus(uint8_t a); + +extern uint32_t FStar_UInt8_n_minus_one; + +static inline uint8_t +FStar_UInt8_eq_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a ^ b; + uint8_t minus_x = ~x + (uint8_t)1U; + uint8_t x_or_minus_x = x | minus_x; + uint8_t xnx = x_or_minus_x >> (uint32_t)7U; + return xnx - (uint8_t)1U; +} + +static inline uint8_t +FStar_UInt8_gte_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a; + uint8_t y = b; + uint8_t x_xor_y = x ^ y; + uint8_t x_sub_y = x - y; + uint8_t x_sub_y_xor_y = x_sub_y ^ y; + uint8_t q = x_xor_y | x_sub_y_xor_y; + uint8_t x_xor_q = x ^ q; + uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; + return x_xor_q_ - (uint8_t)1U; +} + +extern Prims_string FStar_UInt8_to_string(uint8_t uu___); + +extern Prims_string FStar_UInt8_to_string_hex(uint8_t uu___); + +extern Prims_string FStar_UInt8_to_string_hex_pad(uint8_t uu___); + +extern uint8_t FStar_UInt8_of_string(Prims_string uu___); + +typedef uint8_t FStar_UInt8_byte; + +#define __FStar_UInt_8_16_32_64_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h new file mode 100644 index 0000000000..5feb077a48 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h @@ -0,0 +1,25 @@ +/* + Copyright (c) INRIA and Microsoft 
Corporation. All rights reserved. + Licensed under the Apache 2.0 License. +*/ + +#ifndef __LowStar_Endianness_H +#define __LowStar_Endianness_H + +#include "FStar_UInt128.h" +#include +#include +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" +static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_le(uint8_t *x0); + +static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_be(uint8_t *x0); + +#define __LowStar_Endianness_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic new file mode 100644 index 0000000000..672b58015c --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic @@ -0,0 +1,56 @@ +# A basic Makefile that KaRaMeL copies in the output directory; this is not +# guaranteed to work and will only work well for very simple projects. This +# Makefile uses: +# - the custom C files passed to your krml invocation +# - the custom C flags passed to your krml invocation +# - the -o option passed to your krml invocation + +include Makefile.include + +ifeq (,$(KRML_HOME)) + $(error please define KRML_HOME to point to the root of your KaRaMeL git checkout) +endif + +CFLAGS += -I. 
-I $(KRML_HOME)/include -I $(KRML_HOME)/krmllib/dist/minimal +CFLAGS += -Wall -Wextra -Werror -std=c11 -Wno-unused-variable \ + -Wno-unknown-warning-option -Wno-unused-but-set-variable -Wno-unused-function \ + -Wno-unused-parameter -Wno-infinite-recursion \ + -g -fwrapv -D_BSD_SOURCE -D_DEFAULT_SOURCE +ifeq ($(OS),Windows_NT) +CFLAGS += -D__USE_MINGW_ANSI_STDIO +else +CFLAGS += -fPIC +endif +CFLAGS += $(USER_CFLAGS) + +SOURCES += $(ALL_C_FILES) $(USER_C_FILES) +ifneq (,$(BLACKLIST)) + SOURCES := $(filter-out $(BLACKLIST),$(SOURCES)) +endif +OBJS += $(patsubst %.c,%.o,$(SOURCES)) + +all: $(USER_TARGET) + +$(USER_TARGET): $(OBJS) + +AR ?= ar + +%.a: + $(AR) cr $@ $^ + +%.exe: + $(CC) $(CFLAGS) -o $@ $^ $(KRML_HOME)/krmllib/dist/generic/libkrmllib.a + +%.so: + $(CC) $(CFLAGS) -shared -o $@ $^ + +%.d: %.c + @set -e; rm -f $@; \ + $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ + sed 's,\($(notdir $*)\)\.o[ :]*,$(dir $@)\1.o $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +include $(patsubst %.c,%.d,$(SOURCES)) + +clean: + rm -rf *.o *.d $(USER_TARGET) diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include new file mode 100644 index 0000000000..ad53217184 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include @@ -0,0 +1,5 @@ +USER_TARGET=libkrmllib.a +USER_CFLAGS= +USER_C_FILES=fstar_uint128.c +ALL_C_FILES= +ALL_H_FILES=FStar_UInt128.h FStar_UInt_8_16_32_64.h LowStar_Endianness.h diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h new file mode 100644 index 0000000000..33cff6b6d4 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h @@ -0,0 +1,225 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. 
+ Licensed under the Apache 2.0 License. */ + +/******************************************************************************/ +/* Machine integers (128-bit arithmetic) */ +/******************************************************************************/ + +/* This header contains two things. + * + * First, an implementation of 128-bit arithmetic suitable for 64-bit GCC and + * Clang, i.e. all the operations from FStar.UInt128. + * + * Second, 128-bit operations from C.Endianness (or LowStar.Endianness), + * suitable for any compiler and platform (via a series of ifdefs). This second + * part is unfortunate, and should be fixed by moving {load,store}128_{be,le} to + * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code. + * */ + +/* This file is used for both the minimal and generic krmllib distributions. As + * such, it assumes that the machine integers have been bundled the exact same + * way in both cases. */ + +#ifndef FSTAR_UINT128_GCC64 +#define FSTAR_UINT128_GCC64 + +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" +#include "LowStar_Endianness.h" + +/* GCC + using native unsigned __int128 support */ + +/* Builds a 128-bit value from 16 bytes at b: the low half comes from load64_le(b), the high half from load64_le(b + 8). */ inline static uint128_t +load128_le(uint8_t *b) +{ + uint128_t l = (uint128_t)load64_le(b); + uint128_t h = (uint128_t)load64_le(b + 8); + return (h << 64 | l); +} + +/* Writes n to 16 bytes at b: the low 64 bits go through store64_le at b, the high 64 bits at b + 8. */ inline static void +store128_le(uint8_t *b, uint128_t n) +{ + store64_le(b, (uint64_t)n); + store64_le(b + 8, (uint64_t)(n >> 64)); +} + +/* Builds a 128-bit value from 16 bytes at b: the high half comes from load64_be(b), the low half from load64_be(b + 8). */ inline static uint128_t +load128_be(uint8_t *b) +{ + uint128_t h = (uint128_t)load64_be(b); + uint128_t l = (uint128_t)load64_be(b + 8); + return (h << 64 | l); +} + +/* Writes n to 16 bytes at b: the high 64 bits go through store64_be at b, the low 64 bits at b + 8. */ inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store64_be(b, (uint64_t)(n >> 64)); + store64_be(b + 8, (uint64_t)n); +} + +/* The arithmetic wrappers below map each FStar_UInt128 operation directly onto the native uint128_t operator. */ inline static uint128_t +FStar_UInt128_add(uint128_t x, uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_mul(uint128_t x, uint128_t y) +{ + return x * y; +} + +inline static uint128_t
+FStar_UInt128_add_mod(uint128_t x, uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_sub(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static uint128_t +FStar_UInt128_sub_mod(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static uint128_t +FStar_UInt128_logand(uint128_t x, uint128_t y) +{ + return x & y; +} + +inline static uint128_t +FStar_UInt128_logor(uint128_t x, uint128_t y) +{ + return x | y; +} + +inline static uint128_t +FStar_UInt128_logxor(uint128_t x, uint128_t y) +{ + return x ^ y; +} + +inline static uint128_t +FStar_UInt128_lognot(uint128_t x) +{ + return ~x; +} + +inline static uint128_t +FStar_UInt128_shift_left(uint128_t x, uint32_t y) +{ + return x << y; +} + +inline static uint128_t +FStar_UInt128_shift_right(uint128_t x, uint32_t y) +{ + return x >> y; +} + +inline static uint128_t +FStar_UInt128_uint64_to_uint128(uint64_t x) +{ + return (uint128_t)x; +} + +inline static uint64_t +FStar_UInt128_uint128_to_uint64(uint128_t x) +{ + return (uint64_t)x; +} + +inline static uint128_t +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + return ((uint128_t)x) * y; +} + +inline static uint128_t +FStar_UInt128_eq_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) & + FStar_UInt64_eq_mask(x, y); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint128_t +FStar_UInt128_gte_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + (FStar_UInt64_gte_mask(x >> 64, y >> 64) & + ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) | + (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y)); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__low(uint128_t x) +{ + return (uint64_t)x; +} + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__high(uint128_t x) +{ + return (uint64_t)(x >> 64); +} + +inline static uint128_t +FStar_UInt128_add_underspec(uint128_t x, 
uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_sub_underspec(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static bool +FStar_UInt128_eq(uint128_t x, uint128_t y) +{ + return x == y; +} + +inline static bool +FStar_UInt128_gt(uint128_t x, uint128_t y) +{ + return x > y; +} + +inline static bool +FStar_UInt128_lt(uint128_t x, uint128_t y) +{ + return x < y; +} + +inline static bool +FStar_UInt128_gte(uint128_t x, uint128_t y) +{ + return x >= y; +} + +inline static bool +FStar_UInt128_lte(uint128_t x, uint128_t y) +{ + return x <= y; +} + +inline static uint128_t +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + return (uint128_t)x * (uint128_t)y; +} + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h new file mode 100644 index 0000000000..e9b366e259 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h @@ -0,0 +1,571 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
*/ + +/* This file was generated by KaRaMeL + * then hand-edited to use MSVC intrinsics KaRaMeL invocation: + * C:\users\barrybo\mitls2c\karamel\_build\src\Karamel.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "krmllib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims + * F* version: 15104ff8 + * KaRaMeL version: 318b7fa8 + */ + +#ifndef FSTAR_UINT128_MSVC +#define FSTAR_UINT128_MSVC + +#include "krml/internal/types.h" +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" + +#ifndef _MSC_VER +#error This file only works with the MSVC compiler +#endif + +/* JP: need to rip out HAS_OPTIMIZED since the header guards in types.h are now + * done properly and only include this file when we know for sure we are on + * 64-bit MSVC. */ + +#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128) +#define HAS_OPTIMIZED 1 +#else +#define HAS_OPTIMIZED 0 +#endif + +// Define .low and .high in terms of the __m128i fields, to reduce +// the amount of churn in this file. 
+#if HAS_OPTIMIZED +#include <intrin.h> +#include <immintrin.h> +#define low m128i_u64[0] +#define high m128i_u64[1] +#endif + +/* Loads 16 bytes at b as a 128-bit value, low half first. */ inline static FStar_UInt128_uint128 +load128_le(uint8_t *b) +{ +#if HAS_OPTIMIZED + return _mm_loadu_si128((__m128i *)b); +#else + FStar_UInt128_uint128 lit; + lit.low = load64_le(b); + lit.high = load64_le(b + 8); + return lit; +#endif +} + +/* Stores n to 16 bytes at b: low half at b, high half at b + 8. */ inline static void +store128_le(uint8_t *b, FStar_UInt128_uint128 n) +{ + store64_le(b, n.low); + store64_le(b + 8, n.high); +} + +/* Loads 16 bytes at b as a 128-bit value: high half from b, low half from b + 8. */ inline static FStar_UInt128_uint128 +load128_be(uint8_t *b) +{ + uint64_t l = load64_be(b + 8); + uint64_t h = load64_be(b); +#if HAS_OPTIMIZED + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = l; + lit.high = h; + return lit; +#endif +} + +/* Stores n to 16 bytes at b: high half at b, low half at b + 8. */ inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store64_be(b, n.high); + store64_be(b + 8, n.low); +} + +/* Branch-free unsigned comparison: 1 when a < b, 0 otherwise. The unparenthesized form parses as (a ^ b) | ((a - b) ^ b). */ inline static uint64_t +FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ (a ^ b | a - b ^ b)) >> (uint32_t)63U; +} + +inline static uint64_t +FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +/* 128-bit addition, wrapping modulo 2^128; the optimized path chains two add-with-carry intrinsics. */ inline static FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char carry = + _addcarry_u64(0, a.low, b.low, &l); // low/CF = a.low+b.low+0 + _addcarry_u64(carry, a.high, b.high, &h); // high = a.high+b.high+CF + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +#endif +} + +/* Same result as FStar_UInt128_add. The fallback branch previously lacked the closing parenthesis of the FStar_UInt128_carry call and could not compile when HAS_OPTIMIZED is 0. */ inline static FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +#endif +} + +inline static
FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ /* same result as FStar_UInt128_add on both paths */ +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +#endif +} + +/* 128-bit subtraction, wrapping modulo 2^128; the optimized path chains two subtract-with-borrow intrinsics. */ inline static FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char borrow = _subborrow_u64(0, a.low, b.low, &l); + _subborrow_u64(borrow, a.high, b.high, &h); + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + /* a borrow occurred exactly when a.low is smaller than the wrapped difference */ lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +/* Same result as FStar_UInt128_sub. */ inline static FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +/* Portable subtraction body; FStar_UInt128_sub_mod calls it when HAS_OPTIMIZED is 0. */ inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +/* Same result as FStar_UInt128_sub. */ inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + return FStar_UInt128_sub_mod_impl(a, b); +#endif +} + +/* Bitwise AND of both halves. */ inline static FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return _mm_and_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if
HAS_OPTIMIZED + return _mm_xor_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +#endif +} + +/* Bitwise OR of both halves. */ inline static FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return _mm_or_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +#endif +} + +/* Bitwise complement of both halves. _mm_andnot_si128(x, y) computes (~x) & y, so the second operand must be all ones; passing a itself (as before) always produced zero and disagreed with the fallback branch. */ inline static FStar_UInt128_uint128 +FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ +#if HAS_OPTIMIZED + return _mm_andnot_si128(a, _mm_set1_epi32(-1)); +#else + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +#endif +} + +static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +/* Combines hi << s with the bits of lo that cross into the high word. The shift count parses as (64 - s), so s must not be 0 here; the caller below guards that case. */ inline static uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> FStar_UInt128_u32_64 - s); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +/* Left shift for s below 64; s == 0 is returned early so the helper never shifts by a full 64 bits. */ inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +/* Left shift for s of 64 or more: only bits of a.low survive, moved into the high word. The shift count parses as (s - 64). */ inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << s - FStar_UInt128_u32_64; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = a.low << s; + uint64_t h = __shiftleft128(a.low, a.high, (unsigned char)s); + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(a.low << (s - FStar_UInt128_u32_64), 0); +
} +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_left_small(a, s); + else + return FStar_UInt128_shift_left_large(a, s); +#endif +} + +/* Combines lo >> s with the bits of hi that cross into the low word. Binary minus binds tighter than <<, so the count is (64 - s); s must not be 0 here and the caller below guards that case. */ inline static uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << FStar_UInt128_u32_64 - s); +} + +/* Thin alias for FStar_UInt128_add_u64_shift_right. */ inline static uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +/* Right shift for s below 64; s == 0 is returned early so the helper never shifts by a full 64 bits. */ inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +/* Right shift for s of 64 or more: only bits of a.high survive, moved into the low word. */ inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + /* parses as a.high >> (s - 64) */ lit.low = a.high >> s - FStar_UInt128_u32_64; + lit.high = (uint64_t)0U; + return lit; +} + +/* Logical right shift of a 128-bit value. The optimized path handles s == 0, s below 64 and s of 64 or more separately; the fallback dispatches to the small/large helpers. NOTE(review): no check that s is below 128 is visible here - confirm callers guarantee it. */ inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = __shiftright128(a.low, a.high, (unsigned char)s); + uint64_t h = a.high >> s; + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64)); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_right_small(a, s); + else + return FStar_UInt128_shift_right_large(a, s); +#endif +} + +/* True when both halves are equal. */ inline static bool +FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +/* a > b: the high words decide, the low words break ties. && binds tighter than ||, so the expression groups as hi > hi' || (hi == hi' && lo > lo'). */ inline static bool +FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || a.high == b.high && a.low > b.low; +} + +inline static bool +FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || a.high
== b.high && a.low < b.low; +} + +inline static bool +FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || a.high == b.high && a.low >= b.low; +} + +inline static bool +FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || a.high == b.high && a.low <= b.low; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + // PCMPW to produce 4 32-bit values, all either 0x0 or 0xffffffff + __m128i r32 = _mm_cmpeq_epi32(a, b); + // Shuffle 3,2,1,0 into 2,3,0,1 (swapping dwords inside each half) + __m128i s32 = _mm_shuffle_epi32(r32, _MM_SHUFFLE(2, 3, 0, 1)); + // Bitwise and to compute (3&2),(2&3),(1&0),(0&1) + __m128i ret64 = _mm_and_si128(r32, s32); + // Swap the two 64-bit values to form s64 + __m128i s64 = + _mm_shuffle_epi32(ret64, _MM_SHUFFLE(1, 0, 3, 2)); // 3,2,1,0 -> 1,0,3,2 + // And them together + return _mm_and_si128(ret64, s64); +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED && 0 + // ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each + // eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each + // slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3) + // then splat slot 0 to 3,2,1,0 + __m128i gt = _mm_cmpgt_epi32(a, b); + __m128i eq = _mm_cmpeq_epi32(a, b); + __m128i ge = _mm_or_si128(gt, eq); + __m128i ge0 = ge; + __m128i eq0 = eq; + __m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1 + __m128i t1 = _mm_and_si128(eq0, ge1); + __m128i ret = _mm_or_si128(ge, t1); // ge0 | (eq0 & ge1) is now in 0 + 
__m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1 + __m128i ge2 = + _mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2 + __m128i t2 = + _mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2) + ret = _mm_or_si128(ret, t2); + __m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3 + __m128i ge3 = + _mm_srli_si128(ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3 + __m128i t3 = _mm_and_si128( + eq0, _mm_and_si128( + eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3) + ret = _mm_or_si128(ret, t3); + return _mm_shuffle_epi32( + ret, + _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords. +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + lit.high = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_uint64_to_uint128(uint64_t a) +{ +#if HAS_OPTIMIZED + return _mm_set_epi64x(0, a); +#else + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +#endif +} + +inline static uint64_t +FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +inline static uint64_t +FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +inline static uint64_t +FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, (uint64_t)y, &h); + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + 
lit.low = FStar_UInt128_u32_combine( + (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32) >> + FStar_UInt128_u32_32; + return lit; +#endif +} + +/* Note: static headers bring scope collision issues when they define types! + * Because now client (karamel-generated) code will include this header and + * there might be type collisions if the client code uses quadruples of uint64s. + * So, we cannot use the karamel-generated name. */ +typedef struct K_quad_s { + uint64_t fst; + uint64_t snd; + uint64_t thd; + uint64_t f3; +} K_quad; + +inline static K_quad +FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) +{ + K_quad tmp; + tmp.fst = FStar_UInt128_u64_mod_32(x); + tmp.snd = FStar_UInt128_u64_mod_32( + FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)); + tmp.thd = x >> FStar_UInt128_u32_32; + tmp.f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> + FStar_UInt128_u32_32); + return tmp; +} + +static uint64_t +FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) +{ + K_quad scrut = + FStar_UInt128_mul_wide_impl_t_(x, y); + uint64_t u1 = scrut.fst; + uint64_t w3 = scrut.snd; + uint64_t x_ = scrut.thd; + uint64_t t_ = scrut.f3; + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine_( + u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3); + lit.high = + x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + + (u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_) >> + FStar_UInt128_u32_32); + return lit; +} + +inline static 
FStar_UInt128_uint128 +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, y, &h); + return _mm_set_epi64x(h, l); +#else + return FStar_UInt128_mul_wide_impl(x, y); +#endif +} + +#undef low +#undef high + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h new file mode 100644 index 0000000000..61fe85c49e --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h @@ -0,0 +1,84 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H +#define FSTAR_UINT128_STRUCT_ENDIANNESS_H + +/* Hand-written implementation of endianness-related uint128 functions + * for the extracted uint128 implementation */ + +/* Access 64-bit fields within the int128. */ +#define HIGH64_OF(x) ((x)->high) +#define LOW64_OF(x) ((x)->low) + +/* A series of definitions written using pointers. 
*/ + +inline static void +load128_le_(uint8_t *b, uint128_t *r) +{ + LOW64_OF(r) = load64_le(b); + HIGH64_OF(r) = load64_le(b + 8); +} + +inline static void +store128_le_(uint8_t *b, uint128_t *n) +{ + store64_le(b, LOW64_OF(n)); + store64_le(b + 8, HIGH64_OF(n)); +} + +inline static void +load128_be_(uint8_t *b, uint128_t *r) +{ + HIGH64_OF(r) = load64_be(b); + LOW64_OF(r) = load64_be(b + 8); +} + +inline static void +store128_be_(uint8_t *b, uint128_t *n) +{ + store64_be(b, HIGH64_OF(n)); + store64_be(b + 8, LOW64_OF(n)); +} + +#ifndef KRML_NOSTRUCT_PASSING + +inline static uint128_t +load128_le(uint8_t *b) +{ + uint128_t r; + load128_le_(b, &r); + return r; +} + +inline static void +store128_le(uint8_t *b, uint128_t n) +{ + store128_le_(b, &n); +} + +inline static uint128_t +load128_be(uint8_t *b) +{ + uint128_t r; + load128_be_(b, &r); + return r; +} + +inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store128_be_(b, &n); +} + +#else /* !defined(KRML_STRUCT_PASSING) */ + +#define print128 print128_ +#define load128_le load128_le_ +#define store128_le store128_le_ +#define load128_be load128_be_ +#define store128_be store128_be_ + +#endif /* KRML_STRUCT_PASSING */ + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def new file mode 100644 index 0000000000..c4ab8e38ed --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def @@ -0,0 +1,11 @@ +LIBRARY libkrmllib + +EXPORTS + FStar_UInt64_eq_mask + FStar_UInt64_gte_mask + FStar_UInt32_eq_mask + FStar_UInt32_gte_mask + FStar_UInt16_eq_mask + FStar_UInt16_gte_mask + FStar_UInt8_eq_mask + FStar_UInt8_gte_mask diff --git a/security/nss/lib/freebl/verified/libintvector.h b/security/nss/lib/freebl/verified/libintvector.h new file mode 100644 index 0000000000..fab6a35d18 --- /dev/null +++ b/security/nss/lib/freebl/verified/libintvector.h @@ -0,0 
+1,915 @@ +#ifndef __Vec_Intrin_H +#define __Vec_Intrin_H + +#include + +/* We include config.h here to ensure that the various feature-flags are + * properly brought into scope. Users can either run the configure script, or + * write a config.h themselves and put it under version control. */ +#if defined(__has_include) +#if __has_include("config.h") +#include "config.h" +#endif +#endif + +/* # DEBUGGING: + * ============ + * It is possible to debug the current definitions by using libintvector_debug.h + * See the include at the bottom of the file. */ + +#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) + +#if defined(__x86_64__) || defined(_M_X64) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include +#include +#include + +typedef __m128i Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \ + (_mm_aesenc_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \ + (_mm_aesenclast_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \ + (_mm_aeskeygenassist_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \ + (_mm_clmulepi64_si128(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (_mm_xor_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (_mm_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (_mm_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + (_mm_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (_mm_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (_mm_or_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (_mm_and_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (_mm_xor_si128(x0, _mm_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (_mm_slli_si128(x0, (x1) / 8)) + +#define 
Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (_mm_srli_si128(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (_mm_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (_mm_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (_mm_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (_mm_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + (((x1) == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : ((x1) == 24 ? 
Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - (x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - (x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * x1 + 1, 2 * x1, 2 * x2 + 1, 2 * x2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * x1 + 3) % 4, (2 * x1 + 2) % 4, (2 * x1 + 1) % 4, (2 * x1) % 4))) + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15)))) + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ + (_mm_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (_mm_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (_mm_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (_mm_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (_mm_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (_mm_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (_mm_setzero_si128()) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (_mm_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (_mm_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (_mm_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (_mm_mul_epu32(x0, _mm_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (_mm_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (_mm_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (_mm_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (_mm_mullo_epi32(x0, _mm_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((__m128i)x) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (_mm_set1_epi64x(x)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \ + 
(_mm_set_epi64x(x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (_mm_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (_mm_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (_mm_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (_mm_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (_mm_unpackhi_epi64(x1, x2)) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#if defined(HACL_CAN_COMPILE_VEC256) + +#include +#include + +typedef __m256i Lib_IntVector_Intrinsics_vec256; + +#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \ + (_mm256_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \ + (_mm256_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \ + (_mm256_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \ + (_mm256_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \ + (_mm256_xor_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \ + (_mm256_or_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \ + (_mm256_and_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \ + (_mm256_xor_si256(x0, _mm256_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \ + (_mm256_slli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \ + (_mm256_srli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \ + (_mm256_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \ + (_mm256_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \ + (_mm256_slli_epi32(x0, x1)) + 
+#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \ + (_mm256_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1, 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - (x1))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - (x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1, 8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2, 9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3, 10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \ + (_mm256_shuffle_epi8(x0, 
_mm256_set_epi8(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5, 12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6, 13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7, 14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : (x1 == 32 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : (x1 == 40 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : (x1 == 48 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : (x1 == 56 ? 
Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : _mm256_xor_si256(_mm256_srli_epi64((x0), (x1)), _mm256_slli_epi64((x0), (64 - (x1)))))))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0, x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, 64 - (x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8, x7, x6, x5, x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32((x1 + 7) % 8, (x1 + 6) % 8, (x1 + 5) % 8, (x1 + 4) % 8, (x1 + 3 % 8), (x1 + 2) % 8, (x1 + 1) % 8, x1 % 8))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) + +#define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \ + 
(_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + +#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \ + (_mm256_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \ + (_mm256_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \ + (_mm256_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \ + (_mm256_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \ + (_mm256_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \ + (_mm256_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_zero \ + (_mm256_setzero_si256()) + +#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \ + (_mm256_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \ + (_mm256_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \ + (_mm256_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \ + (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \ + (_mm256_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \ + (_mm256_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \ + (_mm256_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \ + (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec256_load64(x1) \ + (_mm256_set1_epi64x(x1)) /* hi lo */ + +#define 
Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \ + (_mm256_set_epi64x(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load32(x) \ + (_mm256_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec256_load32s(x0, x1, x2, x3, x4, x5, x6, x7) \ + (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load128(x) \ + (_mm256_set_m128i((__m128i)x)) + +#define Lib_IntVector_Intrinsics_vec256_load128s(x0, x1) \ + (_mm256_set_m128i((__m128i)x1, (__m128i)x0)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \ + (_mm256_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \ + (_mm256_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \ + (_mm256_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \ + (_mm256_unpackhi_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x20)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x31)) + +#endif /* HACL_CAN_COMPILE_VEC256 */ + +#elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && !defined(__ARM_32BIT_STATE) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include + +typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (veorq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (vceqq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (vceqq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (vcgtq_u32(x0, x1)) + +#define high32(x0) \ + (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), 32))) + +#define low32(x0) \ + (vmovn_u64(vreinterpretq_u64_u32(x0))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + 
(vreinterpretq_u32_u64(vmovl_u32(vorr_u32(vcgt_u32(high32(x0), high32(x1)), vand_u32(vceq_u32(high32(x0), high32(x1)), vcgt_u32(low32(x0), low32(x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (vorrq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (vandq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (vmvnq_u32(x0)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), 16 - (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (vshlq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (vshrq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), (x1)), (x0), 32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (((x1) == 16 ? 
Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), 32 - (x1)), (x0), (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (vextq_u32(x0, x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (vextq_u64(x0, x0, x1)) + +/* +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (vld1q_u32((const uint32_t*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (vld1q_u32((const uint32_t*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (x1))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (x1))) + +/* +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + ( Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0))); + +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +/* +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ 
+ (vsetq_lane_u8(x1, x0, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (vsetq_lane_u32(x1, x0, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (vreinterpretq_u32_u64(vsetq_lane_u64(x1, vreinterpretq_u64_u32(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (vgetq_lane_u8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (vgetq_lane_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (vgetq_lane_u64(vreinterpretq_u64_u32(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (vdupq_n_u32(0)) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (vaddq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (vsubq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (vmulq_lane_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (vmulq_lane_u32(x0, vdupq_n_u32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((uint32x4_t)(x)) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (vreinterpretq_u32_u64(vdupq_n_u64(x))) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (vdupq_n_u32(x)) /* hi lo */ + +static inline Lib_IntVector_Intrinsics_vec128 +Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2) +{ + const 
uint64_t a[2] = { x1, x2 }; + return vreinterpretq_u32_u64(vld1q_u64(a)); +} + +static inline Lib_IntVector_Intrinsics_vec128 +Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) +{ + const uint32_t a[4] = { x1, x2, x3, x4 }; + return vld1q_u32(a); +} + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (vzip1q_u32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (vzip2q_u32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +/* IBM z architecture */ +#elif defined(__s390x__) /* this flag is for GCC only */ + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include +#include + +/* The main vector 128 type + * We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, + * unsigned int, unsigned long long: the compiler complains that the parameter + * combination is invalid. 
*/ +typedef unsigned char vector128_8 __attribute__((vector_size(16))); +typedef unsigned int vector128_32 __attribute__((vector_size(16))); +typedef unsigned long long vector128_64 __attribute__((vector_size(16))); + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + (vector128)((vector128_32)vec_revb(*((vector128_32*)(const uint8_t*)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x) \ + (vector128)(*((vector128_32*)(const uint8_t*)(x))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + (vector128)((vector128_64)vec_revb(*((vector128_64*)(const uint8_t*)(x)))) + +static inline void +Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = vec_revb((vector128_32)x1); +} + +static inline void +Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = (vector128_32)x1; +} + +static inline void +Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_64 *)x0) = vec_revb((vector128_64)x1); +} + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long 
long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x), (unsigned long long)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mulo((vector128_32)(x0), \ + (vector128_32)(x1)))) + 
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), (x1 % 4) * 4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \ + ((vector128)((vector128_64){ 0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \ + ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (((vector128)((vector128_32)vec_rli((vector128_32)(x0), (unsigned int)(32 - (x1))))) & \ + ((vector128)((vector128_32){ 0xffffffff >> (x1), 0xffffffff >> (x1), \ + 0xffffffff >> (x1), 0xffffffff >> (x1) }))) + +/* Doesn't work with vec_splat_u64 */ +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +static inline vector128 +Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1) +{ + return ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))); +} + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only 
+ +#if defined(HACL_CAN_COMPILE_VEC128) + +#include +#include // for memcpy +#include + +// The main vector 128 type +// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, +// unsigned int, unsigned long long: the compiler complains that the parameter +// combination is invalid. +typedef vector unsigned char vector128_8; +typedef vector unsigned int vector128_32; +typedef vector unsigned long long vector128_64; + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vec_xst((vector128_32)(x1), 0, (unsigned int*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vec_xst((vector128_64)(x1), 0, (unsigned long long*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) + +#define 
Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64){ (unsigned long long)(x), (unsigned long long)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mule((vector128_32)(x0), \ + (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + 
((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){ (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4 - (x1)) % 4) * 4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + +// Doesn't work with vec_splat_u64 +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#endif // PowerPC64 + +// DEBUGGING: +// If libintvector_debug.h exists, use it to debug the current implementations. +// Note that some flags must be enabled for the debugging to be effective: +// see libintvector_debug.h for more details. 
+#if defined(__has_include) +#if __has_include("libintvector_debug.h") +#include "libintvector_debug.h" +#endif +#endif + +#endif // __Vec_Intrin_H diff --git a/security/nss/lib/freebl/win_rand.c b/security/nss/lib/freebl/win_rand.c new file mode 100644 index 0000000000..b863776d21 --- /dev/null +++ b/security/nss/lib/freebl/win_rand.c @@ -0,0 +1,161 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "secrng.h" + +#ifdef XP_WIN +#include +#include + +static BOOL +CurrentClockTickTime(LPDWORD lpdwHigh, LPDWORD lpdwLow) +{ + LARGE_INTEGER liCount; + + if (!QueryPerformanceCounter(&liCount)) + return FALSE; + + *lpdwHigh = liCount.u.HighPart; + *lpdwLow = liCount.u.LowPart; + return TRUE; +} + +size_t +RNG_GetNoise(void *buf, size_t maxbuf) +{ + DWORD dwHigh, dwLow, dwVal; + int n = 0; + int nBytes; + time_t sTime; + + if (maxbuf <= 0) + return 0; + + CurrentClockTickTime(&dwHigh, &dwLow); + + // get the maximally changing bits first + nBytes = sizeof(dwLow) > maxbuf ? maxbuf : sizeof(dwLow); + memcpy((char *)buf, &dwLow, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + nBytes = sizeof(dwHigh) > maxbuf ? maxbuf : sizeof(dwHigh); + memcpy(((char *)buf) + n, &dwHigh, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + // get the number of milliseconds that have elapsed since Windows started + dwVal = GetTickCount(); + + nBytes = sizeof(dwVal) > maxbuf ? maxbuf : sizeof(dwVal); + memcpy(((char *)buf) + n, &dwVal, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + // get the time in seconds since midnight Jan 1, 1970 + time(&sTime); + nBytes = sizeof(sTime) > maxbuf ? 
maxbuf : sizeof(sTime); + memcpy(((char *)buf) + n, &sTime, nBytes); + n += nBytes; + + return n; +} + +void +RNG_SystemInfoForRNG(void) +{ + DWORD dwVal; + char buffer[256]; + int nBytes; + MEMORYSTATUS sMem; + HANDLE hVal; + DWORD dwSerialNum; + DWORD dwComponentLen; + DWORD dwSysFlags; + char volName[128]; + DWORD dwSectors, dwBytes, dwFreeClusters, dwNumClusters; + + nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes + RNG_RandomUpdate(buffer, nBytes); + + sMem.dwLength = sizeof(sMem); + GlobalMemoryStatus(&sMem); // assorted memory stats + RNG_RandomUpdate(&sMem, sizeof(sMem)); + + dwVal = GetLogicalDrives(); + RNG_RandomUpdate(&dwVal, sizeof(dwVal)); // bitfields in bits 0-25 + + dwVal = sizeof(buffer); + if (GetComputerName(buffer, &dwVal)) + RNG_RandomUpdate(buffer, dwVal); + + hVal = GetCurrentProcess(); // 4 or 8 byte pseudo handle (a + // constant!) of current process + RNG_RandomUpdate(&hVal, sizeof(hVal)); + + dwVal = GetCurrentProcessId(); // process ID (4 bytes) + RNG_RandomUpdate(&dwVal, sizeof(dwVal)); + + dwVal = GetCurrentThreadId(); // thread ID (4 bytes) + RNG_RandomUpdate(&dwVal, sizeof(dwVal)); + + volName[0] = '\0'; + buffer[0] = '\0'; + GetVolumeInformation(NULL, + volName, + sizeof(volName), + &dwSerialNum, + &dwComponentLen, + &dwSysFlags, + buffer, + sizeof(buffer)); + + RNG_RandomUpdate(volName, strlen(volName)); + RNG_RandomUpdate(&dwSerialNum, sizeof(dwSerialNum)); + RNG_RandomUpdate(&dwComponentLen, sizeof(dwComponentLen)); + RNG_RandomUpdate(&dwSysFlags, sizeof(dwSysFlags)); + RNG_RandomUpdate(buffer, strlen(buffer)); + + if (GetDiskFreeSpace(NULL, &dwSectors, &dwBytes, &dwFreeClusters, + &dwNumClusters)) { + RNG_RandomUpdate(&dwSectors, sizeof(dwSectors)); + RNG_RandomUpdate(&dwBytes, sizeof(dwBytes)); + RNG_RandomUpdate(&dwFreeClusters, sizeof(dwFreeClusters)); + RNG_RandomUpdate(&dwNumClusters, sizeof(dwNumClusters)); + } + + nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes + RNG_RandomUpdate(buffer, nBytes); +} + 
+/* + * The RtlGenRandom function is declared in , but the + * declaration is missing a calling convention specifier. So we + * declare it manually here. + */ +#define RtlGenRandom SystemFunction036 +DECLSPEC_IMPORT BOOLEAN WINAPI RtlGenRandom( + PVOID RandomBuffer, + ULONG RandomBufferLength); + +size_t +RNG_SystemRNG(void *dest, size_t maxLen) +{ + size_t bytes = 0; + + if (RtlGenRandom(dest, maxLen)) { + bytes = maxLen; + } + return bytes; +} +#endif /* is XP_WIN */ -- cgit v1.2.3